From 25c964f7a2a6f7417f59449b7d1ca0905fa665bc Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 17:01:52 -0400 Subject: [PATCH 01/43] move manual tests to unit tests: checkpoint --- c/containers/test_maps_and_sets.c | 56 +++++++++++++------------------ 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/c/containers/test_maps_and_sets.c b/c/containers/test_maps_and_sets.c index 829216a06..6fb357d1f 100644 --- a/c/containers/test_maps_and_sets.c +++ b/c/containers/test_maps_and_sets.c @@ -7,6 +7,7 @@ #include "containers/hss.h" #include "containers/lhmsi.h" #include "containers/lhms2v.h" +#include "containers/lhmslv.h" #ifdef __TEST_MAPS_AND_SETS_MAIN__ int tests_run = 0; @@ -221,16 +222,6 @@ static char* test_lhmsi() { return NULL; } -// lhmsi_remove(pmap, "y"); -// printf("map size = %d\n", pmap->num_occupied); -// lhmsi_dump(pmap); -// printf("map has(\"w\") = %d\n", lhmsi_has_key(pmap, "w")); -// printf("map has(\"x\") = %d\n", lhmsi_has_key(pmap, "x")); -// printf("map has(\"y\") = %d\n", lhmsi_has_key(pmap, "y")); -// printf("map has(\"z\") = %d\n", lhmsi_has_key(pmap, "z")); -// lhmsi_check_counts(pmap); -// lhmsi_free(pmap); - // ---------------------------------------------------------------- static char* test_lhms2v() { mu_assert_lf(0 == 0); @@ -272,32 +263,31 @@ static char* test_lhms2v() { static char* test_lhmslv() { mu_assert_lf(0 == 0); + slls_t* ax = slls_alloc(); + slls_add_no_free(ax, "a"); + slls_add_no_free(ax, "x"); + // xxx assertions here + + slls_t* ay = slls_alloc(); + slls_add_no_free(ay, "a"); + slls_add_no_free(ay, "y"); + + slls_t* bz = slls_alloc(); + slls_add_no_free(bz, "b"); + slls_add_no_free(bz, "z"); + + lhmslv_t *pmap = lhmslv_alloc(); + lhmslv_put(pmap, ax, "3"); + lhmslv_put(pmap, ay, "5"); + lhmslv_put(pmap, ax, "4"); + lhmslv_put(pmap, bz, "7"); + lhmslv_remove(pmap, ay); + + lhmslv_free(pmap); + return NULL; } -// slls_t* ax = slls_alloc(); -// slls_add_no_free(ax, "a"); -// slls_add_no_free(ax, 
"x"); -// -// slls_t* ay = slls_alloc(); -// slls_add_no_free(ay, "a"); -// slls_add_no_free(ay, "y"); -// -// slls_t* bz = slls_alloc(); -// slls_add_no_free(bz, "b"); -// slls_add_no_free(bz, "z"); -// -// lhmslv_t *pmap = lhmslv_alloc(); -// lhmslv_put(pmap, ax, "3"); -// lhmslv_put(pmap, ay, "5"); -// lhmslv_put(pmap, ax, "4"); -// lhmslv_put(pmap, bz, "7"); -// lhmslv_remove(pmap, ay); -// printf("map size = %d\n", lhmslv_size(pmap)); -// lhmslv_dump(pmap); -// lhmslv_check_counts(pmap); -// lhmslv_free(pmap); - // ---------------------------------------------------------------- static char* test_lhmss() { mu_assert_lf(0 == 0); From cc1ed4e33e21db14239ec7a85223aff91b176c63 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 17:07:30 -0400 Subject: [PATCH 02/43] move manual tests to unit tests: checkpoint --- c/containers/lhmslv.c | 8 ++++--- c/containers/lhmslv.h | 3 +++ c/containers/test_maps_and_sets.c | 37 +++++++++++++++++++++---------- 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/c/containers/lhmslv.c b/c/containers/lhmslv.c index 77d43c3aa..301b6b6d2 100644 --- a/c/containers/lhmslv.c +++ b/c/containers/lhmslv.c @@ -246,6 +246,7 @@ void* lhmslv_remove(lhmslv_t* pmap, slls_t* key) { void lhmslv_clear(lhmslv_t* pmap) { for (int i = 0; i < pmap->array_length; i++) { lhmslve_clear(&pmap->entries[i]); + pmap->states[i] = EMPTY; } pmap->num_occupied = 0; pmap->num_freed = 0; @@ -271,7 +272,7 @@ static void lhmslv_enlarge(lhmslv_t* pmap) { } // ---------------------------------------------------------------- -void lhmslv_check_counts(lhmslv_t* pmap) { +int lhmslv_check_counts(lhmslv_t* pmap) { int nocc = 0; int ndel = 0; for (int index = 0; index < pmap->array_length; index++) { @@ -284,14 +285,15 @@ void lhmslv_check_counts(lhmslv_t* pmap) { fprintf(stderr, "occupancy-count mismatch: actual %d != cached %d\n", nocc, pmap->num_occupied); - exit(1); + return FALSE; } if (ndel != pmap->num_freed) { fprintf(stderr, "freed-count 
mismatch: actual %d != cached %d\n", ndel, pmap->num_freed); - exit(1); + return FALSE; } + return TRUE; } // ---------------------------------------------------------------- diff --git a/c/containers/lhmslv.h b/c/containers/lhmslv.h index 91a3a6a09..eb7d43c26 100644 --- a/c/containers/lhmslv.h +++ b/c/containers/lhmslv.h @@ -49,4 +49,7 @@ void* lhmslv_remove(lhmslv_t* pmap, slls_t* key); void lhmslv_clear(lhmslv_t* pmap); int lhmslv_size(lhmslv_t* pmap); +// Unit-test hook +int lhmslv_check_counts(lhmslv_t* pmap); + #endif // LHMSLV_H diff --git a/c/containers/test_maps_and_sets.c b/c/containers/test_maps_and_sets.c index 6fb357d1f..ca656c794 100644 --- a/c/containers/test_maps_and_sets.c +++ b/c/containers/test_maps_and_sets.c @@ -263,25 +263,38 @@ static char* test_lhms2v() { static char* test_lhmslv() { mu_assert_lf(0 == 0); - slls_t* ax = slls_alloc(); - slls_add_no_free(ax, "a"); - slls_add_no_free(ax, "x"); - // xxx assertions here - - slls_t* ay = slls_alloc(); - slls_add_no_free(ay, "a"); - slls_add_no_free(ay, "y"); - - slls_t* bz = slls_alloc(); - slls_add_no_free(bz, "b"); - slls_add_no_free(bz, "z"); + slls_t* ax = slls_alloc(); slls_add_no_free(ax, "a"); slls_add_no_free(ax, "x"); + slls_t* ay = slls_alloc(); slls_add_no_free(ay, "a"); slls_add_no_free(ay, "y"); + slls_t* bz = slls_alloc(); slls_add_no_free(bz, "b"); slls_add_no_free(bz, "z"); + // xxx more assertions here lhmslv_t *pmap = lhmslv_alloc(); + mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(lhmslv_check_counts(pmap)); + lhmslv_put(pmap, ax, "3"); + mu_assert_lf(pmap->num_occupied == 1); + mu_assert_lf(lhmslv_check_counts(pmap)); + lhmslv_put(pmap, ay, "5"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(lhmslv_check_counts(pmap)); + lhmslv_put(pmap, ax, "4"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(lhmslv_check_counts(pmap)); + lhmslv_put(pmap, bz, "7"); + mu_assert_lf(pmap->num_occupied == 3); + mu_assert_lf(lhmslv_check_counts(pmap)); + 
lhmslv_remove(pmap, ay); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(lhmslv_check_counts(pmap)); + + lhmslv_clear(pmap); + mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_free(pmap); From e532ce93046a904d88be0fe12f6e2a8754c8c618 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 17:38:11 -0400 Subject: [PATCH 03/43] move manual tests to unit tests: checkpoint --- c/Makefile | 1 + c/containers/dheap.c | 13 ++--- c/containers/dheap.h | 8 +-- c/containers/test_maps_and_sets.c | 90 ++++++++++++++++++++++--------- 4 files changed, 79 insertions(+), 33 deletions(-) diff --git a/c/Makefile b/c/Makefile index baee359bc..6f33e9797 100644 --- a/c/Makefile +++ b/c/Makefile @@ -64,6 +64,7 @@ containers/hss.c \ containers/lhmsi.c \ containers/lhms2v.c \ containers/lhmslv.c \ +containers/dheap.c \ input/file_reader_mmap.c input/file_reader_stdio.c \ input/lrec_reader_mmap_csvlite.c input/lrec_reader_stdio_csvlite.c \ input/lrec_reader_mmap_dkvp.c input/lrec_reader_stdio_dkvp.c \ diff --git a/c/containers/dheap.c b/c/containers/dheap.c index ea46bdff0..74916ae87 100644 --- a/c/containers/dheap.c +++ b/c/containers/dheap.c @@ -120,20 +120,20 @@ void dheap_print(dheap_t *pdheap) // 4 5 6 7 // 8 9 10 11 12 13 14 15 -static void dheap_check_aux(dheap_t *pdheap, int i, char *file, int line) +static int dheap_check_aux(dheap_t *pdheap, int i, char *file, int line) { int n = pdheap->n; double *pe = pdheap->elements; if (i >= n) - return; + return TRUE; int li = dheap_left_child_index (i, pdheap->n); int ri = dheap_right_child_index(i, pdheap->n); if (li != -1) { if (pe[i] < pe[li]) { fprintf(stderr, "dheap check fail %s:%d pe[%d]=%lf < pe[%d]=%lf\n", file, line, i, pe[i], li, pe[li]); - exit(1); + return FALSE; } dheap_check_aux(pdheap, li, file, line); } @@ -141,15 +141,16 @@ static void dheap_check_aux(dheap_t *pdheap, int i, char *file, int line) if (pe[i] < pe[ri]) { fprintf(stderr, "dheap check fail %s:%d pe[%d]=%lf 
< pe[%d]=%lf\n", file, line, i, pe[i], ri, pe[ri]); - exit(1); + return FALSE; } dheap_check_aux(pdheap, ri, file, line); } + return TRUE; } -void dheap_check(dheap_t *pdheap, char *file, int line) +int dheap_check(dheap_t *pdheap, char *file, int line) { - dheap_check_aux(pdheap, 1, file, line); + return dheap_check_aux(pdheap, 1, file, line); } // ---------------------------------------------------------------- diff --git a/c/containers/dheap.h b/c/containers/dheap.h index 94a165034..e1533f1f6 100644 --- a/c/containers/dheap.h +++ b/c/containers/dheap.h @@ -19,10 +19,12 @@ dheap_t *dheap_alloc(); dheap_t *dheap_from_array(double *array, int n); void dheap_free(dheap_t *pheap); -void dheap_print(dheap_t *pdheap); -void dheap_check(dheap_t *pdheap, char *file, int line); - void dheap_add(dheap_t *pdheap, double v); double dheap_remove(dheap_t *pdheap); +// For debug +void dheap_print(dheap_t *pdheap); +// For unit test +int dheap_check(dheap_t *pdheap, char *file, int line); + #endif // DHEAP_H diff --git a/c/containers/test_maps_and_sets.c b/c/containers/test_maps_and_sets.c index ca656c794..b52d71133 100644 --- a/c/containers/test_maps_and_sets.c +++ b/c/containers/test_maps_and_sets.c @@ -8,6 +8,7 @@ #include "containers/lhmsi.h" #include "containers/lhms2v.h" #include "containers/lhmslv.h" +#include "containers/dheap.h" #ifdef __TEST_MAPS_AND_SETS_MAIN__ int tests_run = 0; @@ -226,6 +227,7 @@ static char* test_lhmsi() { static char* test_lhms2v() { mu_assert_lf(0 == 0); + // xxx more assertions here lhms2v_t *pmap = lhms2v_alloc(); mu_assert_lf(pmap->num_occupied == 0); mu_assert_lf(lhms2v_check_counts(pmap)); @@ -421,33 +423,73 @@ static char* test_top_keeper() { static char* test_dheap() { mu_assert_lf(0 == 0); + dheap_t *pdheap = dheap_alloc(); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 0); + + dheap_add(pdheap, 4.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 1); + + 
dheap_add(pdheap, 3.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 2); + + dheap_add(pdheap, 2.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 3); + + dheap_add(pdheap, 6.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 4); + + dheap_add(pdheap, 5.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 5); + + dheap_add(pdheap, 8.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 6); + + dheap_add(pdheap, 7.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 7); + + dheap_print(pdheap); + + mu_assert_lf(dheap_remove(pdheap) == 8.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 6); + + mu_assert_lf(dheap_remove(pdheap) == 7.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 5); + + mu_assert_lf(dheap_remove(pdheap) == 6.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 4); + + mu_assert_lf(dheap_remove(pdheap) == 5.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 3); + + mu_assert_lf(dheap_remove(pdheap) == 4.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 2); + + mu_assert_lf(dheap_remove(pdheap) == 3.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 1); + + mu_assert_lf(dheap_remove(pdheap) == 2.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 0); + + dheap_free(pdheap); + return NULL; } -// dheap_t *pdheap = dheap_alloc(); -// dheap_check(pdheap, __FILE__, __LINE__); -// dheap_add(pdheap, 4.1); -// dheap_add(pdheap, 3.1); -// dheap_add(pdheap, 2.1); -// dheap_add(pdheap, 6.1); -// dheap_add(pdheap, 5.1); -// dheap_add(pdheap, 8.1); -// 
dheap_add(pdheap, 7.1); -// dheap_print(pdheap); -// dheap_check(pdheap, __FILE__, __LINE__); -// -// printf("\n"); -// printf("remove %lf\n", dheap_remove(pdheap)); -// printf("remove %lf\n", dheap_remove(pdheap)); -// printf("remove %lf\n", dheap_remove(pdheap)); -// printf("remove %lf\n", dheap_remove(pdheap)); -// printf("\n"); -// -// dheap_print(pdheap); -// dheap_check(pdheap, __FILE__, __LINE__); -// -// dheap_free(pdheap); - // ================================================================ static char * run_all_tests() { mu_run_test(test_slls); From 3eed5696675e17923c6512f3c65a1c726940ad2d Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 17:39:11 -0400 Subject: [PATCH 04/43] move manual tests to unit tests: checkpoint --- c/containers/test_maps_and_sets.c | 1 + 1 file changed, 1 insertion(+) diff --git a/c/containers/test_maps_and_sets.c b/c/containers/test_maps_and_sets.c index b52d71133..58e17be6b 100644 --- a/c/containers/test_maps_and_sets.c +++ b/c/containers/test_maps_and_sets.c @@ -9,6 +9,7 @@ #include "containers/lhms2v.h" #include "containers/lhmslv.h" #include "containers/dheap.h" +#include "containers/top_keeper.h" #ifdef __TEST_MAPS_AND_SETS_MAIN__ int tests_run = 0; From 34da7b8f48923879e56d217ff30ad3ce4a73911b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 17:41:09 -0400 Subject: [PATCH 05/43] move manual tests to unit tests: checkpoint --- c/Makefile | 14 +++++++------- ..._maps_and_sets.c => test_multiple_containers.c} | 0 2 files changed, 7 insertions(+), 7 deletions(-) rename c/containers/{test_maps_and_sets.c => test_multiple_containers.c} (100%) diff --git a/c/Makefile b/c/Makefile index 6f33e9797..96b418b9b 100644 --- a/c/Makefile +++ b/c/Makefile @@ -70,7 +70,7 @@ input/lrec_reader_mmap_csvlite.c input/lrec_reader_stdio_csvlite.c \ input/lrec_reader_mmap_dkvp.c input/lrec_reader_stdio_dkvp.c \ input/lrec_reader_mmap_nidx.c input/lrec_reader_stdio_nidx.c \ input/lrec_reader_mmap_xtab.c 
input/lrec_reader_stdio_xtab.c \ -containers/test_maps_and_sets.c +containers/test_multiple_containers.c TEST_JOIN_BUCKET_KEEPER_SRCS = \ lib/mlrutil.c lib/mlr_globals.c lib/string_builder.c \ @@ -160,13 +160,13 @@ mlrp: .always dsls tests: unit-test reg-test -unit-test: test-mlrutil test-byte-readers test-peek-file-reader test-parse-trie test-lrec test-maps-and-sets test-string-builder test-join-bucket-keeper +unit-test: test-mlrutil test-byte-readers test-peek-file-reader test-parse-trie test-lrec test-multiple-containers test-string-builder test-join-bucket-keeper ./test-mlrutil ./test-byte-readers ./test-peek-file-reader ./test-parse-trie ./test-lrec - ./test-maps-and-sets + ./test-multiple-containers ./test-string-builder ./test-join-bucket-keeper @echo @@ -182,14 +182,14 @@ dev-tests: dev-unit-test reg-test # Unfortunately --error-exitcode=1 doesn't work well since there are # valgrind-detected errors in stdlibs. :( -dev-unit-test: test-mlrutil test-byte-readers test-peek-file-reader test-parse-trie test-lrec test-maps-and-sets test-string-builder test-join-bucket-keeper +dev-unit-test: test-mlrutil test-byte-readers test-peek-file-reader test-parse-trie test-lrec test-multiple-containers test-string-builder test-join-bucket-keeper #valgrind --leak-check=full --error-exitcode=1 ./a.out valgrind --leak-check=full ./test-mlrutil valgrind --leak-check=full ./test-byte-readers valgrind --leak-check=full ./test-Peek-file-reader valgrind --leak-check=full ./test-parse-trie valgrind --leak-check=full ./test-lrec - valgrind --leak-check=full ./test-maps-and-sets + valgrind --leak-check=full ./test-multiple-containers valgrind --leak-check=full ./test-string-builder valgrind --leak-check=full ./test-join-bucket-keeper @echo @@ -212,8 +212,8 @@ test-peek-file-reader: .always test-lrec: .always $(CCDEBUG) -D__TEST_LREC_MAIN__ $(TEST_LREC_SRCS) -o test-lrec -test-maps-and-sets: .always - $(CCDEBUG) -D__TEST_MAPS_AND_SETS_MAIN__ $(TEST_MAPS_AND_SETS_SRCS) -o 
test-maps-and-sets +test-multiple-containers: .always + $(CCDEBUG) -D__TEST_MAPS_AND_SETS_MAIN__ $(TEST_MAPS_AND_SETS_SRCS) -o test-multiple-containers test-mlrutil: .always $(CCDEBUG) -D__TEST_MLRUTIL_MAIN__ lib/mlrutil.c lib/test_mlrutil.c -o test-mlrutil diff --git a/c/containers/test_maps_and_sets.c b/c/containers/test_multiple_containers.c similarity index 100% rename from c/containers/test_maps_and_sets.c rename to c/containers/test_multiple_containers.c From 60d5b0b18d78da86ea4bc9aa3f08d804f346a1da Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 17:49:10 -0400 Subject: [PATCH 06/43] move manual tests to unit tests: checkpoint --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 37bb79e76..226f69027 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,7 @@ test-byte-readers test-peek-file-reader test-parse-trie test-lrec -test-maps-and-sets +test-multiple-containers test-join-bucket-keeper termcvt a.out From 484b2deea04bfb706fb9ddce1b22a98ff1a891b7 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 17:59:04 -0400 Subject: [PATCH 07/43] move manual tests to unit tests: checkpoint --- c/Makefile | 1 + c/containers/test_multiple_containers.c | 72 +++++++++++++------------ c/containers/top_keeper.c | 10 ++++ c/containers/top_keeper.h | 3 ++ 4 files changed, 52 insertions(+), 34 deletions(-) diff --git a/c/Makefile b/c/Makefile index 96b418b9b..43720c794 100644 --- a/c/Makefile +++ b/c/Makefile @@ -64,6 +64,7 @@ containers/hss.c \ containers/lhmsi.c \ containers/lhms2v.c \ containers/lhmslv.c \ +containers/top_keeper.c \ containers/dheap.c \ input/file_reader_mmap.c input/file_reader_stdio.c \ input/lrec_reader_mmap_csvlite.c input/lrec_reader_stdio_csvlite.c \ diff --git a/c/containers/test_multiple_containers.c b/c/containers/test_multiple_containers.c index 58e17be6b..b54f19eb2 100644 --- a/c/containers/test_multiple_containers.c +++ b/c/containers/test_multiple_containers.c 
@@ -8,8 +8,8 @@ #include "containers/lhmsi.h" #include "containers/lhms2v.h" #include "containers/lhmslv.h" -#include "containers/dheap.h" #include "containers/top_keeper.h" +#include "containers/dheap.h" #ifdef __TEST_MAPS_AND_SETS_MAIN__ int tests_run = 0; @@ -383,43 +383,47 @@ static char* test_percentile_keeper() { // ---------------------------------------------------------------- static char* test_top_keeper() { mu_assert_lf(0 == 0); + int capacity = 3; + top_keeper_t* ptop_keeper = top_keeper_alloc(capacity); + mu_assert_lf(ptop_keeper->size == 0); + + top_keeper_add(ptop_keeper, 5.0, NULL); + top_keeper_print(ptop_keeper); + mu_assert_lf(ptop_keeper->size == 1); + mu_assert_lf(ptop_keeper->top_values[0] == 5.0); + + top_keeper_add(ptop_keeper, 6.0, NULL); + top_keeper_print(ptop_keeper); + mu_assert_lf(ptop_keeper->size == 2); + mu_assert_lf(ptop_keeper->top_values[0] == 6.0); + mu_assert_lf(ptop_keeper->top_values[1] == 5.0); + + top_keeper_add(ptop_keeper, 4.0, NULL); + top_keeper_print(ptop_keeper); + mu_assert_lf(ptop_keeper->size == 3); + mu_assert_lf(ptop_keeper->top_values[0] == 6.0); + mu_assert_lf(ptop_keeper->top_values[1] == 5.0); + mu_assert_lf(ptop_keeper->top_values[2] == 4.0); + + top_keeper_add(ptop_keeper, 2.0, NULL); + top_keeper_print(ptop_keeper); + mu_assert_lf(ptop_keeper->size == 3); + mu_assert_lf(ptop_keeper->top_values[0] == 6.0); + mu_assert_lf(ptop_keeper->top_values[1] == 5.0); + mu_assert_lf(ptop_keeper->top_values[2] == 4.0); + + top_keeper_add(ptop_keeper, 7.0, NULL); + top_keeper_print(ptop_keeper); + mu_assert_lf(ptop_keeper->size == 3); + mu_assert_lf(ptop_keeper->top_values[0] == 7.0); + mu_assert_lf(ptop_keeper->top_values[1] == 6.0); + mu_assert_lf(ptop_keeper->top_values[2] == 5.0); + + top_keeper_free(ptop_keeper); return NULL; } -//void top_keeper_dump(top_keeper_t* ptop_keeper) { -// for (int i = 0; i < ptop_keeper->size; i++) -// printf("[%02d] %.8lf\n", i, ptop_keeper->top_values[i]); -// for (int i = 
ptop_keeper->size; i < ptop_keeper->capacity; i++) -// printf("[%02d] ---\n", i); -//} - -// int capacity = 5; -// char buffer[1024]; -// if (argc == 2) -// (void)sscanf(argv[1], "%d", &capacity); -// top_keeper_t* ptop_keeper = top_keeper_alloc(capacity); -// char* line; -// while ((line = fgets(buffer, sizeof(buffer), stdin)) != NULL) { -// int len = strlen(line); -// if (len >= 1) // xxx write and use a chomp() -// if (line[len-1] == '\n') -// line[len-1] = 0; -// if (streq(line, "")) { -// //top_keeper_dump(ptop_keeper); -// printf("\n"); -// } else { -// double v; -// if (!mlr_try_double_from_string(line, &v)) { -// top_keeper_add(ptop_keeper, v, NULL); -// top_keeper_dump(ptop_keeper); -// printf("\n"); -// } else { -// printf("meh? >>%s<<\n", line); -// } -// } -// } - // ---------------------------------------------------------------- static char* test_dheap() { mu_assert_lf(0 == 0); diff --git a/c/containers/top_keeper.c b/c/containers/top_keeper.c index 3eda084cf..2db8197a3 100644 --- a/c/containers/top_keeper.c +++ b/c/containers/top_keeper.c @@ -1,3 +1,4 @@ +#include #include #include "lib/mlrutil.h" #include "containers/top_keeper.h" @@ -76,3 +77,12 @@ void top_keeper_add(top_keeper_t* ptop_keeper, double value, lrec_t* prec) { ptop_keeper->top_precords[destidx] = prec; // xxx copy?? xxx free on shift-off?!? 
} } + +// ---------------------------------------------------------------- +void top_keeper_print(top_keeper_t* ptop_keeper) { + printf("top_keeper dump:\n"); + for (int i = 0; i < ptop_keeper->size; i++) + printf("[%02d] %.8lf\n", i, ptop_keeper->top_values[i]); + for (int i = ptop_keeper->size; i < ptop_keeper->capacity; i++) + printf("[%02d] ---\n", i); +} diff --git a/c/containers/top_keeper.h b/c/containers/top_keeper.h index c657c8a9c..69b1b6ce6 100644 --- a/c/containers/top_keeper.h +++ b/c/containers/top_keeper.h @@ -17,4 +17,7 @@ top_keeper_t* top_keeper_alloc(int capacity); void top_keeper_free(top_keeper_t* ptop_keeper); void top_keeper_add(top_keeper_t* ptop_keeper, double value, lrec_t* prec); +// For debug/test +void top_keeper_print(top_keeper_t* ptop_keeper); + #endif // TOP_KEEPER_H From d0d25d4d2ec2e6256907bf69c2b32d20a1f01a0d Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 18:05:42 -0400 Subject: [PATCH 08/43] move manual tests to unit tests: checkpoint --- c/Makefile | 5 +- c/containers/percentile_keeper.c | 8 +++ c/containers/percentile_keeper.h | 3 ++ c/containers/test_multiple_containers.c | 67 ++++++++++++++----------- 4 files changed, 51 insertions(+), 32 deletions(-) diff --git a/c/Makefile b/c/Makefile index 43720c794..9351e5273 100644 --- a/c/Makefile +++ b/c/Makefile @@ -57,13 +57,14 @@ input/lrec_reader_mmap_nidx.c input/lrec_reader_stdio_nidx.c \ input/lrec_reader_mmap_xtab.c input/lrec_reader_stdio_xtab.c \ containers/test_lrec.c -TEST_MAPS_AND_SETS_SRCS = lib/mlrutil.c lib/mlr_globals.c lib/string_builder.c \ +TEST_MULTIPLE_CONTAINERS_SRCS = lib/mlrutil.c lib/mlr_globals.c lib/string_builder.c \ containers/lrec.c containers/header_keeper.c containers/sllv.c \ containers/slls.c \ containers/hss.c \ containers/lhmsi.c \ containers/lhms2v.c \ containers/lhmslv.c \ +containers/percentile_keeper.c \ containers/top_keeper.c \ containers/dheap.c \ input/file_reader_mmap.c input/file_reader_stdio.c \ @@ -214,7 +215,7 @@ 
test-lrec: .always $(CCDEBUG) -D__TEST_LREC_MAIN__ $(TEST_LREC_SRCS) -o test-lrec test-multiple-containers: .always - $(CCDEBUG) -D__TEST_MAPS_AND_SETS_MAIN__ $(TEST_MAPS_AND_SETS_SRCS) -o test-multiple-containers + $(CCDEBUG) -D__TEST_MAPS_AND_SETS_MAIN__ $(TEST_MULTIPLE_CONTAINERS_SRCS) -o test-multiple-containers test-mlrutil: .always $(CCDEBUG) -D__TEST_MLRUTIL_MAIN__ lib/mlrutil.c lib/test_mlrutil.c -o test-mlrutil diff --git a/c/containers/percentile_keeper.c b/c/containers/percentile_keeper.c index a4319a7d4..e7af55c81 100644 --- a/c/containers/percentile_keeper.c +++ b/c/containers/percentile_keeper.c @@ -65,3 +65,11 @@ double percentile_keeper_emit(percentile_keeper_t* ppercentile_keeper, double pe } return ppercentile_keeper->data[compute_index(ppercentile_keeper->size, percentile)]; } + +// ---------------------------------------------------------------- +void percentile_keeper_print(percentile_keeper_t* ppercentile_keeper) { + printf("percentile_keeper dump:\n"); + for (int i = 0; i < ppercentile_keeper->size; i++) + printf("[%02d] %.8lf\n", i, ppercentile_keeper->data[i]); +} + diff --git a/c/containers/percentile_keeper.h b/c/containers/percentile_keeper.h index f737c5adc..dcfba6307 100644 --- a/c/containers/percentile_keeper.h +++ b/c/containers/percentile_keeper.h @@ -18,4 +18,7 @@ void percentile_keeper_free(percentile_keeper_t* ppercentile_keeper); void percentile_keeper_ingest(percentile_keeper_t* ppercentile_keeper, double value); double percentile_keeper_emit(percentile_keeper_t* ppercentile_keeper, double percentile); +// For debug/test +void percentile_keeper_print(percentile_keeper_t* ppercentile_keeper); + #endif // PERCENTILE_KEEPER_H diff --git a/c/containers/test_multiple_containers.c b/c/containers/test_multiple_containers.c index b54f19eb2..e744b3716 100644 --- a/c/containers/test_multiple_containers.c +++ b/c/containers/test_multiple_containers.c @@ -8,6 +8,7 @@ #include "containers/lhmsi.h" #include "containers/lhms2v.h" #include 
"containers/lhmslv.h" +#include "containers/percentile_keeper.h" #include "containers/top_keeper.h" #include "containers/dheap.h" @@ -346,40 +347,46 @@ static char* test_lhmsv() { // ---------------------------------------------------------------- static char* test_percentile_keeper() { - mu_assert_lf(0 == 0); + + percentile_keeper_t* ppercentile_keeper = percentile_keeper_alloc(); + percentile_keeper_ingest(ppercentile_keeper, 1.0); + percentile_keeper_ingest(ppercentile_keeper, 2.0); + percentile_keeper_ingest(ppercentile_keeper, 3.0); + percentile_keeper_ingest(ppercentile_keeper, 4.0); + percentile_keeper_ingest(ppercentile_keeper, 5.0); + percentile_keeper_print(ppercentile_keeper); + + double p, q; + p = 0.0; + q = percentile_keeper_emit(ppercentile_keeper, p); + printf("%4.2lf -> %7.4lf\n", p, q); + mu_assert_lf(q == 1.0); + + p = 10.0; + q = percentile_keeper_emit(ppercentile_keeper, p); + printf("%4.2lf -> %7.4lf\n", p, q); + mu_assert_lf(q == 1.0); + + p = 50.0; + q = percentile_keeper_emit(ppercentile_keeper, p); + printf("%4.2lf -> %7.4lf\n", p, q); + mu_assert_lf(q == 3.0); + + p = 90.0; + q = percentile_keeper_emit(ppercentile_keeper, p); + printf("%4.2lf -> %7.4lf\n", p, q); + mu_assert_lf(q == 5.0); + + p = 100.0; + q = percentile_keeper_emit(ppercentile_keeper, p); + printf("%4.2lf -> %7.4lf\n", p, q); + mu_assert_lf(q == 5.0); + + percentile_keeper_free(ppercentile_keeper); return NULL; } -//void percentile_keeper_dump(percentile_keeper_t* ppercentile_keeper) { -// for (int i = 0; i < ppercentile_keeper->size; i++) -// printf("[%02d] %.8lf\n", i, ppercentile_keeper->data[i]); -//} - -// char buffer[1024]; -// percentile_keeper_t* ppercentile_keeper = percentile_keeper_alloc(); -// char* line; -// while ((line = fgets(buffer, sizeof(buffer), stdin)) != NULL) { -// int len = strlen(line); -// if (len >= 1) // xxx write and use a chomp() -// if (line[len-1] == '\n') -// line[len-1] = 0; -// double v; -// if (!mlr_try_double_from_string(line, &v)) { 
-// percentile_keeper_ingest(ppercentile_keeper, v); -// } else { -// printf("meh? >>%s<<\n", line); -// } -// } -// percentile_keeper_dump(ppercentile_keeper); -// printf("\n"); -// double p; -// p = 0.10; printf("%.2lf: %.6lf\n", p, percentile_keeper_emit(ppercentile_keeper, p)); -// p = 0.50; printf("%.2lf: %.6lf\n", p, percentile_keeper_emit(ppercentile_keeper, p)); -// p = 0.90; printf("%.2lf: %.6lf\n", p, percentile_keeper_emit(ppercentile_keeper, p)); -// printf("\n"); -// percentile_keeper_dump(ppercentile_keeper); - // ---------------------------------------------------------------- static char* test_top_keeper() { mu_assert_lf(0 == 0); From 5ed2d6f2130b33d0915aa859240d1ccfb8a32dc1 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 18:37:04 -0400 Subject: [PATCH 09/43] move manual tests to unit tests: checkpoint --- c/Makefile | 2 + c/containers/hss.c | 2 +- c/containers/lhms2v.c | 2 +- c/containers/lhmsi.c | 2 +- c/containers/lhmslv.c | 2 +- c/containers/lhmss.c | 51 +++++----- c/containers/lhmss.h | 5 + c/containers/lhmsv.c | 2 +- c/containers/lrec.h | 3 +- c/containers/test_multiple_containers.c | 125 ++++++++++++++++-------- c/input/lrec_reader_csv.c | 2 +- c/input/peek_file_reader.c | 2 +- c/input/peek_file_reader.h | 2 +- c/input/test_peek_file_reader.c | 32 +++--- 14 files changed, 141 insertions(+), 93 deletions(-) diff --git a/c/Makefile b/c/Makefile index 9351e5273..b175b1a5e 100644 --- a/c/Makefile +++ b/c/Makefile @@ -62,6 +62,8 @@ containers/lrec.c containers/header_keeper.c containers/sllv.c \ containers/slls.c \ containers/hss.c \ containers/lhmsi.c \ +containers/lhmss.c \ +containers/lhmsv.c \ containers/lhms2v.c \ containers/lhmslv.c \ containers/percentile_keeper.c \ diff --git a/c/containers/hss.c b/c/containers/hss.c index 44af59745..19f311f16 100644 --- a/c/containers/hss.c +++ b/c/containers/hss.c @@ -231,7 +231,7 @@ static char* get_state_name(int state) { } } -void hss_dump(hss_t* pset) { +void hss_print(hss_t* pset) { 
for (int index = 0; index < pset->array_length; index++) { hsse_t* pe = &pset->array[index]; diff --git a/c/containers/lhms2v.c b/c/containers/lhms2v.c index 5df240de6..8ee9b2162 100644 --- a/c/containers/lhms2v.c +++ b/c/containers/lhms2v.c @@ -309,7 +309,7 @@ static char* get_state_name(int state) { } } -void lhms2v_dump(lhms2v_t* pmap) { +void lhms2v_print(lhms2v_t* pmap) { for (int index = 0; index < pmap->array_length; index++) { lhms2ve_t* pe = &pmap->entries[index]; diff --git a/c/containers/lhmsi.c b/c/containers/lhmsi.c index 3553266b7..c7e390e16 100644 --- a/c/containers/lhmsi.c +++ b/c/containers/lhmsi.c @@ -325,7 +325,7 @@ static char* get_state_name(int state) { } } -void lhmsi_dump(lhmsi_t* pmap) { +void lhmsi_print(lhmsi_t* pmap) { for (int index = 0; index < pmap->array_length; index++) { lhmsie_t* pe = &pmap->entries[index]; diff --git a/c/containers/lhmslv.c b/c/containers/lhmslv.c index 301b6b6d2..144b9aa83 100644 --- a/c/containers/lhmslv.c +++ b/c/containers/lhmslv.c @@ -306,7 +306,7 @@ static char* get_state_name(int state) { } } -void lhmslv_dump(lhmslv_t* pmap) { +void lhmslv_print(lhmslv_t* pmap) { for (int index = 0; index < pmap->array_length; index++) { lhmslve_t* pe = &pmap->entries[index]; diff --git a/c/containers/lhmss.c b/c/containers/lhmss.c index cfe30a489..9a732c8b3 100644 --- a/c/containers/lhmss.c +++ b/c/containers/lhmss.c @@ -257,30 +257,6 @@ static void lhmss_enlarge(lhmss_t* pmap) { free(old_states); } -// ---------------------------------------------------------------- -void lhmss_check_counts(lhmss_t* pmap) { - int nocc = 0; - int ndel = 0; - for (int index = 0; index < pmap->array_length; index++) { - if (pmap->states[index] == OCCUPIED) - nocc++; - else if (pmap->states[index] == DELETED) - ndel++; - } - if (nocc != pmap->num_occupied) { - fprintf(stderr, - "occupancy-count mismatch: actual %d != cached %d.\n", - nocc, pmap->num_occupied); - exit(1); - } - if (ndel != pmap->num_freed) { - fprintf(stderr, - "freed-count 
mismatch: actual %d != cached %d.\n", - ndel, pmap->num_freed); - exit(1); - } -} - // ---------------------------------------------------------------- static char* get_state_name(int state) { switch(state) { @@ -291,7 +267,7 @@ static char* get_state_name(int state) { } } -void lhmss_dump(lhmss_t* pmap) { +void lhmss_print(lhmss_t* pmap) { for (int index = 0; index < pmap->array_length; index++) { lhmsse_t* pe = &pmap->entries[index]; @@ -322,3 +298,28 @@ void lhmss_dump(lhmss_t* pmap) { pe->ideal_index, key_string, value_string); } } + +// ---------------------------------------------------------------- +int lhmss_check_counts(lhmss_t* pmap) { + int nocc = 0; + int ndel = 0; + for (int index = 0; index < pmap->array_length; index++) { + if (pmap->states[index] == OCCUPIED) + nocc++; + else if (pmap->states[index] == DELETED) + ndel++; + } + if (nocc != pmap->num_occupied) { + fprintf(stderr, + "occupancy-count mismatch: actual %d != cached %d.\n", + nocc, pmap->num_occupied); + return FALSE; + } + if (ndel != pmap->num_freed) { + fprintf(stderr, + "deleted-count mismatch: actual %d != cached %d.\n", + ndel, pmap->num_freed); + return FALSE; + } + return TRUE; +} diff --git a/c/containers/lhmss.h b/c/containers/lhmss.h index 0cd5bd43f..131e02c5f 100644 --- a/c/containers/lhmss.h +++ b/c/containers/lhmss.h @@ -49,4 +49,9 @@ int lhmss_has_key(lhmss_t* pmap, char* key); void lhmss_remove(lhmss_t* pmap, char* key); void lhmss_rename(lhmss_t* pmap, char* old_key, char* new_key); +void lhmss_print(lhmss_t* pmap); + +// Unit-test hook +int lhmss_check_counts(lhmss_t* pmap); + #endif // LHMSS_H diff --git a/c/containers/lhmsv.c b/c/containers/lhmsv.c index 37fe1dcf0..bc2112a88 100644 --- a/c/containers/lhmsv.c +++ b/c/containers/lhmsv.c @@ -278,7 +278,7 @@ static char* get_state_name(int state) { } } -void lhmsv_dump(lhmsv_t* pmap) { +void lhmsv_print(lhmsv_t* pmap) { for (int index = 0; index < pmap->array_length; index++) { lhmsve_t* pe = &pmap->entries[index]; diff 
--git a/c/containers/lrec.h b/c/containers/lrec.h index b55d7de53..60b7575c6 100644 --- a/c/containers/lrec.h +++ b/c/containers/lrec.h @@ -111,6 +111,7 @@ void lrec_move_to_tail(lrec_t* prec, char* key); void lrec_free(lrec_t* prec); +void lrec_print(lrec_t* prec); void lrec_dump(lrec_t* prec); void lrec_dump_titled(char* msg, lrec_t* prec); @@ -123,6 +124,4 @@ lrec_t* lrec_literal_2(char* k1, char* v1, char* k2, char* v2); lrec_t* lrec_literal_3(char* k1, char* v1, char* k2, char* v2, char* k3, char* v3); lrec_t* lrec_literal_4(char* k1, char* v1, char* k2, char* v2, char* k3, char* v3, char* k4, char* v4); -void lrec_print(lrec_t* prec); - #endif // LREC_H diff --git a/c/containers/test_multiple_containers.c b/c/containers/test_multiple_containers.c index e744b3716..4bb9d47ac 100644 --- a/c/containers/test_multiple_containers.c +++ b/c/containers/test_multiple_containers.c @@ -6,6 +6,8 @@ #include "containers/sllv.h" #include "containers/hss.h" #include "containers/lhmsi.h" +#include "containers/lhmss.h" +#include "containers/lhmsv.h" #include "containers/lhms2v.h" #include "containers/lhmslv.h" #include "containers/percentile_keeper.h" @@ -225,6 +227,85 @@ static char* test_lhmsi() { return NULL; } +// ---------------------------------------------------------------- +static char* test_lhmss() { + mu_assert_lf(0 == 0); + + lhmss_t *pmap = lhmss_alloc(); + mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhmss_has_key(pmap, "w")); + mu_assert_lf(!lhmss_has_key(pmap, "x")); + mu_assert_lf(!lhmss_has_key(pmap, "y")); + mu_assert_lf(!lhmss_has_key(pmap, "z")); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_put(pmap, "x", "3"); + mu_assert_lf(pmap->num_occupied == 1); + mu_assert_lf(!lhmss_has_key(pmap, "w")); + mu_assert_lf(lhmss_has_key(pmap, "x")); + mu_assert_lf(!lhmss_has_key(pmap, "y")); + mu_assert_lf(!lhmss_has_key(pmap, "z")); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_put(pmap, "y", "5"); + mu_assert_lf(pmap->num_occupied == 2); + 
mu_assert_lf(!lhmss_has_key(pmap, "w")); + mu_assert_lf(lhmss_has_key(pmap, "x")); + mu_assert_lf(lhmss_has_key(pmap, "y")); + mu_assert_lf(!lhmss_has_key(pmap, "z")); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_put(pmap, "x", "4"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmss_has_key(pmap, "w")); + mu_assert_lf(lhmss_has_key(pmap, "x")); + mu_assert_lf(lhmss_has_key(pmap, "y")); + mu_assert_lf(!lhmss_has_key(pmap, "z")); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_put(pmap, "z", "7"); + mu_assert_lf(pmap->num_occupied == 3); + mu_assert_lf(!lhmss_has_key(pmap, "w")); + mu_assert_lf(lhmss_has_key(pmap, "x")); + mu_assert_lf(lhmss_has_key(pmap, "y")); + mu_assert_lf(lhmss_has_key(pmap, "z")); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_remove(pmap, "y"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmss_has_key(pmap, "w")); + mu_assert_lf(lhmss_has_key(pmap, "x")); + mu_assert_lf(!lhmss_has_key(pmap, "y")); + mu_assert_lf(lhmss_has_key(pmap, "z")); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_free(pmap); + + return NULL; +} + +// ---------------------------------------------------------------- +static char* test_lhmsv() { + mu_assert_lf(0 == 0); + + return NULL; +} + +// int x3 = 3; +// int x5 = 5; +// int x4 = 4; +// int x7 = 7; +// lhmsv_t *pmap = lhmsv_alloc(); +// lhmsv_put(pmap, "x", &x3); +// lhmsv_put(pmap, "y", &x5); +// lhmsv_put(pmap, "x", &x4); +// lhmsv_put(pmap, "z", &x7); +// lhmsv_remove(pmap, "y"); +// printf("map size = %d\n", pmap->num_occupied); +// lhmsv_print(pmap); +// lhmsv_check_counts(pmap); +// lhmsv_free(pmap); + // ---------------------------------------------------------------- static char* test_lhms2v() { mu_assert_lf(0 == 0); @@ -305,46 +386,6 @@ static char* test_lhmslv() { return NULL; } -// ---------------------------------------------------------------- -static char* test_lhmss() { - mu_assert_lf(0 == 0); - - return NULL; -} - -// lhmss_t *pmap = lhmss_alloc(); -// 
lhmss_put(pmap, "x", "3"); -// lhmss_put(pmap, "y", "5"); -// lhmss_put(pmap, "x", "4"); -// lhmss_put(pmap, "z", "7"); -// lhmss_remove(pmap, "y"); -// printf("map size = %d\n", pmap->num_occupied); -// lhmss_dump(pmap); -// lhmss_check_counts(pmap); -// lhmss_free(pmap); - -// ---------------------------------------------------------------- -static char* test_lhmsv() { - mu_assert_lf(0 == 0); - - return NULL; -} - -// int x3 = 3; -// int x5 = 5; -// int x4 = 4; -// int x7 = 7; -// lhmsv_t *pmap = lhmsv_alloc(); -// lhmsv_put(pmap, "x", &x3); -// lhmsv_put(pmap, "y", &x5); -// lhmsv_put(pmap, "x", &x4); -// lhmsv_put(pmap, "z", &x7); -// lhmsv_remove(pmap, "y"); -// printf("map size = %d\n", pmap->num_occupied); -// lhmsv_dump(pmap); -// lhmsv_check_counts(pmap); -// lhmsv_free(pmap); - // ---------------------------------------------------------------- static char* test_percentile_keeper() { @@ -508,10 +549,10 @@ static char * run_all_tests() { mu_run_test(test_sllv_append); mu_run_test(test_hss); mu_run_test(test_lhmsi); - mu_run_test(test_lhms2v); - mu_run_test(test_lhmslv); mu_run_test(test_lhmss); mu_run_test(test_lhmsv); + mu_run_test(test_lhms2v); + mu_run_test(test_lhmslv); mu_run_test(test_percentile_keeper); mu_run_test(test_top_keeper); mu_run_test(test_dheap); diff --git a/c/input/lrec_reader_csv.c b/c/input/lrec_reader_csv.c index f2e4f5429..9bc7584b9 100644 --- a/c/input/lrec_reader_csv.c +++ b/c/input/lrec_reader_csv.c @@ -139,7 +139,7 @@ static slls_t* lrec_reader_csv_get_fields(lrec_reader_csv_state_t* pstate) { pfr->peekbuf, pfr->sob, pfr->npeeked, pfr->peekbuflenmask, &stridx, &matchlen); #ifdef DEBUG_PARSER - pfr_dump(pfr); + pfr_print(pfr); #endif if (rc) { #ifdef DEBUG_PARSER diff --git a/c/input/peek_file_reader.c b/c/input/peek_file_reader.c index 03dd154bf..ac02feb97 100644 --- a/c/input/peek_file_reader.c +++ b/c/input/peek_file_reader.c @@ -3,7 +3,7 @@ #include "input/peek_file_reader.h" // 
---------------------------------------------------------------- -void pfr_dump(peek_file_reader_t* pfr) { +void pfr_print(peek_file_reader_t* pfr) { printf("======================== pfr at %p\n", pfr); printf(" peekbuflen = %d\n", pfr->peekbuflen); printf(" npeeked = %d\n", pfr->npeeked); diff --git a/c/input/peek_file_reader.h b/c/input/peek_file_reader.h index 54ef3ba20..fde1cd72c 100644 --- a/c/input/peek_file_reader.h +++ b/c/input/peek_file_reader.h @@ -93,6 +93,6 @@ static inline void pfr_advance_by(peek_file_reader_t* pfr, int len) { } // ---------------------------------------------------------------- -void pfr_dump(peek_file_reader_t* pfr); +void pfr_print(peek_file_reader_t* pfr); #endif // PEEK_FILE_READER_H diff --git a/c/input/test_peek_file_reader.c b/c/input/test_peek_file_reader.c index 9957ed3c3..b1eea34bf 100644 --- a/c/input/test_peek_file_reader.c +++ b/c/input/test_peek_file_reader.c @@ -41,24 +41,24 @@ static char* test_non_empty() { peek_file_reader_t* pfr = pfr_alloc(pbr, 7); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'a'); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == 'a'); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'b'); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == 'b'); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'a'); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == 'a'); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'b'); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == 'b'); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == ','); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == ','); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == ','); - pfr_dump(pfr); pfr_buffer_by(pfr, 5); - pfr_dump(pfr); pfr_advance_by(pfr, 5); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == '2'); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == ','); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == ','); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == ','); + pfr_print(pfr); 
pfr_buffer_by(pfr, 5); + pfr_print(pfr); pfr_advance_by(pfr, 5); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '2'); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3'); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3'); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == '3'); - pfr_dump(pfr); pfr_buffer_by(pfr, 5); - pfr_dump(pfr); pfr_advance_by(pfr, 5); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == '\n'); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3'); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3'); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '3'); + pfr_print(pfr); pfr_buffer_by(pfr, 5); + pfr_print(pfr); pfr_advance_by(pfr, 5); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '\n'); pbr->pclose_func(pbr); pfr_free(pfr); From bdd1dafeb9e355261efb091571c7b0a6480007d0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 18:52:17 -0400 Subject: [PATCH 10/43] move manual tests to unit tests: checkpoint --- c/containers/test_multiple_containers.c | 109 ++++++++++++------------ 1 file changed, 54 insertions(+), 55 deletions(-) diff --git a/c/containers/test_multiple_containers.c b/c/containers/test_multiple_containers.c index 4bb9d47ac..483d16e9c 100644 --- a/c/containers/test_multiple_containers.c +++ b/c/containers/test_multiple_containers.c @@ -56,7 +56,7 @@ static char* test_slls() { } // ---------------------------------------------------------------- -static char* test_sllv_append() { +static char* test_sllv() { mu_assert_lf(0 == 0); sllv_t* pa = sllv_alloc(); @@ -168,58 +168,58 @@ static char* test_lhmsi() { lhmsi_t *pmap = lhmsi_alloc(); mu_assert_lf(pmap->num_occupied == 0); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(!lhmsi_has_key(pmap, "x")); - mu_assert_lf(!lhmsi_has_key(pmap, "y")); - mu_assert_lf(!lhmsi_has_key(pmap, "z")); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "x")); 
mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); mu_assert_lf(lhmsi_check_counts(pmap)); lhmsi_put(pmap, "x", 3); mu_assert_lf(pmap->num_occupied == 1); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(lhmsi_has_key(pmap, "x")); - mu_assert_lf(!lhmsi_has_key(pmap, "y")); - mu_assert_lf(!lhmsi_has_key(pmap, "z")); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 3); + mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999); mu_assert_lf(lhmsi_check_counts(pmap)); lhmsi_put(pmap, "y", 5); mu_assert_lf(pmap->num_occupied == 2); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(lhmsi_has_key(pmap, "x")); - mu_assert_lf(lhmsi_has_key(pmap, "y")); - mu_assert_lf(!lhmsi_has_key(pmap, "z")); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 3); + mu_assert_lf(lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); + mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999); mu_assert_lf(lhmsi_check_counts(pmap)); lhmsi_put(pmap, "x", 4); mu_assert_lf(pmap->num_occupied == 2); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(lhmsi_has_key(pmap, "x")); - mu_assert_lf(lhmsi_has_key(pmap, "y")); - mu_assert_lf(!lhmsi_has_key(pmap, "z")); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); + mu_assert_lf(lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); + 
mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999); mu_assert_lf(lhmsi_check_counts(pmap)); lhmsi_put(pmap, "z", 7); mu_assert_lf(pmap->num_occupied == 3); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(lhmsi_has_key(pmap, "x")); - mu_assert_lf(lhmsi_has_key(pmap, "y")); - mu_assert_lf(lhmsi_has_key(pmap, "z")); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); + mu_assert_lf(lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); + mu_assert_lf(lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == 7); mu_assert_lf(lhmsi_check_counts(pmap)); lhmsi_remove(pmap, "y"); mu_assert_lf(pmap->num_occupied == 2); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(lhmsi_has_key(pmap, "x")); - mu_assert_lf(!lhmsi_has_key(pmap, "y")); - mu_assert_lf(lhmsi_has_key(pmap, "z")); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); + mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999); + mu_assert_lf(lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == 7); mu_assert_lf(lhmsi_check_counts(pmap)); lhmsi_clear(pmap); mu_assert_lf(pmap->num_occupied == 0); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(!lhmsi_has_key(pmap, "x")); - mu_assert_lf(!lhmsi_has_key(pmap, "y")); - mu_assert_lf(!lhmsi_has_key(pmap, "z")); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999); mu_assert_lf(lhmsi_check_counts(pmap)); lhmsi_free(pmap); @@ -233,50 
+233,49 @@ static char* test_lhmss() { lhmss_t *pmap = lhmss_alloc(); mu_assert_lf(pmap->num_occupied == 0); - mu_assert_lf(!lhmss_has_key(pmap, "w")); - mu_assert_lf(!lhmss_has_key(pmap, "x")); - mu_assert_lf(!lhmss_has_key(pmap, "y")); - mu_assert_lf(!lhmss_has_key(pmap, "z")); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf(!lhmss_has_key(pmap, "x")); mu_assert_lf(lhmss_get(pmap, "x") == NULL); + mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); + mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); - lhmss_put(pmap, "x", "3"); mu_assert_lf(pmap->num_occupied == 1); - mu_assert_lf(!lhmss_has_key(pmap, "w")); - mu_assert_lf(lhmss_has_key(pmap, "x")); - mu_assert_lf(!lhmss_has_key(pmap, "y")); - mu_assert_lf(!lhmss_has_key(pmap, "z")); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf(lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "3")); + mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); + mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_put(pmap, "y", "5"); mu_assert_lf(pmap->num_occupied == 2); - mu_assert_lf(!lhmss_has_key(pmap, "w")); - mu_assert_lf(lhmss_has_key(pmap, "x")); - mu_assert_lf(lhmss_has_key(pmap, "y")); - mu_assert_lf(!lhmss_has_key(pmap, "z")); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf(lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "3")); + mu_assert_lf(lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); + mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_put(pmap, "x", "4"); mu_assert_lf(pmap->num_occupied == 2); - 
mu_assert_lf(!lhmss_has_key(pmap, "w")); - mu_assert_lf(lhmss_has_key(pmap, "x")); - mu_assert_lf(lhmss_has_key(pmap, "y")); - mu_assert_lf(!lhmss_has_key(pmap, "z")); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf(lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); + mu_assert_lf(lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); + mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_put(pmap, "z", "7"); mu_assert_lf(pmap->num_occupied == 3); - mu_assert_lf(!lhmss_has_key(pmap, "w")); - mu_assert_lf(lhmss_has_key(pmap, "x")); - mu_assert_lf(lhmss_has_key(pmap, "y")); - mu_assert_lf(lhmss_has_key(pmap, "z")); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf(lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); + mu_assert_lf(lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); + mu_assert_lf(lhmss_has_key(pmap, "z")); mu_assert_lf(streq(lhmss_get(pmap, "z"), "7")); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_remove(pmap, "y"); mu_assert_lf(pmap->num_occupied == 2); - mu_assert_lf(!lhmss_has_key(pmap, "w")); - mu_assert_lf(lhmss_has_key(pmap, "x")); - mu_assert_lf(!lhmss_has_key(pmap, "y")); - mu_assert_lf(lhmss_has_key(pmap, "z")); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf(lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); + mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); + mu_assert_lf(lhmss_has_key(pmap, "z")); mu_assert_lf(streq(lhmss_get(pmap, "z"), "7")); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_free(pmap); @@ -546,7 +545,7 @@ static char* test_dheap() { // ================================================================ static char * run_all_tests() { 
mu_run_test(test_slls); - mu_run_test(test_sllv_append); + mu_run_test(test_sllv); mu_run_test(test_hss); mu_run_test(test_lhmsi); mu_run_test(test_lhmss); From e04f8bb81139094e2390684a81aa357c6f38dd82 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 19:02:23 -0400 Subject: [PATCH 11/43] move manual tests to unit tests: checkpoint --- c/containers/test_multiple_containers.c | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/c/containers/test_multiple_containers.c b/c/containers/test_multiple_containers.c index 483d16e9c..8ad8c1d1a 100644 --- a/c/containers/test_multiple_containers.c +++ b/c/containers/test_multiple_containers.c @@ -312,30 +312,58 @@ static char* test_lhms2v() { // xxx more assertions here lhms2v_t *pmap = lhms2v_alloc(); mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(lhms2v_get(pmap, "a", "x") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(lhms2v_get(pmap, "a", "y") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); mu_assert_lf(lhms2v_check_counts(pmap)); lhms2v_put(pmap, "a", "x", "3"); mu_assert_lf(pmap->num_occupied == 1); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf(lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "3")); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(lhms2v_get(pmap, "a", "y") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); mu_assert_lf(lhms2v_check_counts(pmap)); lhms2v_put(pmap, "a", "y", "5"); mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf(lhms2v_has_key(pmap, "a", "x")); 
mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "3")); + mu_assert_lf(lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); + mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); mu_assert_lf(lhms2v_check_counts(pmap)); lhms2v_put(pmap, "a", "x", "4"); mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf(lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); + mu_assert_lf(lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); + mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); mu_assert_lf(lhms2v_check_counts(pmap)); lhms2v_put(pmap, "b", "z", "7"); mu_assert_lf(pmap->num_occupied == 3); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf(lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); + mu_assert_lf(lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); + mu_assert_lf(lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(streq(lhms2v_get(pmap, "b", "z"), "7")); mu_assert_lf(lhms2v_check_counts(pmap)); lhms2v_remove(pmap, "a", "y"); mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf(lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(lhms2v_get(pmap, "a", "y") == NULL); + mu_assert_lf(lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(streq(lhms2v_get(pmap, "b", "z"), "7")); mu_assert_lf(lhms2v_check_counts(pmap)); lhms2v_clear(pmap); mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); 
+ mu_assert_lf(!lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(lhms2v_get(pmap, "a", "x") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(lhms2v_get(pmap, "a", "y") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); mu_assert_lf(lhms2v_check_counts(pmap)); lhms2v_free(pmap); From b8962c2eeb575e6e3b4e3aadcd28c82163a2490f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 19:09:05 -0400 Subject: [PATCH 12/43] move manual tests to unit tests: checkpoint --- c/containers/test_multiple_containers.c | 38 +++++++++++++++++++------ 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/c/containers/test_multiple_containers.c b/c/containers/test_multiple_containers.c index 8ad8c1d1a..530464399 100644 --- a/c/containers/test_multiple_containers.c +++ b/c/containers/test_multiple_containers.c @@ -57,7 +57,6 @@ static char* test_slls() { // ---------------------------------------------------------------- static char* test_sllv() { - mu_assert_lf(0 == 0); sllv_t* pa = sllv_alloc(); sllv_add(pa, "a"); @@ -164,7 +163,6 @@ static char* test_hss() { // ---------------------------------------------------------------- static char* test_lhmsi() { - mu_assert_lf(0 == 0); lhmsi_t *pmap = lhmsi_alloc(); mu_assert_lf(pmap->num_occupied == 0); @@ -229,7 +227,6 @@ static char* test_lhmsi() { // ---------------------------------------------------------------- static char* test_lhmss() { - mu_assert_lf(0 == 0); lhmss_t *pmap = lhmss_alloc(); mu_assert_lf(pmap->num_occupied == 0); @@ -285,7 +282,6 @@ static char* test_lhmss() { // ---------------------------------------------------------------- static char* test_lhmsv() { - mu_assert_lf(0 == 0); return NULL; } @@ -307,7 +303,6 @@ static char* test_lhmsv() { // ---------------------------------------------------------------- static char* test_lhms2v() { - mu_assert_lf(0 == 0); // xxx more assertions here lhms2v_t *pmap = lhms2v_alloc(); @@ 
-373,39 +368,66 @@ static char* test_lhms2v() { // ---------------------------------------------------------------- static char* test_lhmslv() { - mu_assert_lf(0 == 0); + slls_t* aw = slls_alloc(); slls_add_no_free(aw, "a"); slls_add_no_free(aw, "w"); slls_t* ax = slls_alloc(); slls_add_no_free(ax, "a"); slls_add_no_free(ax, "x"); slls_t* ay = slls_alloc(); slls_add_no_free(ay, "a"); slls_add_no_free(ay, "y"); slls_t* bz = slls_alloc(); slls_add_no_free(bz, "b"); slls_add_no_free(bz, "z"); - // xxx more assertions here lhmslv_t *pmap = lhmslv_alloc(); mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, ax)); mu_assert_lf(lhmslv_get(pmap, ax) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, ax, "3"); mu_assert_lf(pmap->num_occupied == 1); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf(lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); + mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, ay, "5"); mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf(lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); + mu_assert_lf(lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); + mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, ax, "4"); mu_assert_lf(pmap->num_occupied == 2); + 
mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf(lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); + mu_assert_lf(lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); + mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, bz, "7"); mu_assert_lf(pmap->num_occupied == 3); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf(lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); + mu_assert_lf(lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); + mu_assert_lf(lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_remove(pmap, ay); mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf(lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); + mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); + mu_assert_lf(lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_clear(pmap); mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, ax)); mu_assert_lf(lhmslv_get(pmap, ax) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_free(pmap); @@ -457,7 +479,6 @@ static char* test_percentile_keeper() { // ---------------------------------------------------------------- static char* test_top_keeper() { - mu_assert_lf(0 == 0); int capacity = 3; top_keeper_t* 
ptop_keeper = top_keeper_alloc(capacity); @@ -501,7 +522,6 @@ static char* test_top_keeper() { // ---------------------------------------------------------------- static char* test_dheap() { - mu_assert_lf(0 == 0); dheap_t *pdheap = dheap_alloc(); mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); From 1a6cbc7664107018689ddaa02631808ef8947122 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 19:25:20 -0400 Subject: [PATCH 13/43] move manual tests to unit tests: checkpoint --- c/containers/lhmsv.c | 7 +- c/containers/lhmsv.h | 3 + c/containers/test_multiple_containers.c | 163 ++++++++++++++---------- 3 files changed, 106 insertions(+), 67 deletions(-) diff --git a/c/containers/lhmsv.c b/c/containers/lhmsv.c index bc2112a88..eeb49c052 100644 --- a/c/containers/lhmsv.c +++ b/c/containers/lhmsv.c @@ -245,7 +245,7 @@ static void lhmsv_enlarge(lhmsv_t* pmap) { } // ---------------------------------------------------------------- -void lhmsv_check_counts(lhmsv_t* pmap) { +int lhmsv_check_counts(lhmsv_t* pmap) { int nocc = 0; int ndel = 0; for (int index = 0; index < pmap->array_length; index++) { @@ -258,14 +258,15 @@ void lhmsv_check_counts(lhmsv_t* pmap) { fprintf(stderr, "occupancy-count mismatch: actual %d != cached %d.\n", nocc, pmap->num_occupied); - exit(1); + return FALSE; } if (ndel != pmap->num_freed) { fprintf(stderr, "deleted-count mismatch: actual %d != cached %d.\n", ndel, pmap->num_freed); - exit(1); + return FALSE; } + return TRUE; } // ---------------------------------------------------------------- diff --git a/c/containers/lhmsv.h b/c/containers/lhmsv.h index 3a1a6355f..273ecbe74 100644 --- a/c/containers/lhmsv.h +++ b/c/containers/lhmsv.h @@ -47,4 +47,7 @@ void* lhmsv_get(lhmsv_t* pmap, char* key); int lhmsv_has_key(lhmsv_t* pmap, char* key); void lhmsv_remove(lhmsv_t* pmap, char* key); +// Unit-test hook +int lhmsv_check_counts(lhmsv_t* pmap); + #endif // LHMSV_H diff --git a/c/containers/test_multiple_containers.c 
b/c/containers/test_multiple_containers.c index 530464399..d66a0b7b1 100644 --- a/c/containers/test_multiple_containers.c +++ b/c/containers/test_multiple_containers.c @@ -112,7 +112,7 @@ static char* test_hss() { hss_add(pset, "x"); mu_assert_lf(pset->num_occupied == 1); mu_assert_lf(!hss_has(pset, "w")); - mu_assert_lf(hss_has(pset, "x")); + mu_assert_lf( hss_has(pset, "x")); mu_assert_lf(!hss_has(pset, "y")); mu_assert_lf(!hss_has(pset, "z")); mu_assert_lf(hss_check_counts(pset)); @@ -120,33 +120,33 @@ static char* test_hss() { hss_add(pset, "y"); mu_assert_lf(pset->num_occupied == 2); mu_assert_lf(!hss_has(pset, "w")); - mu_assert_lf(hss_has(pset, "x")); - mu_assert_lf(hss_has(pset, "y")); + mu_assert_lf( hss_has(pset, "x")); + mu_assert_lf( hss_has(pset, "y")); mu_assert_lf(!hss_has(pset, "z")); mu_assert_lf(hss_check_counts(pset)); hss_add(pset, "x"); mu_assert_lf(pset->num_occupied == 2); mu_assert_lf(!hss_has(pset, "w")); - mu_assert_lf(hss_has(pset, "x")); - mu_assert_lf(hss_has(pset, "y")); + mu_assert_lf( hss_has(pset, "x")); + mu_assert_lf( hss_has(pset, "y")); mu_assert_lf(!hss_has(pset, "z")); mu_assert_lf(hss_check_counts(pset)); hss_add(pset, "z"); mu_assert_lf(pset->num_occupied == 3); mu_assert_lf(!hss_has(pset, "w")); - mu_assert_lf(hss_has(pset, "x")); - mu_assert_lf(hss_has(pset, "y")); + mu_assert_lf( hss_has(pset, "x")); + mu_assert_lf( hss_has(pset, "y")); mu_assert_lf(hss_has(pset, "z")); mu_assert_lf(hss_check_counts(pset)); hss_remove(pset, "y"); mu_assert_lf(pset->num_occupied == 2); mu_assert_lf(!hss_has(pset, "w")); - mu_assert_lf(hss_has(pset, "x")); + mu_assert_lf( hss_has(pset, "x")); mu_assert_lf(!hss_has(pset, "y")); - mu_assert_lf(hss_has(pset, "z")); + mu_assert_lf( hss_has(pset, "z")); mu_assert_lf(hss_check_counts(pset)); hss_clear(pset); @@ -175,7 +175,7 @@ static char* test_lhmsi() { lhmsi_put(pmap, "x", 3); mu_assert_lf(pmap->num_occupied == 1); mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == 
-999); - mu_assert_lf(lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 3); + mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 3); mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999); mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999); mu_assert_lf(lhmsi_check_counts(pmap)); @@ -183,33 +183,33 @@ static char* test_lhmsi() { lhmsi_put(pmap, "y", 5); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); - mu_assert_lf(lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 3); - mu_assert_lf(lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); + mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 3); + mu_assert_lf( lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999); mu_assert_lf(lhmsi_check_counts(pmap)); lhmsi_put(pmap, "x", 4); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); - mu_assert_lf(lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); - mu_assert_lf(lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); + mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); + mu_assert_lf( lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999); mu_assert_lf(lhmsi_check_counts(pmap)); lhmsi_put(pmap, "z", 7); mu_assert_lf(pmap->num_occupied == 3); mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); - mu_assert_lf(lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); - mu_assert_lf(lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); + mu_assert_lf( 
lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); + mu_assert_lf( lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); mu_assert_lf(lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == 7); mu_assert_lf(lhmsi_check_counts(pmap)); lhmsi_remove(pmap, "y"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); - mu_assert_lf(lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); + mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999); - mu_assert_lf(lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == 7); + mu_assert_lf( lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == 7); mu_assert_lf(lhmsi_check_counts(pmap)); lhmsi_clear(pmap); @@ -235,10 +235,11 @@ static char* test_lhmss() { mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); + lhmss_put(pmap, "x", "3"); mu_assert_lf(pmap->num_occupied == 1); mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); - mu_assert_lf(lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "3")); + mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "3")); mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); @@ -246,33 +247,33 @@ static char* test_lhmss() { lhmss_put(pmap, "y", "5"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); - mu_assert_lf(lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "3")); - 
mu_assert_lf(lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); + mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "3")); + mu_assert_lf( lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_put(pmap, "x", "4"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); - mu_assert_lf(lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); - mu_assert_lf(lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); + mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); + mu_assert_lf( lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_put(pmap, "z", "7"); mu_assert_lf(pmap->num_occupied == 3); mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); - mu_assert_lf(lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); - mu_assert_lf(lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); - mu_assert_lf(lhmss_has_key(pmap, "z")); mu_assert_lf(streq(lhmss_get(pmap, "z"), "7")); + mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); + mu_assert_lf( lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); + mu_assert_lf( lhmss_has_key(pmap, "z")); mu_assert_lf(streq(lhmss_get(pmap, "z"), "7")); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_remove(pmap, "y"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); - mu_assert_lf(lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), 
"4")); + mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); - mu_assert_lf(lhmss_has_key(pmap, "z")); mu_assert_lf(streq(lhmss_get(pmap, "z"), "7")); + mu_assert_lf( lhmss_has_key(pmap, "z")); mu_assert_lf(streq(lhmss_get(pmap, "z"), "7")); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_free(pmap); @@ -283,28 +284,62 @@ static char* test_lhmss() { // ---------------------------------------------------------------- static char* test_lhmsv() { + lhmsv_t *pmap = lhmsv_alloc(); + mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf(!lhmsv_has_key(pmap, "x")); mu_assert_lf(lhmsv_get(pmap, "x") == NULL); + mu_assert_lf(!lhmsv_has_key(pmap, "y")); mu_assert_lf(lhmsv_get(pmap, "y") == NULL); + mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_put(pmap, "x", "3"); + mu_assert_lf(pmap->num_occupied == 1); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "3")); + mu_assert_lf(!lhmsv_has_key(pmap, "y")); mu_assert_lf(lhmsv_get(pmap, "y") == NULL); + mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_put(pmap, "y", "5"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "3")); + mu_assert_lf( lhmsv_has_key(pmap, "y")); mu_assert_lf(streq(lhmsv_get(pmap, "y"), "5")); + mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_put(pmap, "x", "4"); 
+ mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "4")); + mu_assert_lf( lhmsv_has_key(pmap, "y")); mu_assert_lf(streq(lhmsv_get(pmap, "y"), "5")); + mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_put(pmap, "z", "7"); + mu_assert_lf(pmap->num_occupied == 3); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "4")); + mu_assert_lf( lhmsv_has_key(pmap, "y")); mu_assert_lf(streq(lhmsv_get(pmap, "y"), "5")); + mu_assert_lf( lhmsv_has_key(pmap, "z")); mu_assert_lf(streq(lhmsv_get(pmap, "z"), "7")); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_remove(pmap, "y"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "4")); + mu_assert_lf(!lhmsv_has_key(pmap, "y")); mu_assert_lf(lhmsv_get(pmap, "y") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "z")); mu_assert_lf(streq(lhmsv_get(pmap, "z"), "7")); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_free(pmap); + return NULL; } -// int x3 = 3; -// int x5 = 5; -// int x4 = 4; -// int x7 = 7; -// lhmsv_t *pmap = lhmsv_alloc(); -// lhmsv_put(pmap, "x", &x3); -// lhmsv_put(pmap, "y", &x5); -// lhmsv_put(pmap, "x", &x4); -// lhmsv_put(pmap, "z", &x7); -// lhmsv_remove(pmap, "y"); -// printf("map size = %d\n", pmap->num_occupied); -// lhmsv_print(pmap); -// lhmsv_check_counts(pmap); -// lhmsv_free(pmap); - // ---------------------------------------------------------------- static char* test_lhms2v() { - // xxx more assertions here lhms2v_t *pmap = lhms2v_alloc(); mu_assert_lf(pmap->num_occupied == 0); 
mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); @@ -316,7 +351,7 @@ static char* test_lhms2v() { lhms2v_put(pmap, "a", "x", "3"); mu_assert_lf(pmap->num_occupied == 1); mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); - mu_assert_lf(lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "3")); + mu_assert_lf( lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "3")); mu_assert_lf(!lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(lhms2v_get(pmap, "a", "y") == NULL); mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); mu_assert_lf(lhms2v_check_counts(pmap)); @@ -324,33 +359,33 @@ static char* test_lhms2v() { lhms2v_put(pmap, "a", "y", "5"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); - mu_assert_lf(lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "3")); - mu_assert_lf(lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); + mu_assert_lf( lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "3")); + mu_assert_lf( lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); mu_assert_lf(lhms2v_check_counts(pmap)); lhms2v_put(pmap, "a", "x", "4"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); - mu_assert_lf(lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); - mu_assert_lf(lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); + mu_assert_lf( lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); 
+ mu_assert_lf( lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); mu_assert_lf(lhms2v_check_counts(pmap)); lhms2v_put(pmap, "b", "z", "7"); mu_assert_lf(pmap->num_occupied == 3); mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); - mu_assert_lf(lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); - mu_assert_lf(lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); - mu_assert_lf(lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(streq(lhms2v_get(pmap, "b", "z"), "7")); + mu_assert_lf( lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); + mu_assert_lf( lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); + mu_assert_lf( lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(streq(lhms2v_get(pmap, "b", "z"), "7")); mu_assert_lf(lhms2v_check_counts(pmap)); lhms2v_remove(pmap, "a", "y"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); - mu_assert_lf(lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); + mu_assert_lf( lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); mu_assert_lf(!lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(lhms2v_get(pmap, "a", "y") == NULL); - mu_assert_lf(lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(streq(lhms2v_get(pmap, "b", "z"), "7")); + mu_assert_lf( lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(streq(lhms2v_get(pmap, "b", "z"), "7")); mu_assert_lf(lhms2v_check_counts(pmap)); lhms2v_clear(pmap); @@ -385,7 +420,7 @@ static char* test_lhmslv() { lhmslv_put(pmap, ax, "3"); mu_assert_lf(pmap->num_occupied == 1); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == 
NULL); - mu_assert_lf(lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); + mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); @@ -393,33 +428,33 @@ static char* test_lhmslv() { lhmslv_put(pmap, ay, "5"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); - mu_assert_lf(lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); - mu_assert_lf(lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); + mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); + mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, ax, "4"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); - mu_assert_lf(lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); - mu_assert_lf(lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); + mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); + mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_put(pmap, bz, "7"); mu_assert_lf(pmap->num_occupied == 3); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); - mu_assert_lf(lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); - 
mu_assert_lf(lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); - mu_assert_lf(lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); + mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); + mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); + mu_assert_lf( lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_remove(pmap, ay); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); - mu_assert_lf(lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); + mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); - mu_assert_lf(lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); + mu_assert_lf( lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); mu_assert_lf(lhmslv_check_counts(pmap)); lhmslv_clear(pmap); From 40c4fb8a62a097c86ddebdef20cad62bb6cffa87 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 19:29:32 -0400 Subject: [PATCH 14/43] remove unused reference to memcheck --- c/Makefile | 4 ---- c/containers/test_lrec.c | 11 ----------- c/lib/test_mlrutil.c | 1 - c/lib/test_string_builder.c | 1 - c/mlrmain.c | 10 ---------- 5 files changed, 27 deletions(-) diff --git a/c/Makefile b/c/Makefile index b175b1a5e..cad3e241a 100644 --- a/c/Makefile +++ b/c/Makefile @@ -145,10 +145,6 @@ dsls: .always mlrg: .always dsls $(CCDEBUG) $(NON_DSL_SRCS) $(PDSL_OBJS) $(FDSL_OBJS) $(LFLAGS) -o mlrg -# Memcheck version -mlrk: .always dsls - $(CCDEBUG) -DUSE_MCHECK $(NON_DSL_SRCS) $(PDSL_OBJS) $(FDSL_OBJS) $(LFLAGS) -o mlrk - # Profile version. 
Usage: # * make mlrp # * mlrp {arguments> diff --git a/c/containers/test_lrec.c b/c/containers/test_lrec.c index 4ed619cea..a480c18cc 100644 --- a/c/containers/test_lrec.c +++ b/c/containers/test_lrec.c @@ -1,8 +1,5 @@ #include #include -#ifdef MLR_USE_MCHECK -#include -#endif // MLR_USE_MCHECK #include "lib/minunit.h" #include "lib/mlrutil.h" #include "containers/lrec.h" @@ -262,14 +259,6 @@ static char * run_all_tests() { } int main(int argc, char **argv) { -#ifdef MLR_USE_MCHECK - if (mcheck(NULL) != 0) { - printf("Could not set up mcheck\n"); - exit(1); - } - printf("Set up mcheck\n"); -#endif // MLR_USE_MCHECK - char *result = run_all_tests(); printf("\n"); if (result != 0) { diff --git a/c/lib/test_mlrutil.c b/c/lib/test_mlrutil.c index 391053af3..e0cc0db3d 100644 --- a/c/lib/test_mlrutil.c +++ b/c/lib/test_mlrutil.c @@ -61,7 +61,6 @@ int main(int argc, char **argv) { char *result = all_tests(); printf("\n"); if (result != 0) { - //printf("%s\n", result); printf("Not all unit tests passed\n"); } else { diff --git a/c/lib/test_string_builder.c b/c/lib/test_string_builder.c index 0b0edcd82..08ca23b69 100644 --- a/c/lib/test_string_builder.c +++ b/c/lib/test_string_builder.c @@ -81,7 +81,6 @@ int main(int argc, char **argv) { char *result = all_tests(); printf("\n"); if (result != 0) { - //printf("%s\n", result); printf("Not all unit tests passed\n"); } else { diff --git a/c/mlrmain.c b/c/mlrmain.c index ce76c76b2..9141c5301 100644 --- a/c/mlrmain.c +++ b/c/mlrmain.c @@ -1,9 +1,6 @@ #include #include #include -#ifdef MLR_USE_MCHECK -#include -#endif #include "cli/mlrcli.h" #include "lib/mlrutil.h" @@ -16,13 +13,6 @@ #include "stream/stream.h" int main(int argc, char** argv) { -#ifdef MLR_USE_MCHECK - if (mcheck(NULL) != 0) { - fprintf(stderr, "Could not set up mcheck\n"); - exit(1); - } - fprintf(stderr, "Set up mcheck\n"); -#endif mlr_global_init(argv[0], NULL, NULL); cli_opts_t* popts = parse_command_line(argc, argv); mlr_global_init(argv[0], popts->ofmt, 
popts); From 7d5b3c37efa6264b80786f7a41af086f8ffd6393 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 19:35:42 -0400 Subject: [PATCH 15/43] neaten --- c/Makefile | 2 +- c/containers/test_join_bucket_keeper.c | 1 + c/containers/test_lrec.c | 1 + c/containers/test_multiple_containers.c | 7 ++++--- c/containers/test_parse_trie.c | 1 + c/input/test_byte_readers.c | 1 + c/input/test_peek_file_reader.c | 1 + c/lib/test_mlrutil.c | 1 + c/lib/test_string_builder.c | 1 + 9 files changed, 12 insertions(+), 4 deletions(-) diff --git a/c/Makefile b/c/Makefile index cad3e241a..70d8a4e53 100644 --- a/c/Makefile +++ b/c/Makefile @@ -213,7 +213,7 @@ test-lrec: .always $(CCDEBUG) -D__TEST_LREC_MAIN__ $(TEST_LREC_SRCS) -o test-lrec test-multiple-containers: .always - $(CCDEBUG) -D__TEST_MAPS_AND_SETS_MAIN__ $(TEST_MULTIPLE_CONTAINERS_SRCS) -o test-multiple-containers + $(CCDEBUG) -D__TEST_MULTIPLE_CONTAINERS_MAIN__ $(TEST_MULTIPLE_CONTAINERS_SRCS) -o test-multiple-containers test-mlrutil: .always $(CCDEBUG) -D__TEST_MLRUTIL_MAIN__ lib/mlrutil.c lib/test_mlrutil.c -o test-mlrutil diff --git a/c/containers/test_join_bucket_keeper.c b/c/containers/test_join_bucket_keeper.c index ae49d376f..19838d4b8 100644 --- a/c/containers/test_join_bucket_keeper.c +++ b/c/containers/test_join_bucket_keeper.c @@ -427,6 +427,7 @@ static char * run_all_tests() { } int main(int argc, char **argv) { + printf("TEST_JOIN_BUCKET_KEEPER ENTER\n"); if ((argc == 2) && streq(argv[1], "-v")) tjbk_verbose = TRUE; diff --git a/c/containers/test_lrec.c b/c/containers/test_lrec.c index a480c18cc..593976d95 100644 --- a/c/containers/test_lrec.c +++ b/c/containers/test_lrec.c @@ -259,6 +259,7 @@ static char * run_all_tests() { } int main(int argc, char **argv) { + printf("TEST_LREC ENTER\n"); char *result = run_all_tests(); printf("\n"); if (result != 0) { diff --git a/c/containers/test_multiple_containers.c b/c/containers/test_multiple_containers.c index d66a0b7b1..8e9f3653f 100644 --- 
a/c/containers/test_multiple_containers.c +++ b/c/containers/test_multiple_containers.c @@ -14,7 +14,7 @@ #include "containers/top_keeper.h" #include "containers/dheap.h" -#ifdef __TEST_MAPS_AND_SETS_MAIN__ +#ifdef __TEST_MULTIPLE_CONTAINERS_MAIN__ int tests_run = 0; int tests_failed = 0; int assertions_run = 0; @@ -642,17 +642,18 @@ static char * run_all_tests() { } int main(int argc, char **argv) { + printf("TEST_MULTIPLE_CONTAINERS ENTER\n"); char *result = run_all_tests(); printf("\n"); if (result != 0) { printf("Not all unit tests passed\n"); } else { - printf("TEST_MAPS_AND_SETS: ALL UNIT TESTS PASSED\n"); + printf("TEST_MULTIPLE_CONTAINERS: ALL UNIT TESTS PASSED\n"); } printf("Tests passed: %d of %d\n", tests_run - tests_failed, tests_run); printf("Assertions passed: %d of %d\n", assertions_run - assertions_failed, assertions_run); return result != 0; } -#endif // __TEST_MAPS_AND_SETS_MAIN__ +#endif // __TEST_MULTIPLE_CONTAINERS_MAIN__ diff --git a/c/containers/test_parse_trie.c b/c/containers/test_parse_trie.c index ac3dec759..4d2d1c025 100644 --- a/c/containers/test_parse_trie.c +++ b/c/containers/test_parse_trie.c @@ -252,6 +252,7 @@ static char* all_tests() { } int main(int argc, char** argv) { + printf("TEST_PARSE_TRIE ENTER\n"); char* result = all_tests(); printf("\n"); if (result != 0) { diff --git a/c/input/test_byte_readers.c b/c/input/test_byte_readers.c index 9d506268e..65b6c3b79 100644 --- a/c/input/test_byte_readers.c +++ b/c/input/test_byte_readers.c @@ -197,6 +197,7 @@ static char * run_all_tests() { } int main(int argc, char **argv) { + printf("TEST_BYTE_READERS ENTER\n"); char *result = run_all_tests(); printf("\n"); if (result != 0) { diff --git a/c/input/test_peek_file_reader.c b/c/input/test_peek_file_reader.c index b1eea34bf..58358c4f5 100644 --- a/c/input/test_peek_file_reader.c +++ b/c/input/test_peek_file_reader.c @@ -74,6 +74,7 @@ static char * run_all_tests() { } int main(int argc, char **argv) { + printf("TEST_PEEK_FILE_READER 
ENTER\n"); char *result = run_all_tests(); printf("\n"); if (result != 0) { diff --git a/c/lib/test_mlrutil.c b/c/lib/test_mlrutil.c index e0cc0db3d..cc9cb3da7 100644 --- a/c/lib/test_mlrutil.c +++ b/c/lib/test_mlrutil.c @@ -58,6 +58,7 @@ static char * all_tests() { } int main(int argc, char **argv) { + printf("TEST_MLRUTIL ENTER\n"); char *result = all_tests(); printf("\n"); if (result != 0) { diff --git a/c/lib/test_string_builder.c b/c/lib/test_string_builder.c index 08ca23b69..6b9b9c726 100644 --- a/c/lib/test_string_builder.c +++ b/c/lib/test_string_builder.c @@ -78,6 +78,7 @@ static char * all_tests() { } int main(int argc, char **argv) { + printf("TEST_STRING_BUILDER ENTER\n"); char *result = all_tests(); printf("\n"); if (result != 0) { From 9b3e5acb9226532a8ed06bb5ca5308ae6796ccd8 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 19:42:08 -0400 Subject: [PATCH 16/43] valgrind wasn't being used in any automated way --- c/Makefile | 32 +------------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) diff --git a/c/Makefile b/c/Makefile index 70d8a4e53..c1d240779 100644 --- a/c/Makefile +++ b/c/Makefile @@ -104,18 +104,11 @@ input/file_reader_mmap.c \ experimental/getlines.c # ================================================================ -# User-make: creates the executable and runs unit & regression tests (without -# valgrind) +# User-make: creates the executable and runs unit & regression tests # ----> This is the default target for anyone pulling the repo and trying to # build it to be able to use it. It just needs flex and the C compiler. top: mlr tests -# Dev-make: updates the tags file, creates the executable, and runs unit & -# regression tests (with valgrind) -# ----> This is the target for a developer to run before a commit. -# It requires ctags and valgrind in addition to flex and the C compiler. 
-dev: tags mlr dev-tests - install: mlr tests cp mlr $(INSTALLDIR) installhome: mlr tests @@ -154,10 +147,6 @@ mlrp: .always dsls $(CCDEBUG) -g -pg $(NON_DSL_SRCS) $(PDSL_OBJS) $(FDSL_OBJS) $(LFLAGS) -o mlrp # ================================================================ -# User-tests: don't use valgrind: they may not have it, and valgrind is most -# useful for detecting errors at build time. They don't need it to produce an -# executable. - tests: unit-test reg-test unit-test: test-mlrutil test-byte-readers test-peek-file-reader test-parse-trie test-lrec test-multiple-containers test-string-builder test-join-bucket-keeper @@ -176,25 +165,6 @@ reg-test: ./test/run # ---------------------------------------------------------------- -# Use valgrind at dev time for additional reassurance. - -dev-tests: dev-unit-test reg-test - -# Unfortunately --error-exitcode=1 doesn't work well since there are -# valgrind-detected errors in stdlibs. :( -dev-unit-test: test-mlrutil test-byte-readers test-peek-file-reader test-parse-trie test-lrec test-multiple-containers test-string-builder test-join-bucket-keeper - #valgrind --leak-check=full --error-exitcode=1 ./a.out - valgrind --leak-check=full ./test-mlrutil - valgrind --leak-check=full ./test-byte-readers - valgrind --leak-check=full ./test-Peek-file-reader - valgrind --leak-check=full ./test-parse-trie - valgrind --leak-check=full ./test-lrec - valgrind --leak-check=full ./test-multiple-containers - valgrind --leak-check=full ./test-string-builder - valgrind --leak-check=full ./test-join-bucket-keeper - @echo - @echo DONE - # Run this after unit-test expected output has changed, and is verified to be # OK. (Example: after adding new test cases in test/run.) 
regtest-copy: From 73590658c41705dcef7c49328302602641b2e25a Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 19:43:56 -0400 Subject: [PATCH 17/43] let test/run write to ./output for VPATH builds --- c/test/output/out | 2210 --------------------------------------------- c/test/run | 2 +- 2 files changed, 1 insertion(+), 2211 deletions(-) delete mode 100644 c/test/output/out diff --git a/c/test/output/out b/c/test/output/out deleted file mode 100644 index d67477712..000000000 --- a/c/test/output/out +++ /dev/null @@ -1,2210 +0,0 @@ - -================================================================ -STATELESS MAPPERS - -mlr cat ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr cat /dev/null - -mlr cut -f a,x ./test/input/abixy -a=pan,x=0.3467901443380824 -a=eks,x=0.7586799647899636 -a=wye,x=0.20460330576630303 -a=eks,x=0.38139939387114097 -a=wye,x=0.5732889198020006 -a=zee,x=0.5271261600918548 -a=eks,x=0.6117840605678454 -a=zee,x=0.5985540091064224 -a=hat,x=0.03144187646093577 -a=pan,x=0.5026260055412137 - -mlr cut --complement -f a,x ./test/input/abixy -b=pan,i=1,y=0.7268028627434533 -b=pan,i=2,y=0.5221511083334797 -b=wye,i=3,y=0.33831852551664776 -b=wye,i=4,y=0.13418874328430463 -b=pan,i=5,y=0.8636244699032729 -b=pan,i=6,y=0.49322128674835697 -b=zee,i=7,y=0.1878849191181694 -b=wye,i=8,y=0.976181385699006 -b=wye,i=9,y=0.7495507603507059 -b=wye,i=10,y=0.9526183602969864 - -mlr having-fields 
--at-least a,b ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr having-fields --at-least a,c ./test/input/abixy - -mlr having-fields --at-least a,b,i,x,y ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr having-fields --which-are a,b,i,x ./test/input/abixy - -mlr having-fields --which-are a,b,i,x,y ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 
-a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr having-fields --which-are a,b,i,y,x ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr having-fields --which-are a,b,i,x,w ./test/input/abixy - -mlr having-fields --which-are a,b,i,x,y,z ./test/input/abixy - -mlr having-fields --at-most a,c ./test/input/abixy - -mlr having-fields --at-most a,b,i,x,y ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr having-fields --at-most a,b,i,x,y,z ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 
-a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr rename b,BEE,x,EKS ./test/input/abixy -a=pan,BEE=pan,i=1,EKS=0.3467901443380824,y=0.7268028627434533 -a=eks,BEE=pan,i=2,EKS=0.7586799647899636,y=0.5221511083334797 -a=wye,BEE=wye,i=3,EKS=0.20460330576630303,y=0.33831852551664776 -a=eks,BEE=wye,i=4,EKS=0.38139939387114097,y=0.13418874328430463 -a=wye,BEE=pan,i=5,EKS=0.5732889198020006,y=0.8636244699032729 -a=zee,BEE=pan,i=6,EKS=0.5271261600918548,y=0.49322128674835697 -a=eks,BEE=zee,i=7,EKS=0.6117840605678454,y=0.1878849191181694 -a=zee,BEE=wye,i=8,EKS=0.5985540091064224,y=0.976181385699006 -a=hat,BEE=wye,i=9,EKS=0.03144187646093577,y=0.7495507603507059 -a=pan,BEE=wye,i=10,EKS=0.5026260055412137,y=0.9526183602969864 - -mlr rename nonesuch,nonesuch,x,EKS ./test/input/abixy -a=pan,b=pan,i=1,EKS=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,EKS=0.7586799647899636,y=0.5221511083334797 -a=wye,b=wye,i=3,EKS=0.20460330576630303,y=0.33831852551664776 -a=eks,b=wye,i=4,EKS=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,EKS=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,EKS=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,EKS=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,EKS=0.5985540091064224,y=0.976181385699006 -a=hat,b=wye,i=9,EKS=0.03144187646093577,y=0.7495507603507059 -a=pan,b=wye,i=10,EKS=0.5026260055412137,y=0.9526183602969864 - -mlr regularize ./test/input/regularize.dkvp -a=1,c=2,b=3 -e=4,d=5 -a=6,c=7,b=8 - - -================================================================ -TRIVIAL RETAINERS - -mlr group-by a ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 
-a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 - -mlr group-by a,b ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr group-like ./test/input/het.dkvp -host=jupiter -host=saturn -host=mars -host=jupiter -host=mars -host=saturn -df/tmp=2.43MB,uptime=32345sec -df/tmp=1.34MB,uptime=234214132sec -df/tmp=4.97MB,uptime=345089805sec -df/tmp=0.04MB,uptime=890sec -df/tmp=8.55MB,uptime=787897777sec -df/tmp=9.47MB,uptime=234289080sec - -mlr tac ./test/input/abixy -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 - 
-mlr tac /dev/null - - -================================================================ -SORT - -mlr sort -f a ./test/input/abixy -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 - -mlr sort -r a ./test/input/abixy -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 - -mlr sort -f x ./test/input/abixy -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 - -mlr 
sort -r x ./test/input/abixy -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 - -mlr sort -nf x ./test/input/abixy -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 - -mlr sort -nr x ./test/input/abixy -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 - -mlr sort -f a,b ./test/input/abixy -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 
-a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 - -mlr sort -r a,b ./test/input/abixy -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 - -mlr sort -f x,y ./test/input/abixy -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 - -mlr sort -r x,y ./test/input/abixy -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 
-a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 - -mlr sort -nf x,y ./test/input/abixy -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 - -mlr sort -nr x,y ./test/input/abixy -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 - -mlr sort -f a -nr x ./test/input/abixy -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 
-a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 - -mlr sort -nr y -f a ./test/input/abixy -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 - -mlr sort -f a -r b -nf x -nr y ./test/input/abixy -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 - - -================================================================ -JOIN - -mlr --opprint join -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -o x y -1 a s -2 b t -2 c t -2 d t -2 b v -2 c v -2 d v -3 e w -3 f w -3 e x -3 f x -3 e y -3 f y - -mlr --opprint join -u -f 
./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -o x y -1 a s -2 b t -2 c t -2 d t -2 b v -2 c v -2 d v -3 e w -3 f w -3 e x -3 f x -3 e y -3 f y - -mlr --opprint join --ul -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -o x y -1 a s -2 b t -2 c t -2 d t -2 b v -2 c v -2 d v -3 e w -3 f w -3 e x -3 f x -3 e y -3 f y - -l x -4 g - -mlr --opprint join -u --ul -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -o x y -1 a s -2 b t -2 c t -2 d t -2 b v -2 c v -2 d v -3 e w -3 f w -3 e x -3 f x -3 e y -3 f y - -l x -4 g - -mlr --opprint join --ur -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -o x y -1 a s -2 b t -2 c t -2 d t -2 b v -2 c v -2 d v -3 e w -3 f w -3 e x -3 f x -3 e y -3 f y - -r y -5 z - -mlr --opprint join -u --ur -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -o x y -1 a s -2 b t -2 c t -2 d t -2 b v -2 c v -2 d v -3 e w -3 f w -3 e x -3 f x -3 e y -3 f y - -r y -5 z - -mlr --opprint join --ul --ur -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -o x y -1 a s -2 b t -2 c t -2 d t -2 b v -2 c v -2 d v -3 e w -3 f w -3 e x -3 f x -3 e y -3 f y - -l x -4 g - -r y -5 z - -mlr --opprint join -u --ul --ur -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -o x y -1 a s -2 b t -2 c t -2 d t -2 b v -2 c v -2 d v -3 e w -3 f w -3 e x -3 f x -3 e y -3 f y - -r y -5 z - -l x -4 g - -mlr --opprint join --np --ul -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -l x -4 g - -mlr --opprint join -u --np --ul -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -l x -4 g - -mlr --opprint join --np --ur -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -r y -5 z - -mlr --opprint join -u --np --ur -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -r y -5 z - -mlr --opprint join --np --ul --ur -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -l x -4 g - -r y -5 z - -mlr 
--opprint join -u --np --ul --ur -f ./test/input/joina.dkvp -l l -r r -j o ./test/input/joinb.dkvp -r y -5 z - -l x -4 g - -mlr --opprint join -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp - -mlr --opprint join -u -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp - -mlr --opprint join --ul -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp - -mlr --opprint join -u --ul -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp - -mlr --opprint join --ur -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp -r y -1 s -2 t -2 v -3 w -3 x -3 y -5 z - -mlr --opprint join -u --ur -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp -r y -1 s -2 t -2 v -3 w -3 x -3 y -5 z - -mlr --opprint join --ul --ur -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp -r y -1 s -2 t -2 v -3 w -3 x -3 y -5 z - -mlr --opprint join -u --ul --ur -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp -r y -1 s -2 t -2 v -3 w -3 x -3 y -5 z - -mlr --opprint join --np --ul -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp - -mlr --opprint join -u --np --ul -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp - -mlr --opprint join --np --ur -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp -r y -1 s -2 t -2 v -3 w -3 x -3 y -5 z - -mlr --opprint join -u --np --ur -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp -r y -1 s -2 t -2 v -3 w -3 x -3 y -5 z - -mlr --opprint join --np --ul --ur -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp -r y -1 s -2 t -2 v -3 w -3 x -3 y -5 z - -mlr --opprint join -u --np --ul --ur -f /dev/null -l l -r r -j o ./test/input/joinb.dkvp -r y -1 s -2 t -2 v -3 w -3 x -3 y -5 z - -mlr --opprint join -f ./test/input/joina.dkvp -l l -r r -j o /dev/null - -mlr --opprint join -u -f ./test/input/joina.dkvp -l l -r r -j o /dev/null - -mlr --opprint join --ul -f ./test/input/joina.dkvp -l l -r r -j o /dev/null -l x -1 a -2 b -2 c -2 d -3 e -3 f -4 g - -mlr --opprint join -u --ul -f ./test/input/joina.dkvp -l l -r r -j o /dev/null -l x -1 a -2 b -2 c -2 d -3 e -3 f -4 g 
- -mlr --opprint join --ur -f ./test/input/joina.dkvp -l l -r r -j o /dev/null - -mlr --opprint join -u --ur -f ./test/input/joina.dkvp -l l -r r -j o /dev/null - -mlr --opprint join --ul --ur -f ./test/input/joina.dkvp -l l -r r -j o /dev/null -l x -1 a -2 b -2 c -2 d -3 e -3 f -4 g - -mlr --opprint join -u --ul --ur -f ./test/input/joina.dkvp -l l -r r -j o /dev/null -l x -1 a -2 b -2 c -2 d -3 e -3 f -4 g - -mlr --opprint join --np --ul -f ./test/input/joina.dkvp -l l -r r -j o /dev/null -l x -1 a -2 b -2 c -2 d -3 e -3 f -4 g - -mlr --opprint join -u --np --ul -f ./test/input/joina.dkvp -l l -r r -j o /dev/null -l x -1 a -2 b -2 c -2 d -3 e -3 f -4 g - -mlr --opprint join --np --ur -f ./test/input/joina.dkvp -l l -r r -j o /dev/null - -mlr --opprint join -u --np --ur -f ./test/input/joina.dkvp -l l -r r -j o /dev/null - -mlr --opprint join --np --ul --ur -f ./test/input/joina.dkvp -l l -r r -j o /dev/null -l x -1 a -2 b -2 c -2 d -3 e -3 f -4 g - -mlr --opprint join -u --np --ul --ur -f ./test/input/joina.dkvp -l l -r r -j o /dev/null -l x -1 a -2 b -2 c -2 d -3 e -3 f -4 g - - -================================================================ -STATS - -mlr count-distinct -f a,b ./test/input/small ./test/input/abixy -a=pan,b=pan,count=2 -a=eks,b=pan,count=2 -a=wye,b=wye,count=2 -a=eks,b=wye,count=2 -a=wye,b=pan,count=2 -a=zee,b=pan,count=2 -a=eks,b=zee,count=2 -a=zee,b=wye,count=2 -a=hat,b=wye,count=2 -a=pan,b=wye,count=2 - -mlr --opprint stats1 -a mean,sum,count,min,max,mode -f i,x,y ./test/input/abixy -i_mean i_sum i_count i_min i_max i_mode x_mean x_sum x_count x_min x_max x_mode y_mean y_sum y_count y_min y_max y_mode -5.500000 55.000000 10 1.000000 10.000000 1 0.453629 4.536294 10 0.031442 0.758680 0.3467901443380824 0.594454 5.944542 10 0.134189 0.976181 0.7268028627434533 - -mlr --opprint stats1 -a min,p10,p50,mode,p90,max -f i,x,y ./test/input/abixy -i_min i_p10 i_p50 i_mode i_p90 i_max x_min x_p10 x_p50 x_mode x_p90 x_max y_min y_p10 y_p50 y_mode y_p90 
y_max -1.000000 2.000000 6.000000 1 10.000000 10.000000 0.031442 0.204603 0.527126 0.3467901443380824 0.758680 0.758680 0.134189 0.187885 0.726803 0.7268028627434533 0.976181 0.976181 - -mlr --opprint stats1 -a mean,meaneb,stddev -f i,x,y ./test/input/abixy -i_mean i_meaneb i_stddev x_mean x_meaneb x_stddev y_mean y_meaneb y_stddev -5.500000 0.957427 3.027650 0.453629 0.068157 0.215531 0.594454 0.096968 0.306639 - -mlr --opprint stats1 -a mean,sum,count,min,max,mode -f i,x,y -g a ./test/input/abixy -a i_mean i_sum i_count i_min i_max i_mode x_mean x_sum x_count x_min x_max x_mode y_mean y_sum y_count y_min y_max y_mode -pan 5.500000 11.000000 2 1.000000 10.000000 1 0.424708 0.849416 2 0.346790 0.502626 0.3467901443380824 0.839711 1.679421 2 0.726803 0.952618 0.7268028627434533 -eks 4.333333 13.000000 3 2.000000 7.000000 2 0.583954 1.751863 3 0.381399 0.758680 0.7586799647899636 0.281408 0.844225 3 0.134189 0.522151 0.5221511083334797 -wye 4.000000 8.000000 2 3.000000 5.000000 3 0.388946 0.777892 2 0.204603 0.573289 0.20460330576630303 0.600971 1.201943 2 0.338319 0.863624 0.33831852551664776 -zee 7.000000 14.000000 2 6.000000 8.000000 6 0.562840 1.125680 2 0.527126 0.598554 0.5271261600918548 0.734701 1.469403 2 0.493221 0.976181 0.49322128674835697 -hat 9.000000 9.000000 1 9.000000 9.000000 9 0.031442 0.031442 1 0.031442 0.031442 0.03144187646093577 0.749551 0.749551 1 0.749551 0.749551 0.7495507603507059 - -mlr --opprint stats1 -a min,p10,p50,mode,p90,max -f i,x,y -g a ./test/input/abixy -a i_min i_p10 i_p50 i_mode i_p90 i_max x_min x_p10 x_p50 x_mode x_p90 x_max y_min y_p10 y_p50 y_mode y_p90 y_max -pan 1.000000 1.000000 10.000000 1 10.000000 10.000000 0.346790 0.346790 0.502626 0.3467901443380824 0.502626 0.502626 0.726803 0.726803 0.952618 0.7268028627434533 0.952618 0.952618 -eks 2.000000 2.000000 4.000000 2 7.000000 7.000000 0.381399 0.381399 0.611784 0.7586799647899636 0.758680 0.758680 0.134189 0.134189 0.187885 0.5221511083334797 0.522151 0.522151 -wye 
3.000000 3.000000 5.000000 3 5.000000 5.000000 0.204603 0.204603 0.573289 0.20460330576630303 0.573289 0.573289 0.338319 0.338319 0.863624 0.33831852551664776 0.863624 0.863624 -zee 6.000000 6.000000 8.000000 6 8.000000 8.000000 0.527126 0.527126 0.598554 0.5271261600918548 0.598554 0.598554 0.493221 0.493221 0.976181 0.49322128674835697 0.976181 0.976181 -hat 9.000000 9.000000 9.000000 9 9.000000 9.000000 0.031442 0.031442 0.031442 0.03144187646093577 0.031442 0.031442 0.749551 0.749551 0.749551 0.7495507603507059 0.749551 0.749551 - -mlr --opprint stats1 -a mean,meaneb,stddev -f i,x,y -g a ./test/input/abixy -a i_mean i_meaneb i_stddev x_mean x_meaneb x_stddev y_mean y_meaneb y_stddev -pan 5.500000 4.500000 6.363961 0.424708 0.077918 0.110193 0.839711 0.112908 0.159676 -eks 4.333333 1.452966 2.516611 0.583954 0.109797 0.190174 0.281408 0.121365 0.210211 -wye 4.000000 1.000000 1.414214 0.388946 0.184343 0.260700 0.600971 0.262653 0.371447 -zee 7.000000 1.000000 1.414214 0.562840 0.035714 0.050507 0.734701 0.241480 0.341504 -hat 9.000000 - - 0.031442 - - 0.749551 - - - -mlr --opprint stats1 -a mean,sum,count,min,max,mode -f i,x,y -g a,b ./test/input/abixy -a b i_mean i_sum i_count i_min i_max i_mode x_mean x_sum x_count x_min x_max x_mode y_mean y_sum y_count y_min y_max y_mode -pan pan 1.000000 1.000000 1 1.000000 1.000000 1 0.346790 0.346790 1 0.346790 0.346790 0.3467901443380824 0.726803 0.726803 1 0.726803 0.726803 0.7268028627434533 -eks pan 2.000000 2.000000 1 2.000000 2.000000 2 0.758680 0.758680 1 0.758680 0.758680 0.7586799647899636 0.522151 0.522151 1 0.522151 0.522151 0.5221511083334797 -wye wye 3.000000 3.000000 1 3.000000 3.000000 3 0.204603 0.204603 1 0.204603 0.204603 0.20460330576630303 0.338319 0.338319 1 0.338319 0.338319 0.33831852551664776 -eks wye 4.000000 4.000000 1 4.000000 4.000000 4 0.381399 0.381399 1 0.381399 0.381399 0.38139939387114097 0.134189 0.134189 1 0.134189 0.134189 0.13418874328430463 -wye pan 5.000000 5.000000 1 5.000000 
5.000000 5 0.573289 0.573289 1 0.573289 0.573289 0.5732889198020006 0.863624 0.863624 1 0.863624 0.863624 0.8636244699032729 -zee pan 6.000000 6.000000 1 6.000000 6.000000 6 0.527126 0.527126 1 0.527126 0.527126 0.5271261600918548 0.493221 0.493221 1 0.493221 0.493221 0.49322128674835697 -eks zee 7.000000 7.000000 1 7.000000 7.000000 7 0.611784 0.611784 1 0.611784 0.611784 0.6117840605678454 0.187885 0.187885 1 0.187885 0.187885 0.1878849191181694 -zee wye 8.000000 8.000000 1 8.000000 8.000000 8 0.598554 0.598554 1 0.598554 0.598554 0.5985540091064224 0.976181 0.976181 1 0.976181 0.976181 0.976181385699006 -hat wye 9.000000 9.000000 1 9.000000 9.000000 9 0.031442 0.031442 1 0.031442 0.031442 0.03144187646093577 0.749551 0.749551 1 0.749551 0.749551 0.7495507603507059 -pan wye 10.000000 10.000000 1 10.000000 10.000000 10 0.502626 0.502626 1 0.502626 0.502626 0.5026260055412137 0.952618 0.952618 1 0.952618 0.952618 0.9526183602969864 - -mlr --opprint stats1 -a min,p10,p50,mode,p90,max -f i,x,y -g a,b ./test/input/abixy -a b i_min i_p10 i_p50 i_mode i_p90 i_max x_min x_p10 x_p50 x_mode x_p90 x_max y_min y_p10 y_p50 y_mode y_p90 y_max -pan pan 1.000000 1.000000 1.000000 1 1.000000 1.000000 0.346790 0.346790 0.346790 0.3467901443380824 0.346790 0.346790 0.726803 0.726803 0.726803 0.7268028627434533 0.726803 0.726803 -eks pan 2.000000 2.000000 2.000000 2 2.000000 2.000000 0.758680 0.758680 0.758680 0.7586799647899636 0.758680 0.758680 0.522151 0.522151 0.522151 0.5221511083334797 0.522151 0.522151 -wye wye 3.000000 3.000000 3.000000 3 3.000000 3.000000 0.204603 0.204603 0.204603 0.20460330576630303 0.204603 0.204603 0.338319 0.338319 0.338319 0.33831852551664776 0.338319 0.338319 -eks wye 4.000000 4.000000 4.000000 4 4.000000 4.000000 0.381399 0.381399 0.381399 0.38139939387114097 0.381399 0.381399 0.134189 0.134189 0.134189 0.13418874328430463 0.134189 0.134189 -wye pan 5.000000 5.000000 5.000000 5 5.000000 5.000000 0.573289 0.573289 0.573289 0.5732889198020006 0.573289 
0.573289 0.863624 0.863624 0.863624 0.8636244699032729 0.863624 0.863624 -zee pan 6.000000 6.000000 6.000000 6 6.000000 6.000000 0.527126 0.527126 0.527126 0.5271261600918548 0.527126 0.527126 0.493221 0.493221 0.493221 0.49322128674835697 0.493221 0.493221 -eks zee 7.000000 7.000000 7.000000 7 7.000000 7.000000 0.611784 0.611784 0.611784 0.6117840605678454 0.611784 0.611784 0.187885 0.187885 0.187885 0.1878849191181694 0.187885 0.187885 -zee wye 8.000000 8.000000 8.000000 8 8.000000 8.000000 0.598554 0.598554 0.598554 0.5985540091064224 0.598554 0.598554 0.976181 0.976181 0.976181 0.976181385699006 0.976181 0.976181 -hat wye 9.000000 9.000000 9.000000 9 9.000000 9.000000 0.031442 0.031442 0.031442 0.03144187646093577 0.031442 0.031442 0.749551 0.749551 0.749551 0.7495507603507059 0.749551 0.749551 -pan wye 10.000000 10.000000 10.000000 10 10.000000 10.000000 0.502626 0.502626 0.502626 0.5026260055412137 0.502626 0.502626 0.952618 0.952618 0.952618 0.9526183602969864 0.952618 0.952618 - -mlr --opprint stats1 -a mean,meaneb,stddev -f i,x,y -g a,b ./test/input/abixy -a b i_mean i_meaneb i_stddev x_mean x_meaneb x_stddev y_mean y_meaneb y_stddev -pan pan 1.000000 - - 0.346790 - - 0.726803 - - -eks pan 2.000000 - - 0.758680 - - 0.522151 - - -wye wye 3.000000 - - 0.204603 - - 0.338319 - - -eks wye 4.000000 - - 0.381399 - - 0.134189 - - -wye pan 5.000000 - - 0.573289 - - 0.863624 - - -zee pan 6.000000 - - 0.527126 - - 0.493221 - - -eks zee 7.000000 - - 0.611784 - - 0.187885 - - -zee wye 8.000000 - - 0.598554 - - 0.976181 - - -hat wye 9.000000 - - 0.031442 - - 0.749551 - - -pan wye 10.000000 - - 0.502626 - - 0.952618 - - - -mlr --opprint stats2 -a linreg-ols,linreg-pca,r2,corr,cov -f x,y,xy,y2,x2,x2 ./test/input/abixy-wide -x_y_ols_m x_y_ols_b x_y_ols_n x_y_pca_m x_y_pca_b x_y_pca_n x_y_pca_quality x_y_r2 x_y_corr x_y_cov xy_y2_ols_m xy_y2_ols_b xy_y2_ols_n xy_y2_pca_m xy_y2_pca_b xy_y2_pca_n xy_y2_pca_quality xy_y2_r2 xy_y2_corr xy_y2_cov x2_x2_ols_m x2_x2_ols_b 
x2_x2_ols_n x2_x2_pca_m x2_x2_pca_b x2_x2_pca_n x2_x2_pca_quality x2_x2_r2 x2_x2_corr x2_x2_cov -0.028351 0.487644 2000 1.332924 -0.170590 2000 0.056909 0.000791 0.028120 0.002330 0.893610 0.107060 2000 1.529534 -0.055477 2000 0.824336 0.447971 0.669306 0.045036 1.000000 0.000000 2000 1.000000 0.000000 2000 1.000000 1.000000 1.000000 0.087709 - -mlr --opprint stats2 -a linreg-ols,linreg-pca,r2,corr,cov -f x,y,xy,y2,x2,x2 -g a,b ./test/input/abixy-wide -a b x_y_ols_m x_y_ols_b x_y_ols_n x_y_pca_m x_y_pca_b x_y_pca_n x_y_pca_quality x_y_r2 x_y_corr x_y_cov xy_y2_ols_m xy_y2_ols_b xy_y2_ols_n xy_y2_pca_m xy_y2_pca_b xy_y2_pca_n xy_y2_pca_quality xy_y2_r2 xy_y2_corr xy_y2_cov x2_x2_ols_m x2_x2_ols_b x2_x2_ols_n x2_x2_pca_m x2_x2_pca_b x2_x2_pca_n x2_x2_pca_quality x2_x2_r2 x2_x2_corr x2_x2_cov -cat pan 0.054420 0.481777 89 3.636062 -1.221602 89 0.177683 0.002504 0.050036 0.003777 0.950908 0.105754 89 1.715574 -0.081719 89 0.830612 0.435336 0.659800 0.041616 1.000000 0.000000 89 1.000000 0.000000 89 1.000000 1.000000 1.000000 0.066303 -pan wye -0.145486 0.584799 78 -1.340927 1.199920 78 0.254025 0.019479 -0.139568 -0.012683 0.908151 0.126628 78 1.595150 -0.045034 78 0.824114 0.438850 0.662457 0.046203 1.000000 0.000000 78 1.000000 0.000000 78 1.000000 1.000000 1.000000 0.093192 -wye cat 0.185913 0.377639 74 1.135325 -0.145894 74 0.309499 0.033002 0.181665 0.014494 0.969266 0.040602 74 1.406365 -0.081379 74 0.868480 0.561236 0.749157 0.052090 1.000000 0.000000 74 1.000000 0.000000 74 1.000000 1.000000 1.000000 0.086883 -dog hat 0.100096 0.448757 88 0.810749 0.097346 88 0.189256 0.010462 0.102283 0.008036 0.919149 0.090504 88 1.425774 -0.038344 88 0.846209 0.507155 0.712148 0.045034 1.000000 0.000000 88 1.000000 0.000000 88 1.000000 1.000000 1.000000 0.081226 -dog pan -0.066834 0.590647 87 -0.254112 0.688837 87 0.275316 0.005924 -0.076969 -0.005709 0.726118 0.164937 87 1.566309 -0.075073 87 0.749025 0.315011 0.561259 0.034107 1.000000 0.000000 87 1.000000 0.000000 87 
1.000000 1.000000 1.000000 0.098975 -pan pan 0.094932 0.461566 77 0.672369 0.189898 77 0.192719 0.009768 0.098832 0.007175 0.822261 0.123441 77 1.312543 0.003200 77 0.820351 0.465390 0.682195 0.039784 1.000000 0.000000 77 1.000000 0.000000 77 1.000000 1.000000 1.000000 0.080908 -hat hat 0.043668 0.405219 88 10.170494 -5.125282 88 0.310513 0.001324 0.036392 0.003037 1.128896 0.015188 88 1.414166 -0.052514 88 0.922308 0.708725 0.841858 0.060975 1.000000 0.000000 88 1.000000 0.000000 88 1.000000 1.000000 1.000000 0.084636 -wye hat 0.043018 0.496029 87 0.254879 0.395780 87 0.177794 0.002197 0.046876 0.004023 0.720402 0.165623 87 1.376136 0.002792 87 0.760716 0.353558 0.594608 0.038763 1.000000 0.000000 87 1.000000 0.000000 87 1.000000 1.000000 1.000000 0.091675 -pan hat 0.120797 0.448197 67 1.597359 -0.325695 67 0.225137 0.013060 0.114278 0.008987 0.962678 0.076920 67 1.285796 -0.012566 67 0.887704 0.622353 0.788893 0.054965 1.000000 0.000000 67 1.000000 0.000000 67 1.000000 1.000000 1.000000 0.079553 -cat hat 0.172391 0.464384 90 0.959329 0.086790 90 0.296109 0.030150 0.173639 0.015030 0.904257 0.133482 90 1.415658 -0.008369 90 0.841567 0.498171 0.705812 0.055626 1.000000 0.000000 90 1.000000 0.000000 90 1.000000 1.000000 1.000000 0.089895 -hat wye -0.022975 0.496361 70 -1.765884 1.344268 70 0.051493 0.000514 -0.022665 -0.002000 0.971929 0.096088 70 1.989422 -0.142072 70 0.825656 0.386354 0.621574 0.040126 1.000000 0.000000 70 1.000000 0.000000 70 1.000000 1.000000 1.000000 0.075309 -dog dog 0.078397 0.489236 87 0.354494 0.351041 87 0.242210 0.007619 0.087288 0.008214 0.776967 0.150999 87 1.354405 -0.006432 87 0.792265 0.408257 0.638950 0.049648 1.000000 0.000000 87 1.000000 0.000000 87 1.000000 1.000000 1.000000 0.106525 -wye dog 0.116403 0.425576 76 2.367821 -0.777734 76 0.254607 0.011048 0.105109 0.007867 0.925781 0.071192 76 1.453590 -0.070509 76 0.845204 0.501559 0.708208 0.046440 1.000000 0.000000 76 1.000000 0.000000 76 1.000000 1.000000 1.000000 0.081433 -wye 
wye -0.188354 0.613934 67 -1.433772 1.217887 67 0.316070 0.031156 -0.176512 -0.015876 0.876717 0.159179 67 2.044493 -0.118503 67 0.795455 0.325193 0.570257 0.042026 1.000000 0.000000 67 1.000000 0.000000 67 1.000000 1.000000 1.000000 0.087513 -dog wye 0.029527 0.502643 79 0.496713 0.282511 79 0.073039 0.000913 0.030211 0.002391 0.904925 0.120816 79 1.609123 -0.052245 79 0.821822 0.432413 0.657581 0.042857 1.000000 0.000000 79 1.000000 0.000000 79 1.000000 1.000000 1.000000 0.083924 -cat dog 0.057573 0.408644 78 0.728479 0.071114 78 0.116103 0.003442 0.058671 0.005320 0.884325 0.079999 78 1.418207 -0.040344 78 0.832998 0.479596 0.692528 0.044762 1.000000 0.000000 78 1.000000 0.000000 78 1.000000 1.000000 1.000000 0.098206 -hat pan -0.154393 0.564981 85 -0.845852 0.911026 85 0.276955 0.025143 -0.158566 -0.012756 0.911165 0.104362 85 1.763740 -0.092987 85 0.814584 0.397150 0.630199 0.035622 1.000000 0.000000 85 1.000000 0.000000 85 1.000000 1.000000 1.000000 0.087879 -cat wye -0.014851 0.564875 77 -0.572708 0.892322 77 0.034146 0.000224 -0.014982 -0.000966 0.878820 0.086362 77 1.447244 -0.098657 77 0.827119 0.463961 0.681147 0.041096 1.000000 0.000000 77 1.000000 0.000000 77 1.000000 1.000000 1.000000 0.081922 -hat cat -0.022859 0.498539 88 -0.156242 0.565723 88 0.149344 0.000610 -0.024689 -0.002116 0.840965 0.111121 88 1.663518 -0.088942 88 0.793883 0.373515 0.611158 0.036575 1.000000 0.000000 88 1.000000 0.000000 88 1.000000 1.000000 1.000000 0.093075 -dog cat 0.104057 0.428559 83 2.712382 -1.005787 83 0.250036 0.008705 0.093300 0.007122 1.080443 0.023866 83 1.653922 -0.133367 83 0.875586 0.547103 0.739664 0.050357 1.000000 0.000000 83 1.000000 0.000000 83 1.000000 1.000000 1.000000 0.075381 -hat dog 0.041849 0.427228 78 0.403977 0.254919 78 0.118494 0.001918 0.043789 0.003856 0.776135 0.114930 78 1.475403 -0.036508 78 0.779056 0.372058 0.609966 0.040583 1.000000 0.000000 78 1.000000 0.000000 78 1.000000 1.000000 1.000000 0.104033 -pan dog 0.119510 0.467833 73 
2.492496 -0.761490 73 0.266455 0.011427 0.106896 0.009302 0.948592 0.107556 73 1.408389 -0.022846 73 0.860243 0.541263 0.735706 0.056609 1.000000 0.000000 73 1.000000 0.000000 73 1.000000 1.000000 1.000000 0.088101 -cat cat 0.016257 0.425410 79 0.432946 0.225535 79 0.044275 0.000273 0.016510 0.001350 0.930954 0.072476 79 1.624993 -0.072669 79 0.830029 0.446764 0.668404 0.036267 1.000000 0.000000 79 1.000000 0.000000 79 1.000000 1.000000 1.000000 0.086429 -pan cat -0.188523 0.616919 89 -0.898665 0.953923 89 0.324264 0.037036 -0.192447 -0.016206 0.781770 0.176617 89 2.020454 -0.113587 89 0.762332 0.278739 0.527958 0.032984 1.000000 0.000000 89 1.000000 0.000000 89 1.000000 1.000000 1.000000 0.093193 -wye pan 0.229443 0.444446 66 1.313689 -0.098124 66 0.365811 0.046722 0.216152 0.020367 0.887659 0.145052 66 1.471906 -0.030176 66 0.827911 0.462545 0.680107 0.064496 1.000000 0.000000 66 1.000000 0.000000 66 1.000000 1.000000 1.000000 0.103497 - -mlr --opprint step -a rsum,delta,counter -f x,y ./test/input/abixy -a b i x y x_rsum x_delta x_counter y_rsum y_delta y_counter -pan pan 1 0.3467901443380824 0.7268028627434533 0.346790 0.346790 1 0.726803 0.726803 1 -eks pan 2 0.7586799647899636 0.5221511083334797 1.105470 0.411890 2 1.248954 -0.204652 2 -wye wye 3 0.20460330576630303 0.33831852551664776 1.310073 -0.554077 3 1.587272 -0.183833 3 -eks wye 4 0.38139939387114097 0.13418874328430463 1.691473 0.176796 4 1.721461 -0.204130 4 -wye pan 5 0.5732889198020006 0.8636244699032729 2.264762 0.191890 5 2.585086 0.729436 5 -zee pan 6 0.5271261600918548 0.49322128674835697 2.791888 -0.046163 6 3.078307 -0.370403 6 -eks zee 7 0.6117840605678454 0.1878849191181694 3.403672 0.084658 7 3.266192 -0.305336 7 -zee wye 8 0.5985540091064224 0.976181385699006 4.002226 -0.013230 8 4.242373 0.788296 8 -hat wye 9 0.03144187646093577 0.7495507603507059 4.033668 -0.567112 9 4.991924 -0.226631 9 -pan wye 10 0.5026260055412137 0.9526183602969864 4.536294 0.471184 10 5.944542 0.203068 10 - -mlr 
--opprint step -a rsum,delta,counter -f x,y -g a ./test/input/abixy -a b i x y x_rsum x_delta x_counter y_rsum y_delta y_counter -pan pan 1 0.3467901443380824 0.7268028627434533 0.346790 0.346790 1 0.726803 0.726803 1 -eks pan 2 0.7586799647899636 0.5221511083334797 0.758680 0.758680 1 0.522151 0.522151 1 -wye wye 3 0.20460330576630303 0.33831852551664776 0.204603 0.204603 1 0.338319 0.338319 1 -eks wye 4 0.38139939387114097 0.13418874328430463 1.140079 -0.377281 2 0.656340 -0.387962 2 -wye pan 5 0.5732889198020006 0.8636244699032729 0.777892 0.368686 2 1.201943 0.525306 2 -zee pan 6 0.5271261600918548 0.49322128674835697 0.527126 0.527126 1 0.493221 0.493221 1 -eks zee 7 0.6117840605678454 0.1878849191181694 1.751863 0.230385 3 0.844225 0.053696 3 -zee wye 8 0.5985540091064224 0.976181385699006 1.125680 0.071428 2 1.469403 0.482960 2 -hat wye 9 0.03144187646093577 0.7495507603507059 0.031442 0.031442 1 0.749551 0.749551 1 -pan wye 10 0.5026260055412137 0.9526183602969864 0.849416 0.155836 2 1.679421 0.225815 2 - -mlr --opprint histogram -f x,y --lo 0 --hi 1 --nbins 20 ./test/input/small -bin_lo bin_hi x_count y_count -0.000000 0.050000 1 0 -0.050000 0.100000 0 0 -0.100000 0.150000 0 1 -0.150000 0.200000 0 1 -0.200000 0.250000 1 0 -0.250000 0.300000 0 0 -0.300000 0.350000 1 1 -0.350000 0.400000 1 0 -0.400000 0.450000 0 0 -0.450000 0.500000 0 1 -0.500000 0.550000 2 1 -0.550000 0.600000 2 0 -0.600000 0.650000 1 0 -0.650000 0.700000 0 0 -0.700000 0.750000 0 2 -0.750000 0.800000 1 0 -0.800000 0.850000 0 0 -0.850000 0.900000 0 1 -0.900000 0.950000 0 0 -0.950000 1.000000 0 2 - - -================================================================ -DSLs - -mlr filter $x>.3 ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 
-a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr filter $x>0.3 ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr filter $x>0.3 && $y>0.3 ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr filter $x>0.3 || $y>0.3 ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864 - -mlr filter NR>=4 && NR <= 7 ./test/input/abixy -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729 
-a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694 - -mlr filter $nosuchfield>.3 ./test/input/abixy - -mlr put $x2 = $x**2 ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,x2=0.120263 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,x2=0.575595 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,x2=0.041863 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463,x2=0.145465 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729,x2=0.328660 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697,x2=0.277862 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694,x2=0.374280 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006,x2=0.358267 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059,x2=0.000989 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864,x2=0.252633 - -mlr put $z = -0.024*$x+0.13 ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,z=0.121677 -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,z=0.111792 -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,z=0.125090 -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463,z=0.120846 -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729,z=0.116241 -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697,z=0.117349 -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694,z=0.115317 -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006,z=0.115635 -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059,z=0.129245 -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864,z=0.117937 - -mlr put $c = $a . 
$b ./test/input/abixy -a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533,c=panpan -a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797,c=ekspan -a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776,c=wyewye -a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463,c=ekswye -a=wye,b=pan,i=5,x=0.5732889198020006,y=0.8636244699032729,c=wyepan -a=zee,b=pan,i=6,x=0.5271261600918548,y=0.49322128674835697,c=zeepan -a=eks,b=zee,i=7,x=0.6117840605678454,y=0.1878849191181694,c=ekszee -a=zee,b=wye,i=8,x=0.5985540091064224,y=0.976181385699006,c=zeewye -a=hat,b=wye,i=9,x=0.03144187646093577,y=0.7495507603507059,c=hatwye -a=pan,b=wye,i=10,x=0.5026260055412137,y=0.9526183602969864,c=panwye - -mlr --opprint put $nr=NR;$fnr=FNR;$nf=NF;$filenum=FILENUM ./test/input/abixy ./test/input/abixy -a b i x y nr fnr nf filenum -pan pan 1 0.3467901443380824 0.7268028627434533 1 1 7 1 -eks pan 2 0.7586799647899636 0.5221511083334797 2 2 7 1 -wye wye 3 0.20460330576630303 0.33831852551664776 3 3 7 1 -eks wye 4 0.38139939387114097 0.13418874328430463 4 4 7 1 -wye pan 5 0.5732889198020006 0.8636244699032729 5 5 7 1 -zee pan 6 0.5271261600918548 0.49322128674835697 6 6 7 1 -eks zee 7 0.6117840605678454 0.1878849191181694 7 7 7 1 -zee wye 8 0.5985540091064224 0.976181385699006 8 8 7 1 -hat wye 9 0.03144187646093577 0.7495507603507059 9 9 7 1 -pan wye 10 0.5026260055412137 0.9526183602969864 10 10 7 1 -pan pan 1 0.3467901443380824 0.7268028627434533 11 1 7 2 -eks pan 2 0.7586799647899636 0.5221511083334797 12 2 7 2 -wye wye 3 0.20460330576630303 0.33831852551664776 13 3 7 2 -eks wye 4 0.38139939387114097 0.13418874328430463 14 4 7 2 -wye pan 5 0.5732889198020006 0.8636244699032729 15 5 7 2 -zee pan 6 0.5271261600918548 0.49322128674835697 16 6 7 2 -eks zee 7 0.6117840605678454 0.1878849191181694 17 7 7 2 -zee wye 8 0.5985540091064224 0.976181385699006 18 8 7 2 -hat wye 9 0.03144187646093577 0.7495507603507059 19 9 7 2 -pan wye 10 0.5026260055412137 0.9526183602969864 20 10 7 
2 - - -================================================================ -OPERATOR PRECEDENCE AND ASSOCIATIVITY - -mlr put -v $x=$a+$b+$c /dev/null -= (operator): - x (field_name). - + (operator): - + (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a+$b-$c /dev/null -= (operator): - x (field_name). - - (operator): - + (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a-$b-$c /dev/null -= (operator): - x (field_name). - - (operator): - - (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a-$b+$c /dev/null -= (operator): - x (field_name). - + (operator): - - (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a*$b*$c /dev/null -= (operator): - x (field_name). - * (operator): - * (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a*$b/$c /dev/null -= (operator): - x (field_name). - / (operator): - * (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a/$b/$c /dev/null -= (operator): - x (field_name). - / (operator): - / (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a/$b*$c /dev/null -= (operator): - x (field_name). - * (operator): - / (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a+$b+$c /dev/null -= (operator): - x (field_name). - + (operator): - + (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a+$b*$c /dev/null -= (operator): - x (field_name). - + (operator): - a (field_name). - * (operator): - b (field_name). - c (field_name). - -mlr put -v $x=$a*$b*$c /dev/null -= (operator): - x (field_name). - * (operator): - * (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a*$b+$c /dev/null -= (operator): - x (field_name). - + (operator): - * (operator): - a (field_name). - b (field_name). - c (field_name). 
- -mlr put -v $x=$a+$b+$c /dev/null -= (operator): - x (field_name). - + (operator): - + (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a+$b**$c /dev/null -= (operator): - x (field_name). - + (operator): - a (field_name). - ** (operator): - b (field_name). - c (field_name). - -mlr put -v $x=$a**$b**$c /dev/null -= (operator): - x (field_name). - ** (operator): - a (field_name). - ** (operator): - b (field_name). - c (field_name). - -mlr put -v $x=$a**$b+$c /dev/null -= (operator): - x (field_name). - + (operator): - ** (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a.$b.$c /dev/null -= (operator): - x (field_name). - . (operator): - . (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=-$a+$b*$c /dev/null -= (operator): - x (field_name). - + (operator): - - (operator): - a (field_name). - * (operator): - b (field_name). - c (field_name). - -mlr put -v $x=-$a*$b+$c /dev/null -= (operator): - x (field_name). - + (operator): - * (operator): - - (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr put -v $x=$a+-$b*$c /dev/null -= (operator): - x (field_name). - + (operator): - a (field_name). - * (operator): - - (operator): - b (field_name). - c (field_name). - -mlr put -v $x=$a*-$b+$c /dev/null -= (operator): - x (field_name). - + (operator): - * (operator): - a (field_name). - - (operator): - b (field_name). - c (field_name). - -mlr put -v $x=$a+$b*-$c /dev/null -= (operator): - x (field_name). - + (operator): - a (field_name). - * (operator): - b (field_name). - - (operator): - c (field_name). - -mlr put -v $x=$a*$b+-$c /dev/null -= (operator): - x (field_name). - + (operator): - * (operator): - a (field_name). - b (field_name). - - (operator): - c (field_name). - -mlr filter -v $a==1 && $b == 1 && $c == 1 /dev/null -&& (operator): - && (operator): - == (operator): - a (field_name). - 1 (literal). - == (operator): - b (field_name). 
- 1 (literal). - == (operator): - c (field_name). - 1 (literal). - -mlr filter -v $a==1 || $b == 1 && $c == 1 /dev/null -|| (operator): - == (operator): - a (field_name). - 1 (literal). - && (operator): - == (operator): - b (field_name). - 1 (literal). - == (operator): - c (field_name). - 1 (literal). - -mlr filter -v $a==1 || $b == 1 || $c == 1 /dev/null -|| (operator): - || (operator): - == (operator): - a (field_name). - 1 (literal). - == (operator): - b (field_name). - 1 (literal). - == (operator): - c (field_name). - 1 (literal). - -mlr filter -v $a==1 && $b == 1 || $c == 1 /dev/null -|| (operator): - && (operator): - == (operator): - a (field_name). - 1 (literal). - == (operator): - b (field_name). - 1 (literal). - == (operator): - c (field_name). - 1 (literal). - -mlr filter -v $x<$a*$b*$c /dev/null -< (operator): - x (field_name). - * (operator): - * (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<$a*$b/$c /dev/null -< (operator): - x (field_name). - / (operator): - * (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<$a/$b/$c /dev/null -< (operator): - x (field_name). - / (operator): - / (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<$a/$b*$c /dev/null -< (operator): - x (field_name). - * (operator): - / (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<$a+$b+$c /dev/null -< (operator): - x (field_name). - + (operator): - + (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<$a+$b*$c /dev/null -< (operator): - x (field_name). - + (operator): - a (field_name). - * (operator): - b (field_name). - c (field_name). - -mlr filter -v $x<$a*$b*$c /dev/null -< (operator): - x (field_name). - * (operator): - * (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<$a*$b+$c /dev/null -< (operator): - x (field_name). 
- + (operator): - * (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<$a+$b+$c /dev/null -< (operator): - x (field_name). - + (operator): - + (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<$a+$b**$c /dev/null -< (operator): - x (field_name). - + (operator): - a (field_name). - ** (operator): - b (field_name). - c (field_name). - -mlr filter -v $x<$a**$b**$c /dev/null -< (operator): - x (field_name). - ** (operator): - ** (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<$a**$b+$c /dev/null -< (operator): - x (field_name). - + (operator): - ** (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<$a.$b.$c /dev/null -< (operator): - x (field_name). - . (operator): - . (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<-$a+$b*$c /dev/null -< (operator): - x (field_name). - + (operator): - - (operator): - a (field_name). - * (operator): - b (field_name). - c (field_name). - -mlr filter -v $x<-$a*$b+$c /dev/null -< (operator): - x (field_name). - + (operator): - * (operator): - - (operator): - a (field_name). - b (field_name). - c (field_name). - -mlr filter -v $x<$a+-$b*$c /dev/null -< (operator): - x (field_name). - + (operator): - a (field_name). - * (operator): - - (operator): - b (field_name). - c (field_name). - -mlr filter -v $x<$a*-$b+$c /dev/null -< (operator): - x (field_name). - + (operator): - * (operator): - a (field_name). - - (operator): - b (field_name). - c (field_name). - -mlr filter -v $x<$a+$b*-$c /dev/null -< (operator): - x (field_name). - + (operator): - a (field_name). - * (operator): - b (field_name). - - (operator): - c (field_name). - -mlr filter -v $x<$a*$b+-$c /dev/null -< (operator): - x (field_name). - + (operator): - * (operator): - a (field_name). - b (field_name). - - (operator): - c (field_name). 
- -mlr --csvlite put $gmt=sec2gmt($sec) ./test/input/sec2gmt -sec,gmt -0,1970-01-01T00:00:00Z -1,1970-01-01T00:00:01Z -10,1970-01-01T00:00:10Z -100,1970-01-01T00:01:40Z -1000,1970-01-01T00:16:40Z -10000,1970-01-01T02:46:40Z -100000,1970-01-02T03:46:40Z -1000000,1970-01-12T13:46:40Z -10000000,1970-04-26T17:46:40Z -100000000,1973-03-03T09:46:40Z -1000000000,2001-09-09T01:46:40Z -1432036180,2015-05-19T11:49:40Z -1500000000,2017-07-14T02:40:00Z -2000000000,2033-05-18T03:33:20Z - -mlr --csvlite put $sec=gmt2sec($gmt) ./test/input/gmt2sec -gmt,sec -1970-01-01T00:00:00Z,0 -1970-01-01T00:00:01Z,1 -1970-01-01T00:00:10Z,10 -1970-01-01T00:01:40Z,100 -1970-01-01T00:16:40Z,1000 -1970-01-01T02:46:40Z,10000 -1970-01-02T03:46:40Z,100000 -1970-01-12T13:46:40Z,1000000 -1970-04-26T17:46:40Z,10000000 -1973-03-03T09:46:40Z,100000000 -2001-09-09T01:46:40Z,1000000000 -2015-05-19T11:49:40Z,1432036180 -2017-07-14T02:40:00Z,1500000000 -2033-05-18T03:33:20Z,2000000000 - -mlr put $z=min($x, $y) ./test/input/minmax.dkvp -x=1,y=2,z=1.000000 -x=1,y=,z=1.000000 -x=,y=,z= -x=,y=2,z=2.000000 -x=3,y=2,z=2.000000 -x=3,y=,z=3.000000 -x=,y=,z= -x=,y=2,z=2.000000 - -mlr put $z=max($x, $y) ./test/input/minmax.dkvp -x=1,y=2,z=2.000000 -x=1,y=,z=1.000000 -x=,y=,z= -x=,y=2,z=2.000000 -x=3,y=2,z=3.000000 -x=3,y=,z=3.000000 -x=,y=,z= -x=,y=2,z=2.000000 - -mlr --opprint put $hms=sec2hms($sec); $resec=hms2sec($hms); $diff=$resec-$sec ./test/input/sec2xhms -sec hms resec diff -0 00:00:00 0 0.000000 -1 00:00:01 1 0.000000 -59 00:00:59 59 0.000000 -60 00:01:00 60 0.000000 -61 00:01:01 61 0.000000 -3599 00:59:59 3599 0.000000 -3600 01:00:00 3600 0.000000 -3601 01:00:01 3601 0.000000 -86399 23:59:59 86399 0.000000 -86400 24:00:00 86400 0.000000 -86401 24:00:01 86401 0.000000 -863999 239:59:59 863999 0.000000 -864000 240:00:00 864000 0.000000 -864001 240:00:01 864001 0.000000 --1 -00:00:01 -1 0.000000 --59 -00:00:59 -59 0.000000 --60 -00:01:00 -60 0.000000 --61 -00:01:01 -61 0.000000 --3599 -00:59:59 -3599 0.000000 
--3600 -01:00:00 -3600 0.000000 --3601 -01:00:01 -3601 0.000000 --86399 -23:59:59 -86399 0.000000 --86400 -24:00:00 -86400 0.000000 --86401 -24:00:01 -86401 0.000000 --863999 -239:59:59 -863999 0.000000 --864000 -240:00:00 -864000 0.000000 --864001 -240:00:01 -864001 0.000000 - -mlr --opprint put $hms=fsec2hms($sec); $resec=hms2fsec($hms); $diff=$resec-$sec ./test/input/fsec2xhms -sec hms resec diff -0.25 0:00:00.250000 0.250000 0.000000 -1.25 0:00:01.250000 1.250000 0.000000 -59.25 0:00:59.250000 59.250000 0.000000 -60.25 0:01:00.250000 60.250000 0.000000 -61.25 0:01:01.250000 61.250000 0.000000 -3599.25 0:59:59.250000 3599.250000 0.000000 -3600.25 1:00:00.250000 3600.250000 0.000000 -3601.25 1:00:01.250000 3601.250000 0.000000 -86399.25 23:59:59.250000 86399.250000 0.000000 -86400.25 24:00:00.250000 86400.250000 0.000000 -86401.25 24:00:01.250000 86401.250000 0.000000 -863999.25 239:59:59.250000 863999.250000 0.000000 -864000.25 240:00:00.250000 864000.250000 0.000000 -864001.25 240:00:01.250000 864001.250000 0.000000 --0.25 -00:00:00.250000 -0.250000 0.000000 --1.25 -00:00:01.250000 -1.250000 0.000000 --59.25 -00:00:59.250000 -59.250000 0.000000 --60.25 -00:01:00.250000 -60.250000 0.000000 --61.25 -00:01:01.250000 -61.250000 0.000000 --3599.25 -00:59:59.250000 -3599.250000 0.000000 --3600.25 -01:00:00.250000 -3600.250000 0.000000 --3601.25 -01:00:01.250000 -3601.250000 0.000000 --86399.25 -23:59:59.250000 -86399.250000 0.000000 --86400.25 -24:00:00.250000 -86400.250000 0.000000 --86401.25 -24:00:01.250000 -86401.250000 0.000000 --863999.25 -239:59:59.250000 -863999.250000 0.000000 --864000.25 -240:00:00.250000 -864000.250000 0.000000 --864001.25 -240:00:01.250000 -864001.250000 0.000000 - -mlr --opprint put $hms=sec2dhms($sec); $resec=dhms2sec($hms); $diff=$resec-$sec ./test/input/sec2xhms -sec hms resec diff -0 0s 0 0.000000 -1 1s 1 0.000000 -59 59s 59 0.000000 -60 1m00s 60 0.000000 -61 1m01s 61 0.000000 -3599 59m59s 3599 0.000000 -3600 1h00m00s 3600 0.000000 
-3601 1h00m01s 3601 0.000000 -86399 23h59m59s 86399 0.000000 -86400 1d00h00m00s 86400 0.000000 -86401 1d00h00m01s 86401 0.000000 -863999 9d23h59m59s 863999 0.000000 -864000 10d00h00m00s 864000 0.000000 -864001 10d00h00m01s 864001 0.000000 --1 -1s -1 0.000000 --59 -59s -59 0.000000 --60 -1m00s -60 0.000000 --61 -1m01s -61 0.000000 --3599 -59m59s -3599 0.000000 --3600 -1h00m00s -3600 0.000000 --3601 -1h00m01s -3601 0.000000 --86399 -23h59m59s -86399 0.000000 --86400 -1d00h00m00s -86400 0.000000 --86401 -1d00h00m01s -86401 0.000000 --863999 -9d23h59m59s -863999 0.000000 --864000 -10d00h00m00s -864000 0.000000 --864001 -10d00h00m01s -864001 0.000000 - -mlr --opprint put $hms=fsec2dhms($sec); $resec=dhms2fsec($hms); $diff=$resec-$sec ./test/input/fsec2xhms -sec hms resec diff -0.25 0.250000s 0.250000 0.000000 -1.25 1.250000s 1.250000 0.000000 -59.25 59.250000s 59.250000 0.000000 -60.25 1m00.250000s 60.250000 0.000000 -61.25 1m01.250000s 61.250000 0.000000 -3599.25 59m59.250000s 3599.250000 0.000000 -3600.25 1h00m00.250000s 3600.250000 0.000000 -3601.25 1h00m01.250000s 3601.250000 0.000000 -86399.25 23h59m59.250000s 86399.250000 0.000000 -86400.25 1d00h00m00.250000s 86400.250000 0.000000 -86401.25 1d00h00m01.250000s 86401.250000 0.000000 -863999.25 9d23h59m59.250000s 863999.250000 0.000000 -864000.25 10d00h00m00.250000s 864000.250000 0.000000 -864001.25 10d00h00m01.250000s 864001.250000 0.000000 --0.25 -0.250000s -0.250000 0.000000 --1.25 -1.250000s -1.250000 0.000000 --59.25 -59.250000s -59.250000 0.000000 --60.25 -1m00.250000s -60.250000 0.000000 --61.25 -1m01.250000s -61.250000 0.000000 --3599.25 -59m59.250000s -3599.250000 0.000000 --3600.25 -1h00m00.250000s -3600.250000 0.000000 --3601.25 -1h00m01.250000s -3601.250000 0.000000 --86399.25 -23h59m59.250000s -86399.250000 0.000000 --86400.25 -1d00h00m00.250000s -86400.250000 0.000000 --86401.25 -1d00h00m01.250000s -86401.250000 0.000000 --863999.25 -9d23h59m59.250000s -863999.250000 0.000000 --864000.25 
-10d00h00m00.250000s -864000.250000 0.000000 --864001.25 -10d00h00m01.250000s -864001.250000 0.000000 - - -================================================================ -CHAINING - -mlr cat then cat ./test/input/short -a=1 -b=2 -c=3 - -mlr cat then tac ./test/input/short -c=3 -b=2 -a=1 - -mlr tac then cat ./test/input/short -c=3 -b=2 -a=1 - -mlr tac then tac ./test/input/short -a=1 -b=2 -c=3 - -mlr cat then cat then cat ./test/input/short -a=1 -b=2 -c=3 - -mlr cat then cat then tac ./test/input/short -c=3 -b=2 -a=1 - -mlr cat then tac then cat ./test/input/short -c=3 -b=2 -a=1 - -mlr cat then tac then tac ./test/input/short -a=1 -b=2 -c=3 - -mlr tac then cat then cat ./test/input/short -c=3 -b=2 -a=1 - -mlr tac then cat then tac ./test/input/short -a=1 -b=2 -c=3 - -mlr tac then tac then cat ./test/input/short -a=1 -b=2 -c=3 - -mlr tac then tac then tac ./test/input/short -c=3 -b=2 -a=1 - - -================================================================ -HET-CSV INPUT - -mlr --icsvlite --odkvp cat ./test/input/a.csv -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --icsvlite --odkvp cat ./test/input/b.csv -d=5,e=6,f=7 - -mlr --icsvlite --odkvp cat ./test/input/c.csv -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 - -mlr --icsvlite --odkvp cat ./test/input/d.csv -h=3,i=4,j=5 -m=8,n=9,o=10 - -mlr --icsvlite --odkvp cat ./test/input/e.csv -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --icsvlite --odkvp cat ./test/input/f.csv - -mlr --icsvlite --odkvp cat ./test/input/g.csv - -mlr --icsvlite --odkvp cat ./test/input/a.csv ./test/input/a.csv -a=1,b=2,c=3 -a=4,b=5,c=6 -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --icsvlite --odkvp cat ./test/input/b.csv ./test/input/b.csv -d=5,e=6,f=7 -d=5,e=6,f=7 - -mlr --icsvlite --odkvp cat ./test/input/c.csv ./test/input/c.csv -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 - -mlr --icsvlite --odkvp cat ./test/input/d.csv ./test/input/d.csv -h=3,i=4,j=5 -m=8,n=9,o=10 -h=3,i=4,j=5 -m=8,n=9,o=10 - -mlr --icsvlite --odkvp cat ./test/input/e.csv 
./test/input/e.csv -a=1,b=2,c=3 -a=4,b=5,c=6 -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --icsvlite --odkvp cat ./test/input/f.csv ./test/input/f.csv - -mlr --icsvlite --odkvp cat ./test/input/g.csv ./test/input/g.csv - -mlr --icsvlite --odkvp cat ./test/input/a.csv ./test/input/b.csv -a=1,b=2,c=3 -a=4,b=5,c=6 -d=5,e=6,f=7 - -mlr --icsvlite --odkvp cat ./test/input/b.csv ./test/input/c.csv -d=5,e=6,f=7 -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 - -mlr --icsvlite --odkvp cat ./test/input/c.csv ./test/input/d.csv -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 -h=3,i=4,j=5 -m=8,n=9,o=10 - -mlr --icsvlite --odkvp cat ./test/input/d.csv ./test/input/e.csv -h=3,i=4,j=5 -m=8,n=9,o=10 -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --icsvlite --odkvp cat ./test/input/e.csv ./test/input/f.csv -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --icsvlite --odkvp cat ./test/input/f.csv ./test/input/g.csv - -mlr --icsvlite --odkvp cat ./test/input/a.csv ./test/input/b.csv ./test/input/c.csv ./test/input/d.csv ./test/input/e.csv ./test/input/f.csv ./test/input/g.csv -a=1,b=2,c=3 -a=4,b=5,c=6 -d=5,e=6,f=7 -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 -h=3,i=4,j=5 -m=8,n=9,o=10 -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --icsvlite --odkvp tac ./test/input/het.csv -resource=/some/other/path,loadsec=0.97,ok=false -record_count=150,resource=/path/to/second/file -resource=/path/to/second/file,loadsec=0.32,ok=true -record_count=100,resource=/path/to/file -resource=/path/to/file,loadsec=0.45,ok=true - - -================================================================ -HET-PPRINT INPUT - -mlr --ipprint --odkvp cat ./test/input/a.pprint -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --ipprint --odkvp cat ./test/input/b.pprint -d=5,e=6,f=7 - -mlr --ipprint --odkvp cat ./test/input/c.pprint -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 - -mlr --ipprint --odkvp cat ./test/input/d.pprint -h=3,i=4,j=5 -m=8,n=9,o=10 - -mlr --ipprint --odkvp cat ./test/input/e.pprint -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --ipprint --odkvp cat ./test/input/f.pprint - -mlr --ipprint --odkvp cat 
./test/input/g.pprint - -mlr --ipprint --odkvp cat ./test/input/a.pprint ./test/input/a.pprint -a=1,b=2,c=3 -a=4,b=5,c=6 -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --ipprint --odkvp cat ./test/input/b.pprint ./test/input/b.pprint -d=5,e=6,f=7 -d=5,e=6,f=7 - -mlr --ipprint --odkvp cat ./test/input/c.pprint ./test/input/c.pprint -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 - -mlr --ipprint --odkvp cat ./test/input/d.pprint ./test/input/d.pprint -h=3,i=4,j=5 -m=8,n=9,o=10 -h=3,i=4,j=5 -m=8,n=9,o=10 - -mlr --ipprint --odkvp cat ./test/input/e.pprint ./test/input/e.pprint -a=1,b=2,c=3 -a=4,b=5,c=6 -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --ipprint --odkvp cat ./test/input/f.pprint ./test/input/f.pprint - -mlr --ipprint --odkvp cat ./test/input/g.pprint ./test/input/g.pprint - -mlr --ipprint --odkvp cat ./test/input/a.pprint ./test/input/b.pprint -a=1,b=2,c=3 -a=4,b=5,c=6 -d=5,e=6,f=7 - -mlr --ipprint --odkvp cat ./test/input/b.pprint ./test/input/c.pprint -d=5,e=6,f=7 -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 - -mlr --ipprint --odkvp cat ./test/input/c.pprint ./test/input/d.pprint -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 -h=3,i=4,j=5 -m=8,n=9,o=10 - -mlr --ipprint --odkvp cat ./test/input/d.pprint ./test/input/e.pprint -h=3,i=4,j=5 -m=8,n=9,o=10 -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --ipprint --odkvp cat ./test/input/e.pprint ./test/input/f.pprint -a=1,b=2,c=3 -a=4,b=5,c=6 - -mlr --ipprint --odkvp cat ./test/input/f.pprint ./test/input/g.pprint - -mlr --ipprint --odkvp cat ./test/input/a.pprint ./test/input/b.pprint ./test/input/c.pprint ./test/input/d.pprint ./test/input/e.pprint ./test/input/f.pprint ./test/input/g.pprint -a=1,b=2,c=3 -a=4,b=5,c=6 -d=5,e=6,f=7 -a=1,b=2,c=3 -a=4,b=5,c=6 -a=7,b=8,c=9 -h=3,i=4,j=5 -m=8,n=9,o=10 -a=1,b=2,c=3 -a=4,b=5,c=6 - - -================================================================ -NULL-FIELD INPUT - -mlr --icsvlite --odkvp cat ./test/input/null-fields.csv -a=1,b=2,c=3,d=4,e=5 -a=6,b=,c=,d=,e=10 -a=,b=,c=,d=11,e=12 
-a=13,b=14,c=,d=,e= -a=,b=,c=,d=,e= - -mlr --inidx --odkvp cat ./test/input/null-fields.nidx -1=a,2=b,3=c,4=d,5=e -1=f,2=,3=,4=,5=g -1=,2=,3=,4=h,5=i -1=j,2=k,3=,4=,5= -1=,2=,3=,4=,5= - -mlr --idkvp --oxtab cat ./test/input/missings.dkvp -a 1 -b 2 -c 3 -d 4 -e 5 -f 6 - -1 a -b -x 4 -c 3 -5 d -6 e -7 f - -1 a -b -x 4 -c 3 -5 d -6 e -f 6 - - -================================================================ -SPACE-PADDING - -mlr --mmap --idkvp --odkvp --ifs space --repifs cat ./test/input/space-pad.dkvp -a=1,b=2 -c=3 - -mlr --no-mmap --idkvp --odkvp --ifs space --repifs cat ./test/input/space-pad.dkvp -a=1,b=2 -c=3 - -mlr --mmap --inidx --odkvp --ifs space --repifs cat ./test/input/space-pad.nidx -1=a,2=b,3=c -1=d,2=e,3=f - -mlr --no-mmap --inidx --odkvp --ifs space --repifs cat ./test/input/space-pad.nidx -1=a,2=b,3=c -1=d,2=e,3=f - -mlr --mmap --icsvlite --odkvp --ifs space --repifs cat ./test/input/space-pad.pprint -a=1,b=2,c=3 - -mlr --no-mmap --icsvlite --odkvp --ifs space --repifs cat ./test/input/space-pad.pprint -a=1,b=2,c=3 - - -================================================================ -DOUBLE PS - -mlr --no-mmap --opprint cat ./test/input/double-ps.dkvp -a b c -pan wy.e 3 -pan wy=e 3 - -mlr --mmap --opprint cat ./test/input/double-ps.dkvp -a b c -pan wy.e 3 -pan wy=e 3 - - -================================================================ -MISSING FINAL LF - -mlr --no-mmap --csvlite cat ./test/input/truncated.csv -a,b,c -1,2,3 -4,5,6 -7,8,9 - -mlr --csvlite cat ./test/input/truncated.csv -a,b,c -1,2,3 -4,5,6 -7,8,9 - -mlr --no-mmap --dkvp cat ./test/input/truncated.dkvp -a=1,b=2 -c=3 - -mlr --dkvp cat ./test/input/truncated.dkvp -a=1,b=2 -c=3 - -mlr --no-mmap --nidx cat ./test/input/truncated.nidx -1 2 3 -4 5 - -mlr --nidx cat ./test/input/truncated.nidx -1 2 3 -4 5 - -mlr --no-mmap --pprint cat ./test/input/truncated.pprint -a b c -1 2 3 -4 5 6 -7 8 9 - -mlr --pprint cat ./test/input/truncated.pprint -a b c -1 2 3 -4 5 6 -7 8 9 - -mlr --no-mmap --xtab 
cat ./test/input/truncated.xtab -a 1 -b 2 -c 3 - -d 4 -e 5 - -mlr --xtab cat ./test/input/truncated.xtab -a 1 -b 2 -c 3 - -d 4 -e 5 - - -================================================================ -UTF-8 alignment - -mlr --icsvlite --opprint cat ./test/input/utf8-1.csv -langue nom jour -français françois vendredi - -mlr --icsvlite --opprint cat ./test/input/utf8-2.csv -français françois vendredi -langue nom jour - -mlr --icsvlite --oxtab cat ./test/input/utf8-1.csv -langue français -nom françois -jour vendredi - -mlr --icsvlite --oxtab cat ./test/input/utf8-2.csv -français langue -françois nom -vendredi jour - -mlr --inidx --ifs space --opprint cat ./test/input/utf8-align.nidx -1 2 -191º test -191 test2 -francois français -françois francais - -mlr --inidx --ifs space --opprint --right cat ./test/input/utf8-align.nidx - 1 2 - 191º test - 191 test2 -francois français -françois francais - -mlr --oxtab cat ./test/input/utf8-align.dkvp -191º test -1912 test2 -cois çais -çois cais - -191º test -1912 test2 -ois çais -çois cais - -191º test -1912 test2 -coise çais -çois cais - - -================================================================ -STDIN - -mlr --csv cat -a,b,c -1,x,3 -4,5,6 -x,y"yy,z - - -================================================================ -RFC-CSV - -mlr --csv cat ./test/input/rfc-csv/simple.csv -a,b,c -1,x,3 -4,5,6 -x,y"yy,z - -mlr --csv cat ./test/input/rfc-csv/simple-truncated.csv -a,b,c -1,x,3 -4,5,6 - -mlr --csv cat ./test/input/rfc-csv/narrow.csv -a -1 -2 -3 -4 - -mlr --csv cat ./test/input/rfc-csv/narrow-truncated.csv -a -1 -2 -3 -4 - -mlr --csv cat ./test/input/rfc-csv/quoted-comma.csv -a,b,c -1,"x,3",y -4,5,6 - -mlr --csv cat ./test/input/rfc-csv/quoted-comma-truncated.csv -a,b,c -1,"x,3",y -4,5,6 - -mlr --csv cat ./test/input/rfc-csv/quoted-crlf.csv -a,b,c -1,"x -3",y -4,5,6 - -mlr --csv cat ./test/input/rfc-csv/quoted-crlf-truncated.csv -a,b,c -1,"x -3",y -4,5,6 - -mlr --csv cat ./test/input/rfc-csv/simple-truncated.csv 
./test/input/rfc-csv/simple.csv -a,b,c -1,x,3 -4,5,6 -1,x,3 -4,5,6 -x,y"yy,z - diff --git a/c/test/run b/c/test/run index bd85eb1da..72cd5ee9f 100755 --- a/c/test/run +++ b/c/test/run @@ -24,7 +24,7 @@ ourdir=`dirname $verb` path_to_mlr=$ourdir/../mlr indir=$ourdir/input expdir=$ourdir/expected -outdir=$ourdir/output +outdir=./output mkdir -p $outdir rm -f $outdir/out From 008d4adabf1b783e0053acb069689a3a3220a32a Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 19:47:23 -0400 Subject: [PATCH 18/43] test/README.md --- c/test/README.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 c/test/README.md diff --git a/c/test/README.md b/c/test/README.md new file mode 100644 index 000000000..a12a168cb --- /dev/null +++ b/c/test/README.md @@ -0,0 +1,5 @@ +There are two classes of testing for Miller: + +* C source-file names starting with `test_` use MinUnit to **unit-test** various subsystems of interest. These are separate executables built and run by the build framework. + +* `test/run` runs the main `mlr` executable with canned inputs, comparing actual to canned outputs, to **regression-test** Miller's end-to-end operation. 
From fc64c7510bdda9d99bc453424a2384759aefb318 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 20:07:22 -0400 Subject: [PATCH 19/43] allow ORS/OFS/OPS to be multi-char --- c/cli/mlrcli.c | 82 ++++++++++++++++++++++------------ c/cli/mlrcli.h | 6 +-- c/output/lrec_writer_csv.c | 10 ++--- c/output/lrec_writer_csvlite.c | 22 ++++----- c/output/lrec_writer_dkvp.c | 20 ++++----- c/output/lrec_writer_nidx.c | 18 ++++---- c/output/lrec_writer_pprint.c | 1 + c/output/lrec_writers.h | 8 ++-- 8 files changed, 97 insertions(+), 70 deletions(-) diff --git a/c/cli/mlrcli.c b/c/cli/mlrcli.c index f9df7c4aa..1dbd78d37 100644 --- a/c/cli/mlrcli.c +++ b/c/cli/mlrcli.c @@ -43,9 +43,9 @@ static mapper_setup_t* mapper_lookup_table[] = { static int mapper_lookup_table_length = sizeof(mapper_lookup_table) / sizeof(mapper_lookup_table[0]); // ---------------------------------------------------------------- -#define DEFAULT_RS '\n' -#define DEFAULT_FS ',' -#define DEFAULT_PS '=' +#define DEFAULT_RS "\n" +#define DEFAULT_FS "," +#define DEFAULT_PS "=" #define DEFAULT_OFMT "%lf" @@ -89,8 +89,8 @@ static void main_usage(char* argv0, int exit_code) { fprintf(o, " -p is a keystroke-saver for --nidx --fs space --repifs\n"); fprintf(o, "Separator options, for input, output, or both:\n"); fprintf(o, " --rs --irs --ors Record separators, defaulting to newline\n"); - fprintf(o, " --fs --ifs --ofs --repifs Field separators, defaulting to \"%c\"\n", DEFAULT_FS); - fprintf(o, " --ps --ips --ops Pair separators, defaulting to \"%c\"\n", DEFAULT_PS); + fprintf(o, " --fs --ifs --ofs --repifs Field separators, defaulting to \"%s\"\n", DEFAULT_FS); + fprintf(o, " --ps --ips --ops Pair separators, defaulting to \"%s\"\n", DEFAULT_PS); fprintf(o, " Notes (as of Miller v2.0.0):\n"); fprintf(o, " * RS/FS/PS are used for DKVP, NIDX, and CSVLITE formats where they must be single-character.\n"); fprintf(o, " * For CSV, PPRINT, and XTAB formats, RS/FS/PS command-line options are ignored.\n"); @@ 
-119,6 +119,14 @@ static void main_usage(char* argv0, int exit_code) { exit(exit_code); } +static char xxx_temp_check_single_char_separator(char* sep, char* argv0) { + if (strlen(sep) != 1) { + main_usage(argv0, 1); + } + return sep[0]; +} + + static void usage_all_verbs(char* argv0) { char* separator = "================================================================"; @@ -143,28 +151,38 @@ static void check_arg_count(char** argv, int argi, int argc, int n) { } } -static char sep_from_arg(char* arg, char* argv0) { +static char* sep_from_arg(char* arg, char* argv0) { + if (streq(arg, "cr")) + return "\r"; + if (streq(arg, "lf")) + return "\n"; + if (streq(arg, "lflf")) + return "\n\n"; + if (streq(arg, "crlf")) + return "\r\n"; + if (streq(arg, "crlfcrlf")) + return "\r\n\r\n"; if (streq(arg, "tab")) - return '\t'; + return "\t"; + if (streq(arg, "tab")) + return "\t"; if (streq(arg, "space")) - return ' '; + return " "; if (streq(arg, "comma")) - return ','; + return ","; if (streq(arg, "newline")) - return '\n'; + return "\n"; if (streq(arg, "pipe")) - return '|'; + return "|"; if (streq(arg, "slash")) - return '/'; + return "/"; if (streq(arg, "colon")) - return ':'; + return ":"; if (streq(arg, "semicolon")) - return '|'; + return "|"; if (streq(arg, "equals")) - return '='; - if (strlen(arg) != 1) - main_usage(argv0, 1); - return arg[0]; + return "="; + return arg; } static mapper_setup_t* look_up_mapper_setup(char* verb) { @@ -182,9 +200,9 @@ cli_opts_t* parse_command_line(int argc, char** argv) { cli_opts_t* popts = mlr_malloc_or_die(sizeof(cli_opts_t)); memset(popts, 0, sizeof(*popts)); - popts->irs = DEFAULT_RS; - popts->ifs = DEFAULT_FS; - popts->ips = DEFAULT_PS; + popts->irs = DEFAULT_RS[0]; // xxx temp + popts->ifs = DEFAULT_FS[0]; + popts->ips = DEFAULT_PS[0]; popts->allow_repeat_ifs = FALSE; popts->allow_repeat_ips = FALSE; @@ -231,12 +249,14 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--rs")) { 
check_arg_count(argv, argi, argc, 2); - popts->ors = popts->irs = sep_from_arg(argv[argi+1], argv[0]); + //popts->ors = popts->irs = sep_from_arg(argv[argi+1], argv[0]); // xxx temp + popts->ors = sep_from_arg(argv[argi+1], argv[0]); + popts->irs = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); argi++; } else if (streq(argv[argi], "--irs")) { check_arg_count(argv, argi, argc, 2); - popts->irs = sep_from_arg(argv[argi+1], argv[0]); + popts->irs = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); argi++; } else if (streq(argv[argi], "--ors")) { @@ -247,12 +267,15 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--fs")) { check_arg_count(argv, argi, argc, 2); - popts->ofs = popts->ifs = sep_from_arg(argv[argi+1], argv[0]); + // xxx temp + //popts->ofs = popts->ifs[0] = sep_from_arg(argv[argi+1], argv[0]); + popts->ofs = sep_from_arg(argv[argi+1], argv[0]); + popts->ifs = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); argi++; } else if (streq(argv[argi], "--ifs")) { check_arg_count(argv, argi, argc, 2); - popts->ifs = sep_from_arg(argv[argi+1], argv[0]); + popts->ifs = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); argi++; } else if (streq(argv[argi], "--ofs")) { @@ -268,18 +291,21 @@ cli_opts_t* parse_command_line(int argc, char** argv) { popts->ifmt = "nidx"; ofmt = "nidx"; popts->ifs = ' '; - popts->ofs = ' '; + popts->ofs = " "; popts->allow_repeat_ifs = TRUE; } else if (streq(argv[argi], "--ps")) { check_arg_count(argv, argi, argc, 2); - popts->ops = popts->ips = sep_from_arg(argv[argi+1], argv[0]); + // xxx temp + // popts->ops = popts->ips[0] = sep_from_arg(argv[argi+1], argv[0]); + popts->ops = sep_from_arg(argv[argi+1], argv[0]); + popts->ips = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); argi++; } else if (streq(argv[argi], "--ips")) { 
check_arg_count(argv, argi, argc, 2); - popts->ips = sep_from_arg(argv[argi+1], argv[0]); + popts->ips = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); argi++; } else if (streq(argv[argi], "--ops")) { diff --git a/c/cli/mlrcli.h b/c/cli/mlrcli.h index 63477d3a6..3d9209799 100644 --- a/c/cli/mlrcli.h +++ b/c/cli/mlrcli.h @@ -25,9 +25,9 @@ typedef struct _cli_opts_t { int use_mmap_for_read; char* ifmt; - char ors; - char ofs; - char ops; + char* ors; + char* ofs; + char* ops; char* ofmt; int oquoting; diff --git a/c/output/lrec_writer_csv.c b/c/output/lrec_writer_csv.c index c89abe9fc..c805907fb 100644 --- a/c/output/lrec_writer_csv.c +++ b/c/output/lrec_writer_csv.c @@ -13,8 +13,8 @@ static void quote_numeric_output_func(FILE* fp, char* string, char* ors, char* o typedef struct _lrec_writer_csv_state_t { int onr; - char *ors; // xxx char -> char* - char *ofs; // xxx char -> char* + char *ors; + char *ofs; int orslen; int ofslen; quoted_output_func_t* pquoted_output_func; @@ -78,13 +78,13 @@ static void lrec_writer_csv_free(void* pvstate) { } } -lrec_writer_t* lrec_writer_csv_alloc(char ors, char ofs, int oquoting) { +lrec_writer_t* lrec_writer_csv_alloc(char* ors, char* ofs, int oquoting) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_csv_state_t)); pstate->onr = 0; - //pstate->ors = ors; - //pstate->ofs = ofs; + //pstate->ors = ors; + //pstate->ofs = ofs; pstate->ors = "\r\n"; // xxx temp pstate->ofs = ","; // xxx temp pstate->orslen = strlen(pstate->ors); diff --git a/c/output/lrec_writer_csvlite.c b/c/output/lrec_writer_csvlite.c index c608ffce1..cdf5538dd 100644 --- a/c/output/lrec_writer_csvlite.c +++ b/c/output/lrec_writer_csvlite.c @@ -4,9 +4,9 @@ #include "output/lrec_writers.h" typedef struct _lrec_writer_csvlite_state_t { - int onr; - char ors; - char ofs; + int onr; + char* ors; + char* ofs; long long 
num_header_lines_output; slls_t* plast_header_output; } lrec_writer_csvlite_state_t; @@ -18,8 +18,8 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void* if (prec == NULL) return; lrec_writer_csvlite_state_t* pstate = pvstate; - char ors = pstate->ors; - char ofs = pstate->ofs; + char* ors = pstate->ors; + char* ofs = pstate->ofs; if (pstate->plast_header_output != NULL) { // xxx make a fcn to compare these w/o copy: put it in mixutil. @@ -27,7 +27,7 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void* slls_free(pstate->plast_header_output); pstate->plast_header_output = NULL; if (pstate->num_header_lines_output > 0LL) - fputc(ors, output_stream); + fputs(ors, output_stream); } } @@ -35,11 +35,11 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void* int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) - fputc(ofs, output_stream); + fputs(ofs, output_stream); fputs(pe->key, output_stream); nf++; } - fputc(ors, output_stream); + fputs(ors, output_stream); pstate->plast_header_output = mlr_copy_keys_from_record(prec); pstate->num_header_lines_output++; } @@ -47,11 +47,11 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void* int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) - fputc(ofs, output_stream); + fputs(ofs, output_stream); fputs(pe->value, output_stream); nf++; } - fputc(ors, output_stream); + fputs(ors, output_stream); pstate->onr++; lrec_free(prec); // xxx cmt mem-mgmt @@ -65,7 +65,7 @@ static void lrec_writer_csvlite_free(void* pvstate) { } } -lrec_writer_t* lrec_writer_csvlite_alloc(char ors, char ofs) { +lrec_writer_t* lrec_writer_csvlite_alloc(char* ors, char* ofs) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_csvlite_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_csvlite_state_t)); diff --git 
a/c/output/lrec_writer_dkvp.c b/c/output/lrec_writer_dkvp.c index f49d85d2f..3e3f8cc82 100644 --- a/c/output/lrec_writer_dkvp.c +++ b/c/output/lrec_writer_dkvp.c @@ -3,9 +3,9 @@ #include "output/lrec_writers.h" typedef struct _lrec_writer_dkvp_state_t { - char rs; - char fs; - char ps; + char* rs; + char* fs; + char* ps; } lrec_writer_dkvp_state_t; // ---------------------------------------------------------------- @@ -13,27 +13,27 @@ static void lrec_writer_dkvp_process(FILE* output_stream, lrec_t* prec, void* pv if (prec == NULL) return; lrec_writer_dkvp_state_t* pstate = pvstate; - char rs = pstate->rs; - char fs = pstate->fs; - char ps = pstate->ps; + char* rs = pstate->rs; + char* fs = pstate->fs; + char* ps = pstate->ps; int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) - fputc(fs, output_stream); + fputs(fs, output_stream); fputs(pe->key, output_stream); - fputc(ps, output_stream); + fputs(ps, output_stream); fputs(pe->value, output_stream); nf++; } - fputc(rs, output_stream); + fputs(rs, output_stream); lrec_free(prec); // xxx cmt mem-mgmt } static void lrec_writer_dkvp_free(void* pvstate) { } -lrec_writer_t* lrec_writer_dkvp_alloc(char rs, char fs, char ps) { +lrec_writer_t* lrec_writer_dkvp_alloc(char* rs, char* fs, char* ps) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_dkvp_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_dkvp_state_t)); diff --git a/c/output/lrec_writer_nidx.c b/c/output/lrec_writer_nidx.c index 85b476193..1ea8e6e74 100644 --- a/c/output/lrec_writer_nidx.c +++ b/c/output/lrec_writer_nidx.c @@ -3,8 +3,8 @@ #include "output/lrec_writers.h" typedef struct _lrec_writer_nidx_state_t { - char rs; - char fs; + char* ors; + char* ofs; } lrec_writer_nidx_state_t; // ---------------------------------------------------------------- @@ -12,29 +12,29 @@ static void lrec_writer_nidx_process(FILE* output_stream, lrec_t* prec, void* pv if (prec == NULL) return; 
lrec_writer_nidx_state_t* pstate = pvstate; - char rs = pstate->rs; - char fs = pstate->fs; + char* ors = pstate->ors; + char* ofs = pstate->ofs; int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) - fputc(fs, output_stream); + fputs(ofs, output_stream); fputs(pe->value, output_stream); nf++; } - fputc(rs, output_stream); + fputs(ors, output_stream); lrec_free(prec); // xxx cmt mem-mgmt } static void lrec_writer_nidx_free(void* pvstate) { } -lrec_writer_t* lrec_writer_nidx_alloc(char rs, char fs) { +lrec_writer_t* lrec_writer_nidx_alloc(char* ors, char* ofs) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_nidx_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_nidx_state_t)); - pstate->rs = rs; - pstate->fs = fs; + pstate->ors = ors; + pstate->ofs = ofs; plrec_writer->pvstate = (void*)pstate; plrec_writer->pprocess_func = &lrec_writer_nidx_process; diff --git a/c/output/lrec_writer_pprint.c b/c/output/lrec_writer_pprint.c index 2b8a7fd2c..2fff373c6 100644 --- a/c/output/lrec_writer_pprint.c +++ b/c/output/lrec_writer_pprint.c @@ -16,6 +16,7 @@ typedef struct _lrec_writer_pprint_state_t { static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, int left_align); // ---------------------------------------------------------------- +// xxx use ORS here static void lrec_writer_pprint_process(FILE* output_stream, lrec_t* prec, void* pvstate) { lrec_writer_pprint_state_t* pstate = pvstate; diff --git a/c/output/lrec_writers.h b/c/output/lrec_writers.h index 737cfaf0e..8fc4c1653 100644 --- a/c/output/lrec_writers.h +++ b/c/output/lrec_writers.h @@ -2,10 +2,10 @@ #define LREC_WRITERS_H #include "output/lrec_writer.h" -lrec_writer_t* lrec_writer_csv_alloc(char rs, char fs, int oquoting); -lrec_writer_t* lrec_writer_csvlite_alloc(char rs, char fs); -lrec_writer_t* lrec_writer_dkvp_alloc(char rs, char fs, char ps); -lrec_writer_t* lrec_writer_nidx_alloc(char rs, char fs); 
+lrec_writer_t* lrec_writer_csv_alloc(char* rs, char* fs, int oquoting); +lrec_writer_t* lrec_writer_csvlite_alloc(char* rs, char* fs); +lrec_writer_t* lrec_writer_dkvp_alloc(char* rs, char* fs, char* ps); +lrec_writer_t* lrec_writer_nidx_alloc(char* rs, char* fs); lrec_writer_t* lrec_writer_pprint_alloc(int left_align); lrec_writer_t* lrec_writer_xtab_alloc(); From df0f74471274ba68b3f75f2ed3cc555953b330c1 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 20:39:06 -0400 Subject: [PATCH 20/43] allow ORS/OFS/OPS to be multi-char --- c/cli/mlrcli.c | 113 ++++++++++++++++++++++++---------- c/cli/mlrcli.h | 3 +- c/mapping/mapper_join.c | 4 +- c/output/lrec_writer_pprint.c | 19 +++--- c/output/lrec_writer_xtab.c | 12 ++-- c/output/lrec_writers.h | 12 ++-- 6 files changed, 108 insertions(+), 55 deletions(-) diff --git a/c/cli/mlrcli.c b/c/cli/mlrcli.c index 1dbd78d37..2473aa6f7 100644 --- a/c/cli/mlrcli.c +++ b/c/cli/mlrcli.c @@ -200,15 +200,40 @@ cli_opts_t* parse_command_line(int argc, char** argv) { cli_opts_t* popts = mlr_malloc_or_die(sizeof(cli_opts_t)); memset(popts, 0, sizeof(*popts)); + // xxx integrate these with DEFAULT_XS ... 
+ lhmss_t* default_orses = lhmss_alloc(); + lhmss_put(default_orses, "dkvp", "\n"); + lhmss_put(default_orses, "csv", "\r\n"); + lhmss_put(default_orses, "csvlite", "\n"); + lhmss_put(default_orses, "nidx", "\n"); + lhmss_put(default_orses, "xtab", "\n"); + lhmss_put(default_orses, "pprint", "\n"); + + lhmss_t* default_ofses = lhmss_alloc(); + lhmss_put(default_ofses, "dkvp", ","); + lhmss_put(default_ofses, "csv", ","); + lhmss_put(default_ofses, "csvlite", ","); + lhmss_put(default_ofses, "nidx", " "); + lhmss_put(default_ofses, "xtab", " "); + lhmss_put(default_ofses, "pprint", " "); + + lhmss_t* default_opses = lhmss_alloc(); + lhmss_put(default_opses, "dkvp", "="); + lhmss_put(default_opses, "csv", "X"); + lhmss_put(default_opses, "csvlite", "X"); + lhmss_put(default_opses, "nidx", "X"); + lhmss_put(default_opses, "xtab", "X"); + lhmss_put(default_opses, "pprint", "X"); + popts->irs = DEFAULT_RS[0]; // xxx temp popts->ifs = DEFAULT_FS[0]; popts->ips = DEFAULT_PS[0]; popts->allow_repeat_ifs = FALSE; popts->allow_repeat_ips = FALSE; - popts->ors = DEFAULT_RS; - popts->ofs = DEFAULT_FS; - popts->ops = DEFAULT_PS; + popts->ors = NULL; + popts->ofs = NULL; + popts->ops = NULL; popts->ofmt = DEFAULT_OFMT; popts->oquoting = DEFAULT_OQUOTING; @@ -216,8 +241,8 @@ cli_opts_t* parse_command_line(int argc, char** argv) { popts->plrec_writer = NULL; popts->filenames = NULL; - popts->ifmt = "dkvp"; - char* ofmt = "dkvp"; + popts->ifile_fmt = "dkvp"; + popts->ofile_fmt = "dkvp"; popts->use_mmap_for_read = TRUE; int left_align_pprint = TRUE; @@ -288,8 +313,8 @@ cli_opts_t* parse_command_line(int argc, char** argv) { } else if (streq(argv[argi], "-p")) { - popts->ifmt = "nidx"; - ofmt = "nidx"; + popts->ifile_fmt = "nidx"; + popts->ofile_fmt = "nidx"; popts->ifs = ' '; popts->ofs = " "; popts->allow_repeat_ifs = TRUE; @@ -314,40 +339,40 @@ cli_opts_t* parse_command_line(int argc, char** argv) { argi++; } - else if (streq(argv[argi], "--csv")) { popts->ifmt = ofmt = "csv"; } - 
else if (streq(argv[argi], "--icsv")) { popts->ifmt = "csv"; } - else if (streq(argv[argi], "--ocsv")) { ofmt = "csv"; } + else if (streq(argv[argi], "--csv")) { popts->ifile_fmt = popts->ofile_fmt = "csv"; } + else if (streq(argv[argi], "--icsv")) { popts->ifile_fmt = "csv"; } + else if (streq(argv[argi], "--ocsv")) { popts->ofile_fmt = "csv"; } - else if (streq(argv[argi], "--csvlite")) { popts->ifmt = ofmt = "csvlite"; } - else if (streq(argv[argi], "--icsvlite")) { popts->ifmt = "csvlite"; } - else if (streq(argv[argi], "--ocsvlite")) { ofmt = "csvlite"; } + else if (streq(argv[argi], "--csvlite")) { popts->ifile_fmt = popts->ofile_fmt = "csvlite"; } + else if (streq(argv[argi], "--icsvlite")) { popts->ifile_fmt = "csvlite"; } + else if (streq(argv[argi], "--ocsvlite")) { popts->ofile_fmt = "csvlite"; } - else if (streq(argv[argi], "--dkvp")) { popts->ifmt = ofmt = "dkvp"; } - else if (streq(argv[argi], "--idkvp")) { popts->ifmt = "dkvp"; } - else if (streq(argv[argi], "--odkvp")) { ofmt = "dkvp"; } + else if (streq(argv[argi], "--dkvp")) { popts->ifile_fmt = popts->ofile_fmt = "dkvp"; } + else if (streq(argv[argi], "--idkvp")) { popts->ifile_fmt = "dkvp"; } + else if (streq(argv[argi], "--odkvp")) { popts->ofile_fmt = "dkvp"; } - else if (streq(argv[argi], "--nidx")) { popts->ifmt = ofmt = "nidx"; } - else if (streq(argv[argi], "--inidx")) { popts->ifmt = "nidx"; } - else if (streq(argv[argi], "--onidx")) { ofmt = "nidx"; } + else if (streq(argv[argi], "--nidx")) { popts->ifile_fmt = popts->ofile_fmt = "nidx"; } + else if (streq(argv[argi], "--inidx")) { popts->ifile_fmt = "nidx"; } + else if (streq(argv[argi], "--onidx")) { popts->ofile_fmt = "nidx"; } - else if (streq(argv[argi], "--xtab")) { popts->ifmt = ofmt = "xtab"; } - else if (streq(argv[argi], "--ixtab")) { popts->ifmt = "xtab"; } - else if (streq(argv[argi], "--oxtab")) { ofmt = "xtab"; } + else if (streq(argv[argi], "--xtab")) { popts->ifile_fmt = popts->ofile_fmt = "xtab"; } + else if 
(streq(argv[argi], "--ixtab")) { popts->ifile_fmt = "xtab"; } + else if (streq(argv[argi], "--oxtab")) { popts->ofile_fmt = "xtab"; } else if (streq(argv[argi], "--ipprint")) { - popts->ifmt = "csvlite"; + popts->ifile_fmt = "csvlite"; popts->ifs = ' '; popts->allow_repeat_ifs = TRUE; } else if (streq(argv[argi], "--opprint")) { - ofmt = "pprint"; + popts->ofile_fmt = "pprint"; } else if (streq(argv[argi], "--pprint")) { - popts->ifmt = "csvlite"; + popts->ifile_fmt = "csvlite"; popts->ifs = ' '; popts->allow_repeat_ifs = TRUE; - ofmt = "pprint"; + popts->ofile_fmt = "pprint"; } else if (streq(argv[argi], "--right")) { left_align_pprint = FALSE; @@ -355,7 +380,7 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--ofmt")) { check_arg_count(argv, argi, argc, 2); - popts->ofmt = argv[argi+1]; + popts->ofile_fmt = argv[argi+1]; argi++; } @@ -387,12 +412,32 @@ cli_opts_t* parse_command_line(int argc, char** argv) { nusage(argv[0], argv[argi]); } - if (streq(ofmt, "dkvp")) popts->plrec_writer = lrec_writer_dkvp_alloc(popts->ors, popts->ofs, popts->ops); - else if (streq(ofmt, "csv")) popts->plrec_writer = lrec_writer_csv_alloc(popts->ors, popts->ofs, popts->oquoting); - else if (streq(ofmt, "csvlite")) popts->plrec_writer = lrec_writer_csvlite_alloc(popts->ors, popts->ofs); - else if (streq(ofmt, "nidx")) popts->plrec_writer = lrec_writer_nidx_alloc(popts->ors, popts->ofs); - else if (streq(ofmt, "xtab")) popts->plrec_writer = lrec_writer_xtab_alloc(); - else if (streq(ofmt, "pprint")) popts->plrec_writer = lrec_writer_pprint_alloc(left_align_pprint); + if (popts->ors == NULL) + popts->ors = lhmss_get(default_orses, popts->ofile_fmt); + if (popts->ofs == NULL) + popts->ofs = lhmss_get(default_ofses, popts->ofile_fmt); + if (popts->ops == NULL) + popts->ops = lhmss_get(default_opses, popts->ofile_fmt); + + if (popts->ors == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, 
__LINE__); + exit(1); + } + if (popts->ofs == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); + exit(1); + } + if (popts->ops == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); + exit(1); + } + + if (streq(popts->ofile_fmt, "dkvp")) popts->plrec_writer = lrec_writer_dkvp_alloc(popts->ors, popts->ofs, popts->ops); + else if (streq(popts->ofile_fmt, "csv")) popts->plrec_writer = lrec_writer_csv_alloc(popts->ors, popts->ofs, popts->oquoting); + else if (streq(popts->ofile_fmt, "csvlite")) popts->plrec_writer = lrec_writer_csvlite_alloc(popts->ors, popts->ofs); + else if (streq(popts->ofile_fmt, "nidx")) popts->plrec_writer = lrec_writer_nidx_alloc(popts->ors, popts->ofs); + else if (streq(popts->ofile_fmt, "xtab")) popts->plrec_writer = lrec_writer_xtab_alloc(popts->ors, popts->ofs); + else if (streq(popts->ofile_fmt, "pprint")) popts->plrec_writer = lrec_writer_pprint_alloc(popts->ors, popts->ofs, left_align_pprint); else { main_usage(argv[0], 1); } @@ -439,7 +484,7 @@ cli_opts_t* parse_command_line(int argc, char** argv) { if (argi == argc) popts->use_mmap_for_read = FALSE; - popts->plrec_reader = lrec_reader_alloc(popts->ifmt, popts->use_mmap_for_read, + popts->plrec_reader = lrec_reader_alloc(popts->ifile_fmt, popts->use_mmap_for_read, popts->irs, popts->ifs, popts->allow_repeat_ifs, popts->ips, popts->allow_repeat_ips); if (popts->plrec_reader == NULL) main_usage(argv[0], 1); diff --git a/c/cli/mlrcli.h b/c/cli/mlrcli.h index 3d9209799..bd38734cd 100644 --- a/c/cli/mlrcli.h +++ b/c/cli/mlrcli.h @@ -23,7 +23,8 @@ typedef struct _cli_opts_t { int allow_repeat_ifs; int allow_repeat_ips; int use_mmap_for_read; - char* ifmt; + char* ifile_fmt; + char* ofile_fmt; char* ors; char* ofs; diff --git a/c/mapping/mapper_join.c b/c/mapping/mapper_join.c index b6d44f7bd..1e0b1e944 100644 --- a/c/mapping/mapper_join.c +++ 
b/c/mapping/mapper_join.c @@ -35,7 +35,7 @@ typedef struct _mapper_join_opts_t { char ips; int allow_repeat_ifs; int allow_repeat_ips; - char* ifmt; + char* ifile_fmt; int use_mmap_for_read; } mapper_join_opts_t; @@ -237,7 +237,7 @@ static void mapper_join_free(void* pvstate) { static void merge_options(mapper_join_opts_t* popts) { if (popts->input_file_format == NULL) - popts->input_file_format = MLR_GLOBALS.popts->ifmt; + popts->input_file_format = MLR_GLOBALS.popts->ifile_fmt; if (popts->irs == OPTION_UNSPECIFIED) popts->irs = MLR_GLOBALS.popts->irs; if (popts->ifs == OPTION_UNSPECIFIED) diff --git a/c/output/lrec_writer_pprint.c b/c/output/lrec_writer_pprint.c index 2fff373c6..425b4fe5e 100644 --- a/c/output/lrec_writer_pprint.c +++ b/c/output/lrec_writer_pprint.c @@ -11,12 +11,13 @@ typedef struct _lrec_writer_pprint_state_t { slls_t* pprev_keys; int left_align; long long num_blocks_written; + char* ors; + char* ofs; } lrec_writer_pprint_state_t; -static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, int left_align); +static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, char* ors, char* ofs, int left_align); // ---------------------------------------------------------------- -// xxx use ORS here static void lrec_writer_pprint_process(FILE* output_stream, lrec_t* prec, void* pvstate) { lrec_writer_pprint_state_t* pstate = pvstate; @@ -32,8 +33,8 @@ static void lrec_writer_pprint_process(FILE* output_stream, lrec_t* prec, void* if (drain) { if (pstate->num_blocks_written > 0LL) // xxx cmt - fputc('\n', output_stream); - print_and_free_record_list(pstate->precords, output_stream, pstate->left_align); + fputs(pstate->ors, output_stream); + print_and_free_record_list(pstate->precords, output_stream, pstate->ors, pstate->ofs, pstate->left_align); if (pstate->pprev_keys != NULL) { slls_free(pstate->pprev_keys); pstate->pprev_keys = NULL; @@ -49,7 +50,7 @@ static void lrec_writer_pprint_process(FILE* output_stream, 
lrec_t* prec, void* } // ---------------------------------------------------------------- -static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, int left_align) { +static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, char* ors, char* ofs, int left_align) { if (precords->length == 0) return; lrec_t* prec1 = precords->phead->pvdata; @@ -96,7 +97,7 @@ static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, in fprintf(output_stream, "%s", pe->key); } } - fputc('\n', output_stream); + fputs(ors, output_stream); } j = 0; @@ -123,7 +124,7 @@ static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, in fprintf(output_stream, "%s", value); } } - fputc('\n', output_stream); + fputs(ors, output_stream); lrec_free(prec); // xxx cmt mem-mgmt } @@ -144,12 +145,14 @@ static void lrec_writer_pprint_free(void* pvstate) { } } -lrec_writer_t* lrec_writer_pprint_alloc(int left_align) { +lrec_writer_t* lrec_writer_pprint_alloc(char* ors, char* ofs, int left_align) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_pprint_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_pprint_state_t)); pstate->precords = sllv_alloc(); pstate->pprev_keys = NULL; + pstate->ors = ors; + pstate->ofs = ofs; pstate->left_align = left_align; pstate->num_blocks_written = 0LL; diff --git a/c/output/lrec_writer_xtab.c b/c/output/lrec_writer_xtab.c index 39bad0a5a..8eadec118 100644 --- a/c/output/lrec_writer_xtab.c +++ b/c/output/lrec_writer_xtab.c @@ -4,6 +4,8 @@ #include "output/lrec_writers.h" typedef struct _lrec_writer_xtab_state_t { + char* ors; + char* ofs; long long record_count; } lrec_writer_xtab_state_t; @@ -13,7 +15,7 @@ static void lrec_writer_xtab_process(FILE* output_stream, lrec_t* prec, void* pv return; lrec_writer_xtab_state_t* pstate = pvstate; if (pstate->record_count > 0LL) - fprintf(output_stream, "\n"); + fputs(pstate->ors, output_stream); 
pstate->record_count++; int max_key_width = 1; @@ -28,8 +30,8 @@ static void lrec_writer_xtab_process(FILE* output_stream, lrec_t* prec, void* pv fprintf(output_stream, "%s", pe->key); int d = max_key_width - strlen_for_utf8_display(pe->key); for (int i = 0; i < d; i++) - fputc(' ', output_stream); - fprintf(output_stream, " %s\n", pe->value); + fputs(pstate->ofs, output_stream); + fprintf(output_stream, "%s%s%s", pstate->ofs, pe->value, pstate->ors); } lrec_free(prec); // xxx cmt mem-mgmt } @@ -37,10 +39,12 @@ static void lrec_writer_xtab_process(FILE* output_stream, lrec_t* prec, void* pv static void lrec_writer_xtab_free(void* pvstate) { } -lrec_writer_t* lrec_writer_xtab_alloc() { +lrec_writer_t* lrec_writer_xtab_alloc(char* ors, char* ofs) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_xtab_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_xtab_state_t)); + pstate->ors = ors; + pstate->ofs = ofs; pstate->record_count = 0LL; plrec_writer->pvstate = pstate; diff --git a/c/output/lrec_writers.h b/c/output/lrec_writers.h index 8fc4c1653..f96ad2592 100644 --- a/c/output/lrec_writers.h +++ b/c/output/lrec_writers.h @@ -2,11 +2,11 @@ #define LREC_WRITERS_H #include "output/lrec_writer.h" -lrec_writer_t* lrec_writer_csv_alloc(char* rs, char* fs, int oquoting); -lrec_writer_t* lrec_writer_csvlite_alloc(char* rs, char* fs); -lrec_writer_t* lrec_writer_dkvp_alloc(char* rs, char* fs, char* ps); -lrec_writer_t* lrec_writer_nidx_alloc(char* rs, char* fs); -lrec_writer_t* lrec_writer_pprint_alloc(int left_align); -lrec_writer_t* lrec_writer_xtab_alloc(); +lrec_writer_t* lrec_writer_csv_alloc(char* ors, char* ofs, int oquoting); +lrec_writer_t* lrec_writer_csvlite_alloc(char* ors, char* ofs); +lrec_writer_t* lrec_writer_dkvp_alloc(char* ors, char* ofs, char* ops); +lrec_writer_t* lrec_writer_nidx_alloc(char* ors, char* ofs); +lrec_writer_t* lrec_writer_pprint_alloc(char* ors, char*ofs, int left_align); +lrec_writer_t* 
lrec_writer_xtab_alloc(char* ors, char* ofs); #endif // LREC_WRITERS_H From 05a33261188f37e1c0b0366bbeba8470576a22a9 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 20:48:12 -0400 Subject: [PATCH 21/43] neaten --- c/cli/argparse.c | 2 +- c/containers/lhmslv.c | 4 ++-- c/containers/lrec.c | 12 ++++++------ c/containers/slls.c | 7 +++---- c/containers/slls.h | 2 +- c/input/lrec_reader_stdio_dkvp.c | 2 +- c/lib/mlrutil.c | 4 ++-- c/lib/mlrutil.h | 2 +- c/output/lrec_writer_dkvp.c | 26 +++++++++++++------------- 9 files changed, 30 insertions(+), 31 deletions(-) diff --git a/c/cli/argparse.c b/c/cli/argparse.c index 90b899579..76bc1beca 100644 --- a/c/cli/argparse.c +++ b/c/cli/argparse.c @@ -237,7 +237,7 @@ int main(int argc, char** argv) { if (plist == NULL) { printf("list is null\n"); } else { - char* out = slls_join(plist, ','); + char* out = slls_join(plist, ","); printf("list is %s\n", out); free(out); } diff --git a/c/containers/lhmslv.c b/c/containers/lhmslv.c index 144b9aa83..aa4e7d6b3 100644 --- a/c/containers/lhmslv.c +++ b/c/containers/lhmslv.c @@ -312,7 +312,7 @@ void lhmslv_print(lhmslv_t* pmap) { const char* key_string = (pe == NULL) ? "none" : pe->key == NULL ? "null" : - slls_join(pe->key, ','); + slls_join(pe->key, ","); const char* value_string = (pe == NULL) ? "none" : pe->pvvalue == NULL ? "null" : pe->pvvalue; @@ -327,7 +327,7 @@ void lhmslv_print(lhmslv_t* pmap) { for (lhmslve_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) { const char* key_string = (pe == NULL) ? "none" : pe->key == NULL ? "null" : - slls_join(pe->key, ','); + slls_join(pe->key, ","); const char* value_string = (pe == NULL) ? "none" : pe->pvvalue == NULL ? 
"null" : pe->pvvalue; diff --git a/c/containers/lrec.c b/c/containers/lrec.c index f779a68c0..f986546ce 100644 --- a/c/containers/lrec.c +++ b/c/containers/lrec.c @@ -428,17 +428,17 @@ lrec_t* lrec_literal_4(char* k1, char* v1, char* k2, char* v2, char* k3, char* v void lrec_print(lrec_t* prec) { FILE* output_stream = stdout; - char rs = '\n'; - char fs = ','; - char ps = '='; + char ors = '\n'; + char ofs = ','; + char ops = '='; int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) - fputc(fs, output_stream); + fputc(ofs, output_stream); fputs(pe->key, output_stream); - fputc(ps, output_stream); + fputc(ops, output_stream); fputs(pe->value, output_stream); nf++; } - fputc(rs, output_stream); + fputc(ors, output_stream); } diff --git a/c/containers/slls.c b/c/containers/slls.c index b1935097a..9cafa77e8 100644 --- a/c/containers/slls.c +++ b/c/containers/slls.c @@ -118,17 +118,16 @@ slls_t* slls_from_line(char* line, char ifs, int allow_repeat_ifs) { // ---------------------------------------------------------------- // xxx cmt for debug. inefficient. or fix that. 
// xxx rename to slls_alloc_join -char* slls_join(slls_t* plist, char fs) { +char* slls_join(slls_t* plist, char* ofs) { int len = 0; for (sllse_t* pe = plist->phead; pe != NULL; pe = pe->pnext) - len += strlen(pe->value) + 1; // include space for fs and null-terminator + len += strlen(pe->value) + 1; // include space for ofs and null-terminator char* output = mlr_malloc_or_die(len); - char sep[2] = {fs, 0}; *output = 0; for (sllse_t* pe = plist->phead; pe != NULL; pe = pe->pnext) { strcat(output, pe->value); if (pe->pnext != NULL) { - strcat(output, sep); + strcat(output, ofs); } } diff --git a/c/containers/slls.h b/c/containers/slls.h index a3f1ddf61..b13ce4743 100644 --- a/c/containers/slls.h +++ b/c/containers/slls.h @@ -38,7 +38,7 @@ int slls_compare_lexically(slls_t* pa, slls_t* pb); void slls_sort(slls_t* plist); // Debug routines: -char* slls_join(slls_t* plist, char fs); +char* slls_join(slls_t* plist, char* ofs); void slls_print(slls_t* plist); #endif // SLLS_H diff --git a/c/input/lrec_reader_stdio_dkvp.c b/c/input/lrec_reader_stdio_dkvp.c index 39b50260c..dd9fc25ea 100644 --- a/c/input/lrec_reader_stdio_dkvp.c +++ b/c/input/lrec_reader_stdio_dkvp.c @@ -52,7 +52,7 @@ lrec_reader_t* lrec_reader_stdio_dkvp_alloc(char irs, char ifs, char ips, int al } // ---------------------------------------------------------------- -// xxx needs checking on repeated occurrences of ps between fs occurrences. don't zero-poke there. +// xxx needs checking on repeated occurrences of ps between ifs occurrences. don't zero-poke there. // // xxx needs abend on null lhs. 
// diff --git a/c/lib/mlrutil.c b/c/lib/mlrutil.c index bfd8081e0..5521a0090 100644 --- a/c/lib/mlrutil.c +++ b/c/lib/mlrutil.c @@ -227,10 +227,10 @@ int mlr_string_pair_hash_func(char* str1, char* str2) { } // ---------------------------------------------------------------- -char* mlr_get_line(FILE* input_stream, char rs) { +char* mlr_get_line(FILE* input_stream, char irs) { char* line = NULL; size_t linecap = 0; - ssize_t linelen = getdelim(&line, &linecap, rs, input_stream); + ssize_t linelen = getdelim(&line, &linecap, irs, input_stream); if (linelen <= 0) { return NULL; } diff --git a/c/lib/mlrutil.h b/c/lib/mlrutil.h index b166e31f3..6ce9a3d19 100644 --- a/c/lib/mlrutil.h +++ b/c/lib/mlrutil.h @@ -58,7 +58,7 @@ int mlr_string_hash_func(char *str); int mlr_string_pair_hash_func(char* str1, char* str2); // xxx cmt mem mgt -char* mlr_get_line(FILE* input_stream, char rs); +char* mlr_get_line(FILE* input_stream, char irs); // portable timegm replacement time_t mlr_timegm (struct tm *tm); diff --git a/c/output/lrec_writer_dkvp.c b/c/output/lrec_writer_dkvp.c index 3e3f8cc82..ef4a29fff 100644 --- a/c/output/lrec_writer_dkvp.c +++ b/c/output/lrec_writer_dkvp.c @@ -3,9 +3,9 @@ #include "output/lrec_writers.h" typedef struct _lrec_writer_dkvp_state_t { - char* rs; - char* fs; - char* ps; + char* ors; + char* ofs; + char* ops; } lrec_writer_dkvp_state_t; // ---------------------------------------------------------------- @@ -13,33 +13,33 @@ static void lrec_writer_dkvp_process(FILE* output_stream, lrec_t* prec, void* pv if (prec == NULL) return; lrec_writer_dkvp_state_t* pstate = pvstate; - char* rs = pstate->rs; - char* fs = pstate->fs; - char* ps = pstate->ps; + char* ors = pstate->ors; + char* ofs = pstate->ofs; + char* ops = pstate->ops; int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) - fputs(fs, output_stream); + fputs(ofs, output_stream); fputs(pe->key, output_stream); - fputs(ps, output_stream); + fputs(ops, output_stream); 
fputs(pe->value, output_stream); nf++; } - fputs(rs, output_stream); + fputs(ors, output_stream); lrec_free(prec); // xxx cmt mem-mgmt } static void lrec_writer_dkvp_free(void* pvstate) { } -lrec_writer_t* lrec_writer_dkvp_alloc(char* rs, char* fs, char* ps) { +lrec_writer_t* lrec_writer_dkvp_alloc(char* ors, char* ofs, char* ops) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_dkvp_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_dkvp_state_t)); - pstate->rs = rs; - pstate->fs = fs; - pstate->ps = ps; + pstate->ors = ors; + pstate->ofs = ofs; + pstate->ops = ops; plrec_writer->pvstate = (void*)pstate; plrec_writer->pprocess_func = &lrec_writer_dkvp_process; From 7c74a27d05d171c5a15e9c7287de406ca91f8c5f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 21:11:38 -0400 Subject: [PATCH 22/43] update getlines profiler/comparator --- c/Makefile | 2 ++ c/experimental/getlines.c | 49 +++++++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/c/Makefile b/c/Makefile index c1d240779..080ef4167 100644 --- a/c/Makefile +++ b/c/Makefile @@ -100,7 +100,9 @@ EXPERIMENTAL_READER_SRCS = \ lib/mlrutil.c \ lib/mlr_globals.c \ lib/string_builder.c \ +input/stdio_byte_reader.c \ input/file_reader_mmap.c \ +containers/parse_trie.c \ experimental/getlines.c # ================================================================ diff --git a/c/experimental/getlines.c b/c/experimental/getlines.c index d5e6e6848..7003cd8d7 100644 --- a/c/experimental/getlines.c +++ b/c/experimental/getlines.c @@ -6,7 +6,9 @@ #include "input/file_reader_mmap.h" #include "input/lrec_readers.h" #include "lib/string_builder.h" -#include "input/old_peek_file_reader.h" +#include "input/byte_readers.h" +#include "input/peek_file_reader.h" +#include "containers/parse_trie.h" #define PEEK_BUF_LEN 32 #define STRING_BUILDER_INIT_SIZE 1024 @@ -263,28 +265,41 @@ static int read_file_mmap_psb(char* filename, int do_write) { 
} // ================================================================ -static char* read_line_pfr_psb(old_peek_file_reader_t* pfr, string_builder_t* psb, char* irs, int irs_len) { +#define IRS_STRIDX 11 +#define EOF_STRIDX 22 + +static char* read_line_pfr_psb(peek_file_reader_t* pfr, string_builder_t* psb, parse_trie_t* ptrie) { + int rc, stridx, matchlen; while (TRUE) { - if (old_pfr_at_eof(pfr)) { - if (sb_is_empty(psb)) - return NULL; - else + pfr_buffer_by(pfr, ptrie->maxlen); + rc = parse_trie_match(ptrie, pfr->peekbuf, pfr->sob, pfr->npeeked, pfr->peekbuflenmask, + &stridx, &matchlen); + if (rc) { + switch(stridx) { + case IRS_STRIDX: return sb_finish(psb); - } else if (old_pfr_next_is(pfr, irs, irs_len)) { - old_pfr_advance_by(pfr, irs_len); - return sb_finish(psb); + break; + case EOF_STRIDX: + return sb_finish(psb); + break; + } } else { - sb_append_char(psb, old_pfr_read_char(pfr)); + //sb_append_char(psb, pfr_read_char(pfr)); + printf("%02x\n", (unsigned)pfr_read_char(pfr)); } } } static int read_file_pfr_psb(char* filename, int do_write) { - FILE* fp = fopen_or_die(filename); - char* irs = "\n"; - int irs_len = strlen(irs); + byte_reader_t* pbr = stdio_byte_reader_alloc(); + pbr->popen_func(pbr, filename); + + peek_file_reader_t* pfr = pfr_alloc(pbr, PEEK_BUF_LEN); + + parse_trie_t* ptrie = parse_trie_alloc(); + parse_trie_add_string(ptrie, "\n", IRS_STRIDX); + parse_trie_add_string(ptrie, "\xff", EOF_STRIDX); - old_peek_file_reader_t* pfr = old_pfr_alloc(fp, PEEK_BUF_LEN); string_builder_t sb; string_builder_t* psb = &sb; sb_init(&sb, STRING_BUILDER_INIT_SIZE); @@ -292,7 +307,7 @@ static int read_file_pfr_psb(char* filename, int do_write) { int bc = 0; while (TRUE) { - char* line = read_line_pfr_psb(pfr, psb, irs, irs_len); + char* line = read_line_pfr_psb(pfr, psb, ptrie); if (line == NULL) break; if (do_write) { @@ -302,7 +317,7 @@ static int read_file_pfr_psb(char* filename, int do_write) { bc += strlen(line); free(line); } - fclose(fp); + 
pbr->pclose_func(pbr); return bc; } @@ -441,4 +456,4 @@ int main(int argc, char** argv) { // * getc_unlocked vs. fgetc, no-brainer for this single-threaded code. // * string-builder is a little than fixed-length malloc, as expected // -- it's adding value. -// ! old_peek_file_reader is where the optimization opportunities are +// ! peek_file_reader is where the optimization opportunities are From d8683f4f370a9aa3b4f6b05eb8f5e2e1cff288c0 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 21:18:14 -0400 Subject: [PATCH 23/43] update getlines profiler/comparator --- c/experimental/getlines.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/c/experimental/getlines.c b/c/experimental/getlines.c index 7003cd8d7..73dd0a8f7 100644 --- a/c/experimental/getlines.c +++ b/c/experimental/getlines.c @@ -265,8 +265,9 @@ static int read_file_mmap_psb(char* filename, int do_write) { } // ================================================================ -#define IRS_STRIDX 11 -#define EOF_STRIDX 22 +#define IRS_STRIDX 11 +#define EOF_STRIDX 22 +#define IRSEOF_STRIDX 33 static char* read_line_pfr_psb(peek_file_reader_t* pfr, string_builder_t* psb, parse_trie_t* ptrie) { int rc, stridx, matchlen; @@ -275,17 +276,20 @@ static char* read_line_pfr_psb(peek_file_reader_t* pfr, string_builder_t* psb, p rc = parse_trie_match(ptrie, pfr->peekbuf, pfr->sob, pfr->npeeked, pfr->peekbuflenmask, &stridx, &matchlen); if (rc) { + pfr_advance_by(pfr, matchlen); switch(stridx) { case IRS_STRIDX: return sb_finish(psb); break; - case EOF_STRIDX: + case IRSEOF_STRIDX: return sb_finish(psb); break; + case EOF_STRIDX: + return NULL; + break; } } else { - //sb_append_char(psb, pfr_read_char(pfr)); - printf("%02x\n", (unsigned)pfr_read_char(pfr)); + sb_append_char(psb, pfr_read_char(pfr)); } } } @@ -299,6 +303,7 @@ static int read_file_pfr_psb(char* filename, int do_write) { parse_trie_t* ptrie = parse_trie_alloc(); parse_trie_add_string(ptrie, "\n", 
IRS_STRIDX); parse_trie_add_string(ptrie, "\xff", EOF_STRIDX); + parse_trie_add_string(ptrie, "\n\xff", IRSEOF_STRIDX); string_builder_t sb; string_builder_t* psb = &sb; From ddbaef3994828f6ab7d2b07471554730d7292b94 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Fri, 11 Sep 2015 21:19:02 -0400 Subject: [PATCH 24/43] neaten --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 226f69027..b97812fa5 100644 --- a/.gitignore +++ b/.gitignore @@ -33,4 +33,5 @@ c/dsls/filter_dsl_parse.h c/dsls/filter_dsl_parse.out c/dsls/pdm c/dsls/fdm +c/output/out tags From c7d01dbe24be97ffd56aa423dac3dde7bcaf4b2d Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 13:35:20 -0700 Subject: [PATCH 25/43] neaten --- c/Makefile | 3 +- c/experimental/getlines.c | 81 ++++++++++++++++++++++----------------- 2 files changed, 48 insertions(+), 36 deletions(-) diff --git a/c/Makefile b/c/Makefile index 080ef4167..49a744250 100644 --- a/c/Makefile +++ b/c/Makefile @@ -216,7 +216,8 @@ termcvt: tools/termcvt.c $(CCDEBUG) tools/termcvt.c -o termcvt getl: .always - $(CCDEBUG) $(EXPERIMENTAL_READER_SRCS) -o getl + #$(CCDEBUG) $(EXPERIMENTAL_READER_SRCS) -o getl + $(CCOPT) $(EXPERIMENTAL_READER_SRCS) -o getl # ================================================================ clean: diff --git a/c/experimental/getlines.c b/c/experimental/getlines.c index 73dd0a8f7..5f4cbe96b 100644 --- a/c/experimental/getlines.c +++ b/c/experimental/getlines.c @@ -402,43 +402,45 @@ int main(int argc, char** argv) { // ================================================================ // $ ./getl ../data/big.csv 5|tee x +./getl ../data/big.csv 5|tee x // $ mlr --opprint cat then sort -n t x -// type t n -// getdelim 0.118618 55888899 -// getdelim 0.121467 55888899 -// getdelim 0.121943 55888899 -// getdelim 0.124756 55888899 -// getdelim 0.127039 55888899 -// getc_unlocked_fixed_len 0.167563 55888899 -// getc_unlocked_fixed_len 0.167803 55888899 -// 
getc_unlocked_fixed_len 0.168808 55888899 -// getc_unlocked_fixed_len 0.168980 55888899 -// getc_unlocked_fixed_len 0.176187 55888899 -// getc_unlocked_psb 0.238986 55888899 -// getc_unlocked_psb 0.241325 55888899 -// getc_unlocked_psb 0.246466 55888899 -// getc_unlocked_psb 0.247592 55888899 -// getc_unlocked_psb 0.248112 55888899 -// mmap_psb 0.250021 55888899 -// mmap_psb 0.254118 55888899 -// mmap_psb 0.257428 55888899 -// mmap_psb 0.261807 55888899 -// mmap_psb 0.264367 55888899 -// pfr_psb 0.760035 55888900 -// pfr_psb 0.765121 55888900 -// pfr_psb 0.768731 55888900 -// pfr_psb 0.771937 55888900 -// pfr_psb 0.780460 55888900 -// fgetc_fixed_len 2.516459 55888899 -// fgetc_fixed_len 2.522877 55888899 -// fgetc_fixed_len 2.587373 55888899 -// fgetc_psb 2.590090 55888899 -// fgetc_psb 2.590536 55888899 -// fgetc_fixed_len 2.608356 55888899 -// fgetc_psb 2.623930 55888899 -// fgetc_fixed_len 2.624310 55888899 -// fgetc_psb 2.637269 55888899 +// type t n type t n +// getdelim 0.118618 55888899 getdelim 0.118057 55888899 +// getdelim 0.121467 55888899 getdelim 0.118727 55888899 +// getdelim 0.121943 55888899 getdelim 0.119609 55888899 +// getdelim 0.124756 55888899 getdelim 0.122506 55888899 +// getdelim 0.127039 55888899 getdelim 0.123099 55888899 +// getc_unlocked_fixed_len 0.167563 55888899 getc_unlocked_fixed_len 0.168109 55888899 +// getc_unlocked_fixed_len 0.167803 55888899 getc_unlocked_fixed_len 0.168392 55888899 +// getc_unlocked_fixed_len 0.168808 55888899 getc_unlocked_fixed_len 0.169387 55888899 +// getc_unlocked_fixed_len 0.168980 55888899 getc_unlocked_fixed_len 0.178484 55888899 +// getc_unlocked_fixed_len 0.176187 55888899 getc_unlocked_fixed_len 0.182793 55888899 +// getc_unlocked_psb 0.238986 55888899 getc_unlocked_psb 0.293240 55888899 +// getc_unlocked_psb 0.241325 55888899 getc_unlocked_psb 0.298449 55888899 +// getc_unlocked_psb 0.246466 55888899 getc_unlocked_psb 0.298508 55888899 +// getc_unlocked_psb 0.247592 55888899 getc_unlocked_psb 
0.301125 55888899 +// getc_unlocked_psb 0.248112 55888899 mmap_psb 0.313239 55888899 +// mmap_psb 0.250021 55888899 mmap_psb 0.315061 55888899 +// mmap_psb 0.254118 55888899 mmap_psb 0.315517 55888899 +// mmap_psb 0.257428 55888899 mmap_psb 0.316790 55888899 +// mmap_psb 0.261807 55888899 mmap_psb 0.320654 55888899 +// mmap_psb 0.264367 55888899 getc_unlocked_psb 0.326494 55888899 +// pfr_psb 0.760035 55888900 pfr_psb 0.417141 55888899 +// pfr_psb 0.765121 55888900 pfr_psb 0.439269 55888899 +// pfr_psb 0.768731 55888900 pfr_psb 0.439342 55888899 +// pfr_psb 0.771937 55888900 pfr_psb 0.447218 55888899 +// pfr_psb 0.780460 55888900 pfr_psb 0.453839 55888899 +// fgetc_fixed_len 2.516459 55888899 fgetc_psb 2.476543 55888899 +// fgetc_fixed_len 2.522877 55888899 fgetc_psb 2.477130 55888899 +// fgetc_fixed_len 2.587373 55888899 fgetc_psb 2.484007 55888899 +// fgetc_psb 2.590090 55888899 fgetc_psb 2.484495 55888899 +// fgetc_psb 2.590536 55888899 fgetc_fixed_len 2.493730 55888899 +// fgetc_fixed_len 2.608356 55888899 fgetc_fixed_len 2.528333 55888899 +// fgetc_psb 2.623930 55888899 fgetc_fixed_len 2.533535 55888899 +// fgetc_fixed_len 2.624310 55888899 fgetc_fixed_len 2.555377 55888899 +// fgetc_psb 2.637269 55888899 fgetc_fixed_len 2.736391 55888899 +// fgetc_psb 2.743828 55888899 // $ mlr --opprint cat then stats1 -a min,max,stddev,mean -f t -g type then sort -n t_mean x // type t_min t_max t_stddev t_mean @@ -450,6 +452,15 @@ int main(int argc, char** argv) { // fgetc_fixed_len 2.516459 2.624310 0.049478 2.571875 // fgetc_psb 2.590090 2.680364 0.037489 2.624438 +// type t_min t_max t_stddev t_mean +// getdelim 0.118057 0.123099 0.002271 0.120400 +// getc_unlocked_fixed_len 0.168109 0.182793 0.006768 0.173433 +// getc_unlocked_psb 0.293240 0.326494 0.013134 0.303563 +// mmap_psb 0.313239 0.320654 0.002771 0.316252 +// pfr_psb 0.417141 0.453839 0.013830 0.439362 +// fgetc_psb 2.476543 2.743828 0.117803 2.533201 +// fgetc_fixed_len 2.493730 2.736391 0.095892 2.569473 + // 
---------------------------------------------------------------- // Analysis: // * getdelim is good; fatal flaw is single-char line-terminator From d5ff7b50dea6a968bebb7aa2584dda70b3bab04e Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 13:48:26 -0700 Subject: [PATCH 26/43] todo --- c/todo.txt | 79 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/c/todo.txt b/c/todo.txt index f8c4908a9..2402c1c47 100644 --- a/c/todo.txt +++ b/c/todo.txt @@ -10,36 +10,75 @@ BUGFIXES ================================================================ TOP OF LIST -* v2.1.0: - o perf - o rs/fs -> csv - o multichar rs/fs/ps for all formats - o optimize csv read perf - o double-quote feature -> dkvp +---------------------------------------------------------------- +MEDIUM +incorp multi-char IXS for CSV + +---------------------------------------------------------------- +MAJOR +autoconfig + +---------------------------------------------------------------- +MAJOR +multi-char separators +k oxs done functionally +* need backslash-handling/parsing ... at least, \r \n \t. and, into online help. +! ixs: + o ips & ifs: needs *p==ixs with strneq(p, ixs); also double-null poke (sos&eos) + o irs for mmap: same + o irs for stdio: it all comes down to getdelim. + ! so focus on getline perf. + ! maybe best idea is to re-impl getdelim with multichar irs. + ! temporary option is getdelim with final char of the multichar irs; strcmp backwards; + usually get it right; occasionally have to strcat/memcpy multiple such. this is + gross so don't do it unless multichar-getdelim doesn't pan out. + +---------------------------------------------------------------- +MAJOR +csv mem-leak/read-perf: +* current option runs faster w/o free, apparently due to heap-fragging +* for stdio, needs some thought ... +* ... but for mmap, it's almost always not necessary to strdup at all: + only on escaped-double-quote case. 
+* denormalize the pbr & make stdio pbr & ptr-backed (mmap,UT-string) pbr. +* code-dup (yes, sadly!) the CSV reader into two & do strups in stdio + but lrec_put w/ !LREC_FREE_VALUE for ptr-backed. +* or *maybe* pbr retent/free-flags for string/mmap w/o denorm, but only + if it's both elegant and fast + +---------------------------------------------------------------- +MINOR +? dkvp quoting ... maybe wait until after the mmap/perf split + +* go back and re-apply ctype/isprint portability things to new spots + +* more dead-code mains ... lrec-eval; what else? + +* dsls/ build outside of pwd? or just lemon $(absdir)/filenamegoeshere.y? +* configure w/o autotools? likewise manpage. etc. multiple build levels. + +* define dkvp, nidx, etc @ cover x 2 +* mlr faq page + +* --mmap @ mlr -h +* ctype ff @ bld.out +* platform os/ccomp list to mlrdoc + +* -h vs. usage : stdout vs. stderr +* pprint join? + - o make a profiler proggy-pair for getline vs. psb/pkr for simple cat - o rs/fs/ps from char to char* throughout - o parameterize csv rs/fs - o implement mmap-backed psb/pkr via vptr intf and profile that - o RFC "there may be a header" -- ?!? use nidx-style integer-numbered columns?? --no-header? - o DKVP double-quote support - i still need separate csvlite/csv on output since the former tolerates heterogeneity * header-length data mismatch et. al: file/line * make an updated dependency list, esp. in light of a2x et al. * probably its own mlrdoc page ... at least, highlighted in build page -* trie-parse to-do: - o make a power-of-two ring buffer for pfr & trie - -* autoconf -* .deb -* homebrew - ---------------------------------------------------------------- little: +* RFC "there may be a header" -- ?!? use nidx-style integer-numbered columns?? --no-header? + * -Wall -Wextra -Wpedantic-?? Werror=unused-but-set-variable? * --mmap into online help ... 
From 56b42cf27e460cfdbadd4d9741b0ab45b193a615 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 14:11:22 -0700 Subject: [PATCH 27/43] neaten --- c/cli/mlrcli.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/c/cli/mlrcli.c b/c/cli/mlrcli.c index 2473aa6f7..d624bfd8e 100644 --- a/c/cli/mlrcli.c +++ b/c/cli/mlrcli.c @@ -274,7 +274,6 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--rs")) { check_arg_count(argv, argi, argc, 2); - //popts->ors = popts->irs = sep_from_arg(argv[argi+1], argv[0]); // xxx temp popts->ors = sep_from_arg(argv[argi+1], argv[0]); popts->irs = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); argi++; @@ -292,8 +291,6 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--fs")) { check_arg_count(argv, argi, argc, 2); - // xxx temp - //popts->ofs = popts->ifs[0] = sep_from_arg(argv[argi+1], argv[0]); popts->ofs = sep_from_arg(argv[argi+1], argv[0]); popts->ifs = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); argi++; @@ -322,8 +319,6 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--ps")) { check_arg_count(argv, argi, argc, 2); - // xxx temp - // popts->ops = popts->ips[0] = sep_from_arg(argv[argi+1], argv[0]); popts->ops = sep_from_arg(argv[argi+1], argv[0]); popts->ips = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); argi++; From 1ff438ffb23a88cdf0342210e7c63031e521fdfc Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 14:47:32 -0700 Subject: [PATCH 28/43] multi-char-separator options for CSV --- c/cli/mlrcli.c | 100 +++++++++++++++++------------- c/cli/mlrcli.h | 6 +- c/containers/join_bucket_keeper.c | 6 +- c/containers/join_bucket_keeper.h | 6 +- c/input/lrec_reader_csv.c | 6 +- c/input/lrec_readers.c | 56 ++++++++++++++--- c/input/lrec_readers.h | 8 +-- c/mapping/mapper_join.c | 54 
++++++++-------- 8 files changed, 145 insertions(+), 97 deletions(-) diff --git a/c/cli/mlrcli.c b/c/cli/mlrcli.c index d624bfd8e..966207c19 100644 --- a/c/cli/mlrcli.c +++ b/c/cli/mlrcli.c @@ -119,14 +119,6 @@ static void main_usage(char* argv0, int exit_code) { exit(exit_code); } -static char xxx_temp_check_single_char_separator(char* sep, char* argv0) { - if (strlen(sep) != 1) { - main_usage(argv0, 1); - } - return sep[0]; -} - - static void usage_all_verbs(char* argv0) { char* separator = "================================================================"; @@ -201,33 +193,33 @@ cli_opts_t* parse_command_line(int argc, char** argv) { memset(popts, 0, sizeof(*popts)); // xxx integrate these with DEFAULT_XS ... - lhmss_t* default_orses = lhmss_alloc(); - lhmss_put(default_orses, "dkvp", "\n"); - lhmss_put(default_orses, "csv", "\r\n"); - lhmss_put(default_orses, "csvlite", "\n"); - lhmss_put(default_orses, "nidx", "\n"); - lhmss_put(default_orses, "xtab", "\n"); - lhmss_put(default_orses, "pprint", "\n"); + lhmss_t* default_rses = lhmss_alloc(); + lhmss_put(default_rses, "dkvp", "\n"); + lhmss_put(default_rses, "csv", "\r\n"); + lhmss_put(default_rses, "csvlite", "\n"); + lhmss_put(default_rses, "nidx", "\n"); + lhmss_put(default_rses, "xtab", "\n"); + lhmss_put(default_rses, "pprint", "\n"); - lhmss_t* default_ofses = lhmss_alloc(); - lhmss_put(default_ofses, "dkvp", ","); - lhmss_put(default_ofses, "csv", ","); - lhmss_put(default_ofses, "csvlite", ","); - lhmss_put(default_ofses, "nidx", " "); - lhmss_put(default_ofses, "xtab", " "); - lhmss_put(default_ofses, "pprint", " "); + lhmss_t* default_fses = lhmss_alloc(); + lhmss_put(default_fses, "dkvp", ","); + lhmss_put(default_fses, "csv", ","); + lhmss_put(default_fses, "csvlite", ","); + lhmss_put(default_fses, "nidx", ","); // xxx update to space at version bump + lhmss_put(default_fses, "xtab", " "); + lhmss_put(default_fses, "pprint", " "); - lhmss_t* default_opses = lhmss_alloc(); - lhmss_put(default_opses, 
"dkvp", "="); - lhmss_put(default_opses, "csv", "X"); - lhmss_put(default_opses, "csvlite", "X"); - lhmss_put(default_opses, "nidx", "X"); - lhmss_put(default_opses, "xtab", "X"); - lhmss_put(default_opses, "pprint", "X"); + lhmss_t* default_pses = lhmss_alloc(); + lhmss_put(default_pses, "dkvp", "="); + lhmss_put(default_pses, "csv", "X"); + lhmss_put(default_pses, "csvlite", "X"); + lhmss_put(default_pses, "nidx", "X"); + lhmss_put(default_pses, "xtab", "X"); + lhmss_put(default_pses, "pprint", "X"); - popts->irs = DEFAULT_RS[0]; // xxx temp - popts->ifs = DEFAULT_FS[0]; - popts->ips = DEFAULT_PS[0]; + popts->irs = NULL; + popts->ifs = NULL; + popts->ips = NULL; popts->allow_repeat_ifs = FALSE; popts->allow_repeat_ips = FALSE; @@ -275,12 +267,12 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--rs")) { check_arg_count(argv, argi, argc, 2); popts->ors = sep_from_arg(argv[argi+1], argv[0]); - popts->irs = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); + popts->irs = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--irs")) { check_arg_count(argv, argi, argc, 2); - popts->irs = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); + popts->irs = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--ors")) { @@ -292,12 +284,12 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--fs")) { check_arg_count(argv, argi, argc, 2); popts->ofs = sep_from_arg(argv[argi+1], argv[0]); - popts->ifs = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); + popts->ifs = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--ifs")) { check_arg_count(argv, argi, argc, 2); - popts->ifs = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); + popts->ifs = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if 
(streq(argv[argi], "--ofs")) { @@ -312,7 +304,7 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "-p")) { popts->ifile_fmt = "nidx"; popts->ofile_fmt = "nidx"; - popts->ifs = ' '; + popts->ifs = " "; popts->ofs = " "; popts->allow_repeat_ifs = TRUE; } @@ -320,12 +312,12 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--ps")) { check_arg_count(argv, argi, argc, 2); popts->ops = sep_from_arg(argv[argi+1], argv[0]); - popts->ips = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); + popts->ips = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--ips")) { check_arg_count(argv, argi, argc, 2); - popts->ips = xxx_temp_check_single_char_separator(sep_from_arg(argv[argi+1], argv[0]), argv[0]); + popts->ips = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--ops")) { @@ -356,7 +348,7 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--ipprint")) { popts->ifile_fmt = "csvlite"; - popts->ifs = ' '; + popts->ifs = " "; popts->allow_repeat_ifs = TRUE; } @@ -365,7 +357,7 @@ cli_opts_t* parse_command_line(int argc, char** argv) { } else if (streq(argv[argi], "--pprint")) { popts->ifile_fmt = "csvlite"; - popts->ifs = ' '; + popts->ifs = " "; popts->allow_repeat_ifs = TRUE; popts->ofile_fmt = "pprint"; } @@ -407,12 +399,32 @@ cli_opts_t* parse_command_line(int argc, char** argv) { nusage(argv[0], argv[argi]); } + if (popts->irs == NULL) + popts->irs = lhmss_get(default_rses, popts->ifile_fmt); + if (popts->ifs == NULL) + popts->ifs = lhmss_get(default_fses, popts->ifile_fmt); + if (popts->ips == NULL) + popts->ips = lhmss_get(default_pses, popts->ifile_fmt); + if (popts->ors == NULL) - popts->ors = lhmss_get(default_orses, popts->ofile_fmt); + popts->ors = lhmss_get(default_rses, popts->ofile_fmt); if (popts->ofs == NULL) - popts->ofs = lhmss_get(default_ofses, popts->ofile_fmt); + 
popts->ofs = lhmss_get(default_fses, popts->ofile_fmt); if (popts->ops == NULL) - popts->ops = lhmss_get(default_opses, popts->ofile_fmt); + popts->ops = lhmss_get(default_pses, popts->ofile_fmt); + + if (popts->irs == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); + exit(1); + } + if (popts->ifs == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); + exit(1); + } + if (popts->ips == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); + exit(1); + } if (popts->ors == NULL) { fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); diff --git a/c/cli/mlrcli.h b/c/cli/mlrcli.h index bd38734cd..6f3bb9b95 100644 --- a/c/cli/mlrcli.h +++ b/c/cli/mlrcli.h @@ -17,9 +17,9 @@ #define QUOTE_NUMERIC 0xb4 typedef struct _cli_opts_t { - char irs; - char ifs; - char ips; + char* irs; + char* ifs; + char* ips; int allow_repeat_ifs; int allow_repeat_ips; int use_mmap_for_read; diff --git a/c/containers/join_bucket_keeper.c b/c/containers/join_bucket_keeper.c index aaff9e232..42d1e7277 100644 --- a/c/containers/join_bucket_keeper.c +++ b/c/containers/join_bucket_keeper.c @@ -47,10 +47,10 @@ join_bucket_keeper_t* join_bucket_keeper_alloc( char* left_file_name, char* input_file_format, int use_mmap_for_read, - char irs, - char ifs, + char* irs, + char* ifs, int allow_repeat_ifs, - char ips, + char* ips, int allow_repeat_ips, slls_t* pleft_field_names ) { diff --git a/c/containers/join_bucket_keeper.h b/c/containers/join_bucket_keeper.h index b2df8fcbb..0b249e482 100644 --- a/c/containers/join_bucket_keeper.h +++ b/c/containers/join_bucket_keeper.h @@ -35,10 +35,10 @@ join_bucket_keeper_t* join_bucket_keeper_alloc( char* left_file_name, char* input_file_format, int use_mmap_for_read, - char irs, - char ifs, + char* irs, + char* 
ifs, int allow_repeat_ifs, - char ips, + char* ips, int allow_repeat_ips, slls_t* pleft_field_names); diff --git a/c/input/lrec_reader_csv.c b/c/input/lrec_reader_csv.c index 9bc7584b9..098d5c1de 100644 --- a/c/input/lrec_reader_csv.c +++ b/c/input/lrec_reader_csv.c @@ -291,15 +291,15 @@ static void lrec_reader_csv_free(void* pvstate) { } // ---------------------------------------------------------------- -lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char irs, char ifs) { +lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char* irs, char* ifs) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_csv_state_t)); pstate->ilno = 0LL; pstate->eof = "\xff"; - pstate->irs = "\r\n"; // xxx multi-byte the cli irs/ifs/etc, and integrate here - pstate->ifs = ","; // xxx multi-byte the cli irs/ifs/etc, and integrate here + pstate->irs = irs; + pstate->ifs = ifs; pstate->ifs_eof = mlr_paste_2_strings(pstate->ifs, "\xff"); pstate->dquote = "\""; diff --git a/c/input/lrec_readers.c b/c/input/lrec_readers.c index 5d72f47f5..1cc60447b 100644 --- a/c/input/lrec_readers.c +++ b/c/input/lrec_readers.c @@ -1,35 +1,71 @@ #include "lib/mlrutil.h" +#include "lib/mlr_globals.h" #include "input/lrec_readers.h" #include "input/byte_readers.h" -lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char irs, char ifs, int allow_repeat_ifs, - char ips, int allow_repeat_ips) +static char xxx_temp_check_single_char_separator(char* name, char* value) { + if (strlen(value) != 1) { + fprintf(stderr, + "%s: multi-character separators are not yet supported for all formats. Got %s=\"%s\".\n", + MLR_GLOBALS.argv0, name, value); + exit(1); + } + return value[0]; +} + +lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char* irs, char* ifs, int allow_repeat_ifs, + char* ips, int allow_repeat_ips) { // xxx refactor for https://github.com/johnkerl/miller/issues/51 et al. 
byte_reader_t* pbr = use_mmap ? mmap_byte_reader_alloc() : stdio_byte_reader_alloc(); if (streq(fmtdesc, "dkvp")) { if (use_mmap) - return lrec_reader_mmap_dkvp_alloc(irs, ifs, ips, allow_repeat_ifs); + return lrec_reader_mmap_dkvp_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + xxx_temp_check_single_char_separator("ips", ips), + allow_repeat_ifs); else - return lrec_reader_stdio_dkvp_alloc(irs, ifs, ips, allow_repeat_ifs); + return lrec_reader_stdio_dkvp_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + xxx_temp_check_single_char_separator("ips", ips), + allow_repeat_ifs); } else if (streq(fmtdesc, "csv")) { return lrec_reader_csv_alloc(pbr, irs, ifs); } else if (streq(fmtdesc, "csvlite")) { if (use_mmap) - return lrec_reader_mmap_csvlite_alloc(irs, ifs, allow_repeat_ifs); + return lrec_reader_mmap_csvlite_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + allow_repeat_ifs); else - return lrec_reader_stdio_csvlite_alloc(irs, ifs, allow_repeat_ifs); + return lrec_reader_stdio_csvlite_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + allow_repeat_ifs); } else if (streq(fmtdesc, "nidx")) { if (use_mmap) - return lrec_reader_mmap_nidx_alloc(irs, ifs, allow_repeat_ifs); + return lrec_reader_mmap_nidx_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + allow_repeat_ifs); else - return lrec_reader_stdio_nidx_alloc(irs, ifs, allow_repeat_ifs); + return lrec_reader_stdio_nidx_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + allow_repeat_ifs); } else if (streq(fmtdesc, "xtab")) { if (use_mmap) - return lrec_reader_mmap_xtab_alloc(irs, ips, TRUE/*allow_repeat_ips*/); + return lrec_reader_mmap_xtab_alloc( 
+ xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ips", ips), + TRUE/*allow_repeat_ips*/); else - return lrec_reader_stdio_xtab_alloc(ips, TRUE); // xxx parameterize allow_repeat_ips + return lrec_reader_stdio_xtab_alloc( + xxx_temp_check_single_char_separator("ips", ips), + TRUE); // xxx parameterize allow_repeat_ips } else { return NULL; } diff --git a/c/input/lrec_readers.h b/c/input/lrec_readers.h index c793492aa..0ef8c28ef 100644 --- a/c/input/lrec_readers.h +++ b/c/input/lrec_readers.h @@ -6,12 +6,12 @@ // ---------------------------------------------------------------- // Primary entry points -// fmtdesc: "dkvp", "csv", "nidx", "xtab". -lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char irs, char ifs, int allow_repeat_ifs, - char ips, int allow_repeat_ips); +// Factory method. fmtdesc: "dkvp", "nidx", "csv", "csvlite", "nidx", "xtab". +lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char* irs, char* ifs, int allow_repeat_ifs, + char* ips, int allow_repeat_ips); lrec_reader_t* lrec_reader_stdio_csvlite_alloc(char irs, char ifs, int allow_repeat_ifs); -lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char irs, char ifs); +lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char* irs, char* ifs); lrec_reader_t* lrec_reader_stdio_dkvp_alloc(char irs, char ifs, char ips, int allow_repeat_ifs); lrec_reader_t* lrec_reader_stdio_nidx_alloc(char irs, char ifs, int allow_repeat_ifs); lrec_reader_t* lrec_reader_stdio_xtab_alloc(char ips, int allow_repeat_ips); diff --git a/c/mapping/mapper_join.c b/c/mapping/mapper_join.c index 1e0b1e944..d9248335a 100644 --- a/c/mapping/mapper_join.c +++ b/c/mapping/mapper_join.c @@ -30,9 +30,9 @@ typedef struct _mapper_join_opts_t { // These allow the joiner to have its own different format/delimiter for // the left-file: char* input_file_format; - char irs; - char ifs; - char ips; + char* irs; + char* ifs; + char* ips; int allow_repeat_ifs; int 
allow_repeat_ips; char* ifile_fmt; @@ -238,11 +238,11 @@ static void mapper_join_free(void* pvstate) { static void merge_options(mapper_join_opts_t* popts) { if (popts->input_file_format == NULL) popts->input_file_format = MLR_GLOBALS.popts->ifile_fmt; - if (popts->irs == OPTION_UNSPECIFIED) + if (popts->irs == NULL) popts->irs = MLR_GLOBALS.popts->irs; - if (popts->ifs == OPTION_UNSPECIFIED) + if (popts->ifs == NULL) popts->ifs = MLR_GLOBALS.popts->ifs; - if (popts->ips == OPTION_UNSPECIFIED) + if (popts->ips == NULL) popts->ips = MLR_GLOBALS.popts->ips; if (popts->allow_repeat_ifs == OPTION_UNSPECIFIED) popts->allow_repeat_ifs = MLR_GLOBALS.popts->allow_repeat_ifs; @@ -360,9 +360,9 @@ static mapper_t* mapper_join_parse_cli(int* pargi, int argc, char** argv) { popts->emit_right_unpairables = FALSE; popts->input_file_format = NULL; - popts->irs = OPTION_UNSPECIFIED; - popts->ifs = OPTION_UNSPECIFIED; - popts->ips = OPTION_UNSPECIFIED; + popts->irs = NULL; + popts->ifs = NULL; + popts->ips = NULL; popts->allow_repeat_ifs = OPTION_UNSPECIFIED; popts->allow_repeat_ips = OPTION_UNSPECIFIED; popts->use_mmap_for_read = OPTION_UNSPECIFIED; @@ -370,25 +370,25 @@ static mapper_t* mapper_join_parse_cli(int* pargi, int argc, char** argv) { char* verb = argv[(*pargi)++]; ap_state_t* pstate = ap_alloc(); - ap_define_string_flag(pstate, "-f", &popts->left_file_name); - ap_define_string_list_flag(pstate, "-j", &popts->poutput_join_field_names); - ap_define_string_list_flag(pstate, "-l", &popts->pleft_join_field_names); - ap_define_string_list_flag(pstate, "-r", &popts->pright_join_field_names); - ap_define_string_flag(pstate, "--lp", &popts->left_prefix); - ap_define_string_flag(pstate, "--rp", &popts->right_prefix); - ap_define_false_flag(pstate, "--np", &popts->emit_pairables); - ap_define_true_flag(pstate, "--ul", &popts->emit_left_unpairables); - ap_define_true_flag(pstate, "--ur", &popts->emit_right_unpairables); - ap_define_true_flag(pstate, "-u", 
&popts->allow_unsorted_input); + ap_define_string_flag(pstate, "-f", &popts->left_file_name); + ap_define_string_list_flag(pstate, "-j", &popts->poutput_join_field_names); + ap_define_string_list_flag(pstate, "-l", &popts->pleft_join_field_names); + ap_define_string_list_flag(pstate, "-r", &popts->pright_join_field_names); + ap_define_string_flag(pstate, "--lp", &popts->left_prefix); + ap_define_string_flag(pstate, "--rp", &popts->right_prefix); + ap_define_false_flag(pstate, "--np", &popts->emit_pairables); + ap_define_true_flag(pstate, "--ul", &popts->emit_left_unpairables); + ap_define_true_flag(pstate, "--ur", &popts->emit_right_unpairables); + ap_define_true_flag(pstate, "-u", &popts->allow_unsorted_input); - ap_define_string_flag(pstate, "-i", &popts->input_file_format); - ap_define_char_flag(pstate, "--irs", &popts->irs); - ap_define_char_flag(pstate, "--ifs", &popts->ifs); - ap_define_char_flag(pstate, "--ips", &popts->ips); - ap_define_true_flag(pstate, "--repifs", &popts->allow_repeat_ifs); - ap_define_true_flag(pstate, "--repips", &popts->allow_repeat_ips); - ap_define_true_flag(pstate, "--use-mmap", &popts->use_mmap_for_read); - ap_define_false_flag(pstate, "--no-mmap", &popts->use_mmap_for_read); + ap_define_string_flag(pstate, "-i", &popts->input_file_format); + ap_define_string_flag(pstate, "--irs", &popts->irs); + ap_define_string_flag(pstate, "--ifs", &popts->ifs); + ap_define_string_flag(pstate, "--ips", &popts->ips); + ap_define_true_flag(pstate, "--repifs", &popts->allow_repeat_ifs); + ap_define_true_flag(pstate, "--repips", &popts->allow_repeat_ips); + ap_define_true_flag(pstate, "--use-mmap", &popts->use_mmap_for_read); + ap_define_false_flag(pstate, "--no-mmap", &popts->use_mmap_for_read); if (!ap_parse(pstate, verb, pargi, argc, argv)) { mapper_join_usage(argv[0], verb); From a65ded878681a262c0aac59f3c0eb8fb44f95da2 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 14:53:40 -0700 Subject: [PATCH 29/43] todo --- c/todo.txt | 1 + 
1 file changed, 1 insertion(+) diff --git a/c/todo.txt b/c/todo.txt index 2402c1c47..449107210 100644 --- a/c/todo.txt +++ b/c/todo.txt @@ -13,6 +13,7 @@ TOP OF LIST ---------------------------------------------------------------- MEDIUM incorp multi-char IXS for CSV +-> functionally done; needs mlr -h, mlrdoc, and UT. ---------------------------------------------------------------- MAJOR From 643905777deaaf30bbffd42f29e1d9e5cae47d25 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 14:55:05 -0700 Subject: [PATCH 30/43] todo --- c/todo.txt | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/c/todo.txt b/c/todo.txt index 449107210..ad800904e 100644 --- a/c/todo.txt +++ b/c/todo.txt @@ -1,11 +1,7 @@ ================================================================ BUGFIXES -! memory leak in csv reader! careful about slls data, and do not use lrec_put_no_free --> heap-fragging? --> redo inline-pasting but this time correctly weight the fragging effect --> denormalize :( pointer-copying is fine for string/mmap-backed cases in the absence of dquotes; - no struping needed *at all*. +:D ================================================================ TOP OF LIST @@ -14,6 +10,7 @@ TOP OF LIST MEDIUM incorp multi-char IXS for CSV -> functionally done; needs mlr -h, mlrdoc, and UT. +-> then close it out ---------------------------------------------------------------- MAJOR @@ -38,6 +35,8 @@ k oxs done functionally MAJOR csv mem-leak/read-perf: * current option runs faster w/o free, apparently due to heap-fragging + o memory leak in csv reader! careful about slls data, and do not use lrec_put_no_free + o redo inline-pasting but this time correctly weight the fragging effect * for stdio, needs some thought ... * ... but for mmap, it's almost always not necessary to strdup at all: only on escaped-double-quote case. 
From 624e4f08555b92077dd04c98867ffbff3ffa1a83 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 15:01:20 -0700 Subject: [PATCH 31/43] todo --- c/todo.txt | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/c/todo.txt b/c/todo.txt index ad800904e..ce71115e1 100644 --- a/c/todo.txt +++ b/c/todo.txt @@ -7,18 +7,15 @@ BUGFIXES TOP OF LIST ---------------------------------------------------------------- -MEDIUM -incorp multi-char IXS for CSV +MEDIUM: incorp multi-char IXS for CSV -> functionally done; needs mlr -h, mlrdoc, and UT. -> then close it out ---------------------------------------------------------------- -MAJOR -autoconfig +MAJOR: autoconfig ---------------------------------------------------------------- -MAJOR -multi-char separators +MAJOR: multi-char separators for file formats other than CSV k oxs done functionally * need backslash-handling/parsing ... at least, \r \n \t. and, into online help. ! ixs: @@ -32,8 +29,7 @@ k oxs done functionally gross so don't do it unless multichar-getdelim doesn't pan out. ---------------------------------------------------------------- -MAJOR -csv mem-leak/read-perf: +MAJOR: csv mem-leak/read-perf * current option runs faster w/o free, apparently due to heap-fragging o memory leak in csv reader! careful about slls data, and do not use lrec_put_no_free o redo inline-pasting but this time correctly weight the fragging effect @@ -48,6 +44,7 @@ csv mem-leak/read-perf: ---------------------------------------------------------------- MINOR + ? dkvp quoting ... 
maybe wait until after the mmap/perf split * go back and re-apply ctype/isprint portability things to new spots From e8f852bb6f855b46c5bfb234c1d9bf10935ff947 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 16:03:00 -0700 Subject: [PATCH 32/43] let test/run work either with or without autoconfig --- c/test/run | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/c/test/run b/c/test/run index 72cd5ee9f..c5a8b1478 100755 --- a/c/test/run +++ b/c/test/run @@ -13,6 +13,16 @@ # ================================================================ set -e +# For building with autoconf: +# * in-directory build: +# pwd is /path/to/source/tree/c/test +# path to mlr is /path/to/source/tree/c/mlr +# path to test/run is /path/to/source/tree/c/test/run +# * out-of-directory ("VPATH") build: +# pwd is /path/to/build/tree/c/test +# path to mlr is /path/to/build/tree/c/mlr +# path to test/run is /path/to/source/tree/c/test/run + verb=$0 # Note: this is invoked from the Makefile as "./test/run". Without this, f you # invoke it as "test/run" or "sh test/run" then it will produce diff errors. @@ -21,10 +31,14 @@ if [ "$verb" = "test/run" ]; then fi ourdir=`dirname $verb` -path_to_mlr=$ourdir/../mlr +path_to_mlr=`pwd`/mlr +if [ ! 
-f "$path_to_mlr" ]; then + path_to_mlr=$ourdir/../mlr +fi + indir=$ourdir/input expdir=$ourdir/expected -outdir=./output +outdir=$ourdir/output mkdir -p $outdir rm -f $outdir/out @@ -48,7 +62,7 @@ run_mlr() { # Use just "mlr" for info messages echo mlr "$@" echo mlr "$@" >> $outdir/out - # Use path to mlr for invokving the command + # Use path to mlr for invoking the command $path_to_mlr "$@" >> $outdir/out echo >> $outdir/out # since set -e From fd6329c1e1100fdb9aaa0d44499a78ab80c5f1a6 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 16:55:01 -0700 Subject: [PATCH 33/43] proper invocations for test/run, for autoconf/non-autoconf --- c/test/run | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/c/test/run b/c/test/run index c5a8b1478..5314dbe26 100755 --- a/c/test/run +++ b/c/test/run @@ -15,26 +15,36 @@ set -e # For building with autoconf: # * in-directory build: -# pwd is /path/to/source/tree/c/test -# path to mlr is /path/to/source/tree/c/mlr -# path to test/run is /path/to/source/tree/c/test/run +# pwd is /path/to/the/tree/c/test +# path to mlr is /path/to/the/tree/c/mlr +# path to test/run is /path/to/the/tree/c/test/run +# # * out-of-directory ("VPATH") build: # pwd is /path/to/build/tree/c/test # path to mlr is /path/to/build/tree/c/mlr # path to test/run is /path/to/source/tree/c/test/run -verb=$0 -# Note: this is invoked from the Makefile as "./test/run". Without this, f you -# invoke it as "test/run" or "sh test/run" then it will produce diff errors. -if [ "$verb" = "test/run" ]; then - verb=./test/run -fi -ourdir=`dirname $verb` +# For building without autoconf: +# pwd is /does/not/matter +# path to mlr is /path/to/the/tree/c/mlr +# path to test/run is /path/to/the/tree/c/test/run -path_to_mlr=`pwd`/mlr +ourdir=`dirname $0` +srcdir=$ourdir/../.. +pwd=`pwd` + +# For autoconf builds, in-tree or out-of-tree: +path_to_mlr=$pwd/.. if [ ! 
-f "$path_to_mlr" ]; then - path_to_mlr=$ourdir/../mlr + # For non-autoconf builds: + path_to_mlr=$srcdir/c/mlr + echo TRY 2 IS $path_to_mlr fi +if [ ! -f "$path_to_mlr" ]; then + echo "$0: Could not find path to mlr executable." 1>&2 + exit 1 +fi +echo Using mlr executable $path_to_mlr indir=$ourdir/input expdir=$ourdir/expected From da85e0d52ec8da07cbdd8e54268a1233fbe4aaf1 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 17:00:56 -0700 Subject: [PATCH 34/43] proper invocations for test/run, for autoconf/non-autoconf --- c/test/run | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/c/test/run b/c/test/run index 5314dbe26..10f6e70b8 100755 --- a/c/test/run +++ b/c/test/run @@ -34,13 +34,12 @@ srcdir=$ourdir/../.. pwd=`pwd` # For autoconf builds, in-tree or out-of-tree: -path_to_mlr=$pwd/.. -if [ ! -f "$path_to_mlr" ]; then +path_to_mlr=$pwd/../mlr +if [ ! -x "$path_to_mlr" ]; then # For non-autoconf builds: path_to_mlr=$srcdir/c/mlr - echo TRY 2 IS $path_to_mlr fi -if [ ! -f "$path_to_mlr" ]; then +if [ ! -x "$path_to_mlr" ]; then echo "$0: Could not find path to mlr executable." 1>&2 exit 1 fi From 26de29c2c9c706a97a7fb83e9464b06ec6ad32f3 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 17:21:25 -0700 Subject: [PATCH 35/43] neaten --- c/test/run | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/c/test/run b/c/test/run index 10f6e70b8..9f7dd7b6f 100755 --- a/c/test/run +++ b/c/test/run @@ -33,14 +33,16 @@ ourdir=`dirname $0` srcdir=$ourdir/../.. pwd=`pwd` -# For autoconf builds, in-tree or out-of-tree: -path_to_mlr=$pwd/../mlr -if [ ! -x "$path_to_mlr" ]; then - # For non-autoconf builds: - path_to_mlr=$srcdir/c/mlr -fi -if [ ! 
-x "$path_to_mlr" ]; then +try1=$pwd/../mlr # For autoconf builds, in-tree or out-of-tree +try2=$srcdir/c/mlr # For non-autoconf builds +if [ -x "$try1" ]; then + path_to_mlr=$try1 +elif [ -x "$try2" ]; then + path_to_mlr=$try2 +else echo "$0: Could not find path to mlr executable." 1>&2 + echo "Try 1: $try1" 1>&2 + echo "Try 2: $try2" 1>&2 exit 1 fi echo Using mlr executable $path_to_mlr From d58ff99862bb8f9c4d28397b6eab9f21216f977b Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 18:10:39 -0700 Subject: [PATCH 36/43] on-line help for separators --- c/cli/mlrcli.c | 94 +++++++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/c/cli/mlrcli.c b/c/cli/mlrcli.c index 966207c19..e2d3642de 100644 --- a/c/cli/mlrcli.c +++ b/c/cli/mlrcli.c @@ -43,10 +43,36 @@ static mapper_setup_t* mapper_lookup_table[] = { static int mapper_lookup_table_length = sizeof(mapper_lookup_table) / sizeof(mapper_lookup_table[0]); // ---------------------------------------------------------------- -#define DEFAULT_RS "\n" -#define DEFAULT_FS "," -#define DEFAULT_PS "=" +static lhmss_t* pdesc_to_chars_map = NULL; +static lhmss_t* get_desc_to_chars_map() { + if (pdesc_to_chars_map == NULL) { + pdesc_to_chars_map = lhmss_alloc(); + lhmss_put(pdesc_to_chars_map, "cr", "\r"); + lhmss_put(pdesc_to_chars_map, "lf", "\n"); + lhmss_put(pdesc_to_chars_map, "lflf", "\n\n"); + lhmss_put(pdesc_to_chars_map, "crlf", "\r\n"); + lhmss_put(pdesc_to_chars_map, "crlfcrlf", "\r\n\r\n"); + lhmss_put(pdesc_to_chars_map, "tab", "\t"); + lhmss_put(pdesc_to_chars_map, "space", " "); + lhmss_put(pdesc_to_chars_map, "comma", ","); + lhmss_put(pdesc_to_chars_map, "newline", "\n"); + lhmss_put(pdesc_to_chars_map, "pipe", "|"); + lhmss_put(pdesc_to_chars_map, "slash", "/"); + lhmss_put(pdesc_to_chars_map, "colon", ":"); + lhmss_put(pdesc_to_chars_map, "semicolon", "|"); + lhmss_put(pdesc_to_chars_map, "equals", "="); + } + return pdesc_to_chars_map; +} 
+static char* sep_from_arg(char* arg, char* argv0) { + char* chars = lhmss_get(get_desc_to_chars_map(), arg); + if (chars != NULL) + return chars; + else + return arg; +} +// ---------------------------------------------------------------- #define DEFAULT_OFMT "%lf" #define DEFAULT_OQUOTING QUOTE_MINIMAL @@ -88,18 +114,26 @@ static void main_usage(char* argv0, int exit_code) { fprintf(o, " --xtab --ixtab --oxtab Pretty-printed vertical-tabular\n"); fprintf(o, " -p is a keystroke-saver for --nidx --fs space --repifs\n"); fprintf(o, "Separator options, for input, output, or both:\n"); - fprintf(o, " --rs --irs --ors Record separators, defaulting to newline\n"); - fprintf(o, " --fs --ifs --ofs --repifs Field separators, defaulting to \"%s\"\n", DEFAULT_FS); - fprintf(o, " --ps --ips --ops Pair separators, defaulting to \"%s\"\n", DEFAULT_PS); - fprintf(o, " Notes (as of Miller v2.0.0):\n"); - fprintf(o, " * RS/FS/PS are used for DKVP, NIDX, and CSVLITE formats where they must be single-character.\n"); - fprintf(o, " * For CSV, PPRINT, and XTAB formats, RS/FS/PS command-line options are ignored.\n"); + fprintf(o, " --rs --irs --ors Record separators, e.g. newline\n"); + fprintf(o, " --fs --ifs --ofs --repifs Field separators, e.g. comma\n"); + fprintf(o, " --ps --ips --ops Pair separators, e.g. 
equals sign\n"); + fprintf(o, " Notes (as of Miller v2.1.4):\n"); + fprintf(o, " * IRS,IFS,IPS,ORS,OFS,OPS are specifiable for all file formats.\n"); + fprintf(o, " * IRS,IFS,IPS may be multi-character for CSV; they must be single-character for other formats.\n"); + fprintf(o, " The latter restriction will be lifted in a near-future release.\n"); + fprintf(o, " * ORS,OFS,OPS may be multi-character for all formats.\n"); fprintf(o, " * DKVP, NIDX, CSVLITE, PPRINT, and XTAB formats are intended to handle platform-native text data.\n"); - fprintf(o, " In particular, this means LF line-terminators on Linux/OSX.\n"); + fprintf(o, " In particular, this means LF line-terminators by default on Linux/OSX.\n"); fprintf(o, " * CSV is intended to handle RFC-4180-compliant data.\n"); - fprintf(o, " In particular, this means it *only* handles CRLF line-terminators.\n"); - fprintf(o, " * This will change in v2.1.0, at which point there will be a (default-off) LF-termination option\n"); - fprintf(o, " for CSV, multi-char RS/FS/PS, and double-quote support for DKVP.\n"); + fprintf(o, " In particular, this means it uses CRLF line-terminators by default.\n"); + fprintf(o, " So, you can use \"--csv --rs lf\" for Linux-native CSV files.\n"); + fprintf(o, " * You can use \"--fs '|'\", \"--ips :\", etc., or any of the following names for separators:\n"); + fprintf(o, " "); + lhmss_t* pmap = get_desc_to_chars_map(); + for (lhmsse_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) { + fprintf(o, " %s", pe->key); + } + fprintf(o, "\n"); fprintf(o, "Double-quoting for CSV:\n"); fprintf(o, " --quote-all Wrap all fields in double quotes\n"); fprintf(o, " --quote-none Do not wrap any fields in double quotes, even if they have OFS or ORS in them\n"); @@ -143,40 +177,6 @@ static void check_arg_count(char** argv, int argi, int argc, int n) { } } -static char* sep_from_arg(char* arg, char* argv0) { - if (streq(arg, "cr")) - return "\r"; - if (streq(arg, "lf")) - return "\n"; - if (streq(arg, "lflf")) 
- return "\n\n"; - if (streq(arg, "crlf")) - return "\r\n"; - if (streq(arg, "crlfcrlf")) - return "\r\n\r\n"; - if (streq(arg, "tab")) - return "\t"; - if (streq(arg, "tab")) - return "\t"; - if (streq(arg, "space")) - return " "; - if (streq(arg, "comma")) - return ","; - if (streq(arg, "newline")) - return "\n"; - if (streq(arg, "pipe")) - return "|"; - if (streq(arg, "slash")) - return "/"; - if (streq(arg, "colon")) - return ":"; - if (streq(arg, "semicolon")) - return "|"; - if (streq(arg, "equals")) - return "="; - return arg; -} - static mapper_setup_t* look_up_mapper_setup(char* verb) { mapper_setup_t* pmapper_setup = NULL; for (int i = 0; i < mapper_lookup_table_length; i++) { From a4d5e3959ff26052be033b0b1b6e072d785ec068 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 18:37:12 -0700 Subject: [PATCH 37/43] UT cases for CSV formatting options --- c/cli/mlrcli.c | 5 +++-- c/output/lrec_writer_csv.c | 6 ++---- c/test/expected/out | 8 ++++++++ c/test/run | 1 + doc/content-for-feature-comparison.html | 2 +- doc/content-for-file-formats.html | 10 ++++------ doc/content-for-record-heterogeneity.html | 2 +- doc/content-for-to-do.html | 8 ++++---- doc/feature-comparison.html | 2 +- doc/file-formats.html | 10 ++++------ doc/record-heterogeneity.html | 2 +- doc/reference.html | 2 +- doc/to-do.html | 8 ++++---- 13 files changed, 35 insertions(+), 31 deletions(-) diff --git a/c/cli/mlrcli.c b/c/cli/mlrcli.c index e2d3642de..0025019aa 100644 --- a/c/cli/mlrcli.c +++ b/c/cli/mlrcli.c @@ -48,6 +48,7 @@ static lhmss_t* get_desc_to_chars_map() { if (pdesc_to_chars_map == NULL) { pdesc_to_chars_map = lhmss_alloc(); lhmss_put(pdesc_to_chars_map, "cr", "\r"); + lhmss_put(pdesc_to_chars_map, "crcr", "\r\r"); lhmss_put(pdesc_to_chars_map, "lf", "\n"); lhmss_put(pdesc_to_chars_map, "lflf", "\n\n"); lhmss_put(pdesc_to_chars_map, "crlf", "\r\n"); @@ -59,7 +60,7 @@ static lhmss_t* get_desc_to_chars_map() { lhmss_put(pdesc_to_chars_map, "pipe", "|");
lhmss_put(pdesc_to_chars_map, "slash", "/"); lhmss_put(pdesc_to_chars_map, "colon", ":"); - lhmss_put(pdesc_to_chars_map, "semicolon", "|"); + lhmss_put(pdesc_to_chars_map, "semicolon", ";"); lhmss_put(pdesc_to_chars_map, "equals", "="); } return pdesc_to_chars_map; @@ -134,7 +135,7 @@ static void main_usage(char* argv0, int exit_code) { fprintf(o, " %s", pe->key); } fprintf(o, "\n"); - fprintf(o, "Double-quoting for CSV:\n"); + fprintf(o, "Double-quoting for CSV output:\n"); fprintf(o, " --quote-all Wrap all fields in double quotes\n"); fprintf(o, " --quote-none Do not wrap any fields in double quotes, even if they have OFS or ORS in them\n"); fprintf(o, " --quote-minimal Wrap fields in double quotes only if they have OFS or ORS in them\n"); diff --git a/c/output/lrec_writer_csv.c b/c/output/lrec_writer_csv.c index c805907fb..61f163810 100644 --- a/c/output/lrec_writer_csv.c +++ b/c/output/lrec_writer_csv.c @@ -83,10 +83,8 @@ lrec_writer_t* lrec_writer_csv_alloc(char* ors, char* ofs, int oquoting) { lrec_writer_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_csv_state_t)); pstate->onr = 0; - //pstate->ors = ors; - //pstate->ofs = ofs; - pstate->ors = "\r\n"; // xxx temp - pstate->ofs = ","; // xxx temp + pstate->ors = ors; + pstate->ofs = ofs; pstate->orslen = strlen(pstate->ors); pstate->ofslen = strlen(pstate->ofs); diff --git a/c/test/expected/out b/c/test/expected/out index d67477712..1b9823777 100644 --- a/c/test/expected/out +++ b/c/test/expected/out @@ -2208,3 +2208,11 @@ a,b,c 4,5,6 x,y"yy,z +mlr --csv --ifs semicolon --ofs pipe --irs lf --ors lflf cut -x -f b ./test/input/rfc-csv/modify-defaults.csv +a|c + +1|3 + +4|6 + + diff --git a/c/test/run b/c/test/run index 9f7dd7b6f..53012e8c5 100755 --- a/c/test/run +++ b/c/test/run @@ -462,6 +462,7 @@ run_mlr --csv cat $indir/rfc-csv/quoted-comma-truncated.csv run_mlr --csv cat $indir/rfc-csv/quoted-crlf.csv run_mlr --csv cat $indir/rfc-csv/quoted-crlf-truncated.csv run_mlr --csv cat 
$indir/rfc-csv/simple-truncated.csv $indir/rfc-csv/simple.csv +run_mlr --csv --ifs semicolon --ofs pipe --irs lf --ors lflf cut -x -f b $indir/rfc-csv/modify-defaults.csv # ================================================================ # A key feature of this regression script is that it can be invoked from any diff --git a/doc/content-for-feature-comparison.html b/doc/content-for-feature-comparison.html index 7ab619426..8eecafa6e 100644 --- a/doc/content-for-feature-comparison.html +++ b/doc/content-for-feature-comparison.html @@ -2,7 +2,7 @@ POKI_PUT_TOC_HERE

File-format awareness

-Miller respects CSV headers. If you do mlr --csv-input cat *.csv then the header line is written once: +Miller respects CSV headers. If you do mlr --csv cat *.csv then the header line is written once:
diff --git a/doc/content-for-file-formats.html b/doc/content-for-file-formats.html index c039c8040..dacb910e5 100644 --- a/doc/content-for-file-formats.html +++ b/doc/content-for-file-formats.html @@ -9,14 +9,12 @@ changes of field names within a single data stream.

Miller has record separator RS and field separator FS, just as awk does. For TSV, use --fs tab; to convert TSV to -CSV, use --ifs tab --ofs , etc. (See also +CSV, use --ifs tab --ofs comma, etc. (See also POKI_PUT_LINK_FOR_PAGE(reference.html)HERE.) -

The --csvlite option supports programmable single-byte field and record separators, -e.g. you can do TSV. Meanwhile --csv supports RFC-4180 CSV ( -https://tools.ietf.org/html/rfc4180). -For more information about the current status of CSV support in Miller, please see -https://github.com/johnkerl/miller/releases/tag/v2.0.0. +

Miller’s --csv flag supports RFC-4180 CSV ( +https://tools.ietf.org/html/rfc4180). This includes CRLF line-terminators by default, regardless +of platform. You can use mlr --csv --rs lf for native Un*x (LF-terminated) CSV files.

Pretty-printed

Miller’s pretty-print format is like CSV, but column-aligned. For example, compare diff --git a/doc/content-for-record-heterogeneity.html b/doc/content-for-record-heterogeneity.html index 7f7534d05..d66321b7a 100644 --- a/doc/content-for-record-heterogeneity.html +++ b/doc/content-for-record-heterogeneity.html @@ -8,7 +8,7 @@ We think of CSV tables as rectangular: if there are 17 columns in the header the

CSV and pretty-print

-Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get standard CSV as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. For example: +Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get CSV per se as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. The difference between CSV and CSV-lite is that the former is RFC4180-compliant, while the latter readily handles heterogeneous data (which is non-compliant). For example:
POKI_RUN_COMMAND{{cat data/het.dkvp}}HERE diff --git a/doc/content-for-to-do.html b/doc/content-for-to-do.html index a64bce1fd..8fe1dd76d 100644 --- a/doc/content-for-to-do.html +++ b/doc/content-for-to-do.html @@ -6,11 +6,11 @@ announcment, by far the biggest asks were RFC-4180-compliant CSV, and packaging (Homebrew, .deb). -
  • Miller’s record, field, and pair separators can only be single +
  • Miller’s record, field, and pair separators can be single characters (e.g. newline, comma, equals sign), optionally allowing repeats on -input (e.g. multiple spaces treated as one). It would be nice if strings were -supported, e.g. "\n\n" paragraph-oriented record separation, or mix of -space and tab for field separation. +input (e.g. multiple spaces treated as one). Multi-character separator strings +(e.g. double-linefeed) are supported on input and output for CSV, and on output +for other formats. This is a work in progress.
  • String-oriented functions such as sub, and Miller’s filter, could be made far more powerful if a regular-expression diff --git a/doc/feature-comparison.html b/doc/feature-comparison.html index 0d4e412d2..2ac8223da 100644 --- a/doc/feature-comparison.html +++ b/doc/feature-comparison.html @@ -103,7 +103,7 @@ Miller commands were run with pretty-print-tabular output format.

    File-format awareness

    -Miller respects CSV headers. If you do mlr --csv-input cat *.csv then the header line is written once: +Miller respects CSV headers. If you do mlr --csv cat *.csv then the header line is written once:
    diff --git a/doc/file-formats.html b/doc/file-formats.html index 8aa725a16..33cc9c8af 100644 --- a/doc/file-formats.html +++ b/doc/file-formats.html @@ -112,14 +112,12 @@ changes of field names within a single data stream.

    Miller has record separator RS and field separator FS, just as awk does. For TSV, use --fs tab; to convert TSV to -CSV, use --ifs tab --ofs , etc. (See also +CSV, use --ifs tab --ofs comma, etc. (See also Reference.) -

    The --csvlite option supports programmable single-byte field and record separators, -e.g. you can do TSV. Meanwhile --csv supports RFC-4180 CSV ( -https://tools.ietf.org/html/rfc4180). -For more information about the current status of CSV support in Miller, please see -https://github.com/johnkerl/miller/releases/tag/v2.0.0. +

    Miller’s --csv flag supports RFC-4180 CSV ( +https://tools.ietf.org/html/rfc4180). This includes CRLF line-terminators by default, regardless +of platform. You can use mlr --csv --rs lf for native Un*x (LF-terminated) CSV files.

    Pretty-printed

    Miller’s pretty-print format is like CSV, but column-aligned. For example, compare diff --git a/doc/record-heterogeneity.html b/doc/record-heterogeneity.html index 60f121945..3910b2bf7 100644 --- a/doc/record-heterogeneity.html +++ b/doc/record-heterogeneity.html @@ -110,7 +110,7 @@ We think of CSV tables as rectangular: if there are 17 columns in the header the

    CSV and pretty-print

    -Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get standard CSV as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. For example: +Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get CSV per se as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. The difference between CSV and CSV-lite is that the former is RFC4180-compliant, while the latter readily handles heterogeneous data (which is non-compliant). For example:

    diff --git a/doc/reference.html b/doc/reference.html index f9e4910b7..56a820d7d 100644 --- a/doc/reference.html +++ b/doc/reference.html @@ -275,7 +275,7 @@ Other options: Output of one verb may be chained as input to another using "then", e.g. mlr stats1 -a min,mean,max -f flag,u,v -g color then sort -f color Please see http://johnkerl.org/miller/doc and/or http://github.com/johnkerl/miller for more information. -This is Miller version >= v2.1.1. +This is Miller version >= v2.1.4.

    diff --git a/doc/to-do.html b/doc/to-do.html index 3b874d3fd..40324e74f 100644 --- a/doc/to-do.html +++ b/doc/to-do.html @@ -101,11 +101,11 @@ Miller commands were run with pretty-print-tabular output format. announcment, by far the biggest asks were RFC-4180-compliant CSV, and packaging (Homebrew, .deb). -

  • Miller’s record, field, and pair separators can only be single +
  • Miller’s record, field, and pair separators can be single characters (e.g. newline, comma, equals sign), optionally allowing repeats on -input (e.g. multiple spaces treated as one). It would be nice if strings were -supported, e.g. "\n\n" paragraph-oriented record separation, or mix of -space and tab for field separation. +input (e.g. multiple spaces treated as one). Multi-character separator strings +(e.g. double-linefeed) are supported on input and output for CSV, and on output +for other formats. This is a work in progress.
  • String-oriented functions such as sub, and Miller’s filter, could be made far more powerful if a regular-expression From eac77bf0ecf5de3e97fd6d61d8f24233fe509ca4 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 18:39:40 -0700 Subject: [PATCH 38/43] UT cases for CSV formattting options --- doc/reference.html | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/doc/reference.html b/doc/reference.html index 56a820d7d..24993b55b 100644 --- a/doc/reference.html +++ b/doc/reference.html @@ -249,19 +249,22 @@ Data-format options, for input, output, or both: --xtab --ixtab --oxtab Pretty-printed vertical-tabular -p is a keystroke-saver for --nidx --fs space --repifs Separator options, for input, output, or both: - --rs --irs --ors Record separators, defaulting to newline - --fs --ifs --ofs --repifs Field separators, defaulting to "," - --ps --ips --ops Pair separators, defaulting to "=" - Notes (as of Miller v2.0.0): - * RS/FS/PS are used for DKVP, NIDX, and CSVLITE formats where they must be single-character. - * For CSV, PPRINT, and XTAB formats, RS/FS/PS command-line options are ignored. + --rs --irs --ors Record separators, e.g. newline + --fs --ifs --ofs --repifs Field separators, e.g. comma + --ps --ips --ops Pair separators, e.g. equals sign + Notes (as of Miller v2.1.4): + * IRS,IFS,IPS,ORS,OFS,OPS are specifiable for all file formats. + * IRS,IFS,IPS may be multi-character for CSV; they must be single-character for other formats. + The latter restriction will be lifted in a near-future release. + * ORS,OFS,OPS may be multi-character for all formats. * DKVP, NIDX, CSVLITE, PPRINT, and XTAB formats are intended to handle platform-native text data. - In particular, this means LF line-terminators on Linux/OSX. + In particular, this means LF line-terminators by default on Linux/OSX. * CSV is intended to handle RFC-4180-compliant data. - In particular, this means it *only* handles CRLF line-terminators. 
- * This will change in v2.1.0, at which point there will be a (default-off) LF-termination option - for CSV, multi-char RS/FS/PS, and double-quote support for DKVP. -Double-quoting for CSV: + In particular, this means it uses CRLF line-terminators by default. + So, you can use "--csv --rs lf" for Linux-native CSV files. + * You can use "--fs '|'", "--ips :", etc., or any of the following names for separators: + cr crcr lf lflf crlf crlfcrlf tab space comma newline pipe slash colon semicolon equals +Double-quoting for CSV output: --quote-all Wrap all fields in double quotes --quote-none Do not wrap any fields in double quotes, even if they have OFS or ORS in them --quote-minimal Wrap fields in double quotes only if they have OFS or ORS in them From 8c090d849114959f3bee08267ba77b99f8c5ed9d Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 18:40:22 -0700 Subject: [PATCH 39/43] UT cases for CSV formatting options --- c/test/input/rfc-csv/modify-defaults.csv | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 c/test/input/rfc-csv/modify-defaults.csv diff --git a/c/test/input/rfc-csv/modify-defaults.csv b/c/test/input/rfc-csv/modify-defaults.csv new file mode 100644 index 000000000..8278f303a --- /dev/null +++ b/c/test/input/rfc-csv/modify-defaults.csv @@ -0,0 +1,3 @@ +a;b;c +1;2;3 +4;;6 From 4baa7727ab1ba946f700a9e8a35b3c2d94962de8 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 18:40:28 -0700 Subject: [PATCH 40/43] UT cases for CSV formatting options --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index b97812fa5..3f9a2599a 100644 --- a/.gitignore +++ b/.gitignore @@ -33,5 +33,6 @@ c/dsls/filter_dsl_parse.h c/dsls/filter_dsl_parse.out c/dsls/pdm c/dsls/fdm +c/test/output c/output/out tags From 6879eecf9ef10fb6eeb1377dab142b740b18f6ca Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 18:41:34 -0700 Subject: [PATCH 41/43] UT cases for CSV formatting options --- c/todo.txt | 5
----- 1 file changed, 5 deletions(-) diff --git a/c/todo.txt b/c/todo.txt index ce71115e1..655f0ee32 100644 --- a/c/todo.txt +++ b/c/todo.txt @@ -6,11 +6,6 @@ BUGFIXES ================================================================ TOP OF LIST ----------------------------------------------------------------- -MEDIUM: incorp multi-char IXS for CSV --> functionally done; needs mlr -h, mlrdoc, and UT. --> then close it out - ---------------------------------------------------------------- MAJOR: autoconfig From 4be3cc29ed7d28bab5868929bc94b61fa011ad5f Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 18:46:16 -0700 Subject: [PATCH 42/43] neaten --- c/experimental/getlines.c | 1 - 1 file changed, 1 deletion(-) diff --git a/c/experimental/getlines.c b/c/experimental/getlines.c index 5f4cbe96b..c444613d0 100644 --- a/c/experimental/getlines.c +++ b/c/experimental/getlines.c @@ -402,7 +402,6 @@ int main(int argc, char** argv) { // ================================================================ // $ ./getl ../data/big.csv 5|tee x -./getl ../data/big.csv 5|tee x // $ mlr --opprint cat then sort -n t x // type t n type t n From 89df74a2c0734e1ed15241bf02dd65b17c74b5fa Mon Sep 17 00:00:00 2001 From: John Kerl Date: Sun, 13 Sep 2015 19:55:04 -0700 Subject: [PATCH 43/43] todo --- c/todo.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/c/todo.txt b/c/todo.txt index 655f0ee32..ac33d8d43 100644 --- a/c/todo.txt +++ b/c/todo.txt @@ -11,7 +11,7 @@ MAJOR: autoconfig ---------------------------------------------------------------- MAJOR: multi-char separators for file formats other than CSV -k oxs done functionally +k oxs is functionally done * need backslash-handling/parsing ... at least, \r \n \t. and, into online help. ! ixs: o ips & ifs: needs *p==ixs with strneq(p, ixs); also double-null poke (sos&eos) @@ -19,6 +19,7 @@ k oxs done functionally o irs for stdio: it all comes down to getdelim. ! so focus on getline perf. ! 
maybe best idea is to re-impl getdelim with multichar irs. + - rework csv reader to look more like csvlite (which is performant)? ! temporary option is getdelim with final char of the multichar irs; strcmp backwards; usually get it right; occasionally have to strcat/memcpy multiple such. this is gross so don't do it unless multichar-getdelim doesn't pan out. @@ -36,11 +37,17 @@ MAJOR: csv mem-leak/read-perf but lrec_put w/ !LREC_FREE_VALUE for ptr-backed. * or *maybe* pbr retent/free-flags for string/mmap w/o denorm, but only if it's both elegant and fast +! experimental/getlines.c shows that even without the heap-fragging + issue, pfr+psb is 3.5x slower than getdelim. again suggesting + multi-char-terminated getdelim might be the best line of approach. ---------------------------------------------------------------- MINOR -? dkvp quoting ... maybe wait until after the mmap/perf split +* define dkvp, nidx, etc @ cover x 2 + +? dkvp quoting ... wait until after the mmap/perf split. else, very undesirable + performance regression. * go back and re-apply ctype/isprint portability things to new spots @@ -49,8 +56,7 @@ MINOR * dsls/ build outside of pwd? or just lemon $(absdir)/filenamegoeshere.y? * configure w/o autotools? likewise manpage. etc. multiple build levels. -* define dkvp, nidx, etc @ cover x 2 -* mlr faq page +b mlr faq page * --mmap @ mlr -h * ctype ff @ bld.out @@ -59,8 +65,6 @@ MINOR * -h vs. usage : stdout vs. stderr * pprint join? - - * header-length data mismatch et. al: file/line * make an updated dependency list, esp. in light of a2x et al.