diff --git a/.gitignore b/.gitignore index cf96d60c3..0ffcf4b7c 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,7 @@ test-byte-readers test-peek-file-reader test-parse-trie test-lrec -test-maps-and-sets +test-multiple-containers test-join-bucket-keeper termcvt a.out @@ -35,6 +35,8 @@ c/dsls/filter_dsl_parse.h c/dsls/filter_dsl_parse.out c/dsls/pdm c/dsls/fdm +c/test/output +c/output/out tags *.la *.lo diff --git a/c/cli/argparse.c b/c/cli/argparse.c index 90b899579..76bc1beca 100644 --- a/c/cli/argparse.c +++ b/c/cli/argparse.c @@ -237,7 +237,7 @@ int main(int argc, char** argv) { if (plist == NULL) { printf("list is null\n"); } else { - char* out = slls_join(plist, ','); + char* out = slls_join(plist, ","); printf("list is %s\n", out); free(out); } diff --git a/c/cli/mlrcli.c b/c/cli/mlrcli.c index 21716a863..1692cb3d9 100644 --- a/c/cli/mlrcli.c +++ b/c/cli/mlrcli.c @@ -44,10 +44,37 @@ static mapper_setup_t* mapper_lookup_table[] = { static int mapper_lookup_table_length = sizeof(mapper_lookup_table) / sizeof(mapper_lookup_table[0]); // ---------------------------------------------------------------- -#define DEFAULT_RS '\n' -#define DEFAULT_FS ',' -#define DEFAULT_PS '=' +static lhmss_t* pdesc_to_chars_map = NULL; +static lhmss_t* get_desc_to_chars_map() { + if (pdesc_to_chars_map == NULL) { + pdesc_to_chars_map = lhmss_alloc(); + lhmss_put(pdesc_to_chars_map, "cr", "\r"); + lhmss_put(pdesc_to_chars_map, "crcr", "\r\r"); + lhmss_put(pdesc_to_chars_map, "lf", "\n"); + lhmss_put(pdesc_to_chars_map, "lflf", "\n\n"); + lhmss_put(pdesc_to_chars_map, "crlf", "\r\n"); + lhmss_put(pdesc_to_chars_map, "crlfcrlf", "\r\n\r\n"); + lhmss_put(pdesc_to_chars_map, "tab", "\t"); + lhmss_put(pdesc_to_chars_map, "space", " "); + lhmss_put(pdesc_to_chars_map, "comma", ","); + lhmss_put(pdesc_to_chars_map, "newline", "\n"); + lhmss_put(pdesc_to_chars_map, "pipe", "|"); + lhmss_put(pdesc_to_chars_map, "slash", "/"); + lhmss_put(pdesc_to_chars_map, "colon", ":"); + 
lhmss_put(pdesc_to_chars_map, "semicolon", ";"); + lhmss_put(pdesc_to_chars_map, "equals", "="); + } + return pdesc_to_chars_map; +} +static char* sep_from_arg(char* arg, char* argv0) { + char* chars = lhmss_get(get_desc_to_chars_map(), arg); + if (chars != NULL) + return chars; + else + return arg; +} +// ---------------------------------------------------------------- #define DEFAULT_OFMT "%lf" #define DEFAULT_OQUOTING QUOTE_MINIMAL @@ -89,19 +116,27 @@ static void main_usage(char* argv0, int exit_code) { fprintf(o, " --xtab --ixtab --oxtab Pretty-printed vertical-tabular\n"); fprintf(o, " -p is a keystroke-saver for --nidx --fs space --repifs\n"); fprintf(o, "Separator options, for input, output, or both:\n"); - fprintf(o, " --rs --irs --ors Record separators, defaulting to newline\n"); - fprintf(o, " --fs --ifs --ofs --repifs Field separators, defaulting to \"%c\"\n", DEFAULT_FS); - fprintf(o, " --ps --ips --ops Pair separators, defaulting to \"%c\"\n", DEFAULT_PS); - fprintf(o, " Notes (as of Miller v2.0.0):\n"); - fprintf(o, " * RS/FS/PS are used for DKVP, NIDX, and CSVLITE formats where they must be single-character.\n"); - fprintf(o, " * For CSV, PPRINT, and XTAB formats, RS/FS/PS command-line options are ignored.\n"); + fprintf(o, " --rs --irs --ors Record separators, e.g. newline\n"); + fprintf(o, " --fs --ifs --ofs --repifs Field separators, e.g. comma\n"); + fprintf(o, " --ps --ips --ops Pair separators, e.g. 
equals sign\n"); + fprintf(o, " Notes (as of Miller v2.1.4):\n"); + fprintf(o, " * IRS,IFS,IPS,ORS,OFS,OPS are specifiable for all file formats.\n"); + fprintf(o, " * IRS,IFS,IPS may be multi-character for CSV; they must be single-character for other formats.\n"); + fprintf(o, " The latter restriction will be lifted in a near-future release.\n"); + fprintf(o, " * ORS,OFS,OPS may be multi-character for all formats.\n"); fprintf(o, " * DKVP, NIDX, CSVLITE, PPRINT, and XTAB formats are intended to handle platform-native text data.\n"); - fprintf(o, " In particular, this means LF line-terminators on Linux/OSX.\n"); + fprintf(o, " In particular, this means LF line-terminators by default on Linux/OSX.\n"); fprintf(o, " * CSV is intended to handle RFC-4180-compliant data.\n"); - fprintf(o, " In particular, this means it *only* handles CRLF line-terminators.\n"); - fprintf(o, " * This will change in v2.1.0, at which point there will be a (default-off) LF-termination option\n"); - fprintf(o, " for CSV, multi-char RS/FS/PS, and double-quote support for DKVP.\n"); - fprintf(o, "Double-quoting for CSV:\n"); + fprintf(o, " In particular, this means it uses CRLF line-terminators by default.\n"); + fprintf(o, " So, you can use \"--csv --rs lf\" for Linux-native CSV files.\n"); + fprintf(o, " * You can use \"--fs '|'\", \"--ips :\", etc., or any of the following names for separators:\n"); + fprintf(o, " "); + lhmss_t* pmap = get_desc_to_chars_map(); + for (lhmsse_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) { + fprintf(o, " %s", pe->key); + } + fprintf(o, "\n"); + fprintf(o, "Double-quoting for CSV output:\n"); fprintf(o, " --quote-all Wrap all fields in double quotes\n"); fprintf(o, " --quote-none Do not wrap any fields in double quotes, even if they have OFS or ORS in them\n"); fprintf(o, " --quote-minimal Wrap fields in double quotes only if they have OFS or ORS in them\n"); @@ -144,30 +179,6 @@ static void check_arg_count(char** argv, int argi, int argc, int n) { } } 
-static char sep_from_arg(char* arg, char* argv0) { - if (streq(arg, "tab")) - return '\t'; - if (streq(arg, "space")) - return ' '; - if (streq(arg, "comma")) - return ','; - if (streq(arg, "newline")) - return '\n'; - if (streq(arg, "pipe")) - return '|'; - if (streq(arg, "slash")) - return '/'; - if (streq(arg, "colon")) - return ':'; - if (streq(arg, "semicolon")) - return '|'; - if (streq(arg, "equals")) - return '='; - if (strlen(arg) != 1) - main_usage(argv0, 1); - return arg[0]; -} - static mapper_setup_t* look_up_mapper_setup(char* verb) { mapper_setup_t* pmapper_setup = NULL; for (int i = 0; i < mapper_lookup_table_length; i++) { @@ -183,15 +194,40 @@ cli_opts_t* parse_command_line(int argc, char** argv) { cli_opts_t* popts = mlr_malloc_or_die(sizeof(cli_opts_t)); memset(popts, 0, sizeof(*popts)); - popts->irs = DEFAULT_RS; - popts->ifs = DEFAULT_FS; - popts->ips = DEFAULT_PS; + // xxx integrate these with DEFAULT_XS ... + lhmss_t* default_rses = lhmss_alloc(); + lhmss_put(default_rses, "dkvp", "\n"); + lhmss_put(default_rses, "csv", "\r\n"); + lhmss_put(default_rses, "csvlite", "\n"); + lhmss_put(default_rses, "nidx", "\n"); + lhmss_put(default_rses, "xtab", "\n"); + lhmss_put(default_rses, "pprint", "\n"); + + lhmss_t* default_fses = lhmss_alloc(); + lhmss_put(default_fses, "dkvp", ","); + lhmss_put(default_fses, "csv", ","); + lhmss_put(default_fses, "csvlite", ","); + lhmss_put(default_fses, "nidx", ","); // xxx update to space at version bump + lhmss_put(default_fses, "xtab", " "); + lhmss_put(default_fses, "pprint", " "); + + lhmss_t* default_pses = lhmss_alloc(); + lhmss_put(default_pses, "dkvp", "="); + lhmss_put(default_pses, "csv", "X"); + lhmss_put(default_pses, "csvlite", "X"); + lhmss_put(default_pses, "nidx", "X"); + lhmss_put(default_pses, "xtab", "X"); + lhmss_put(default_pses, "pprint", "X"); + + popts->irs = NULL; + popts->ifs = NULL; + popts->ips = NULL; popts->allow_repeat_ifs = FALSE; popts->allow_repeat_ips = FALSE; - popts->ors = 
DEFAULT_RS; - popts->ofs = DEFAULT_FS; - popts->ops = DEFAULT_PS; + popts->ors = NULL; + popts->ofs = NULL; + popts->ops = NULL; popts->ofmt = DEFAULT_OFMT; popts->oquoting = DEFAULT_OQUOTING; @@ -199,8 +235,8 @@ cli_opts_t* parse_command_line(int argc, char** argv) { popts->plrec_writer = NULL; popts->filenames = NULL; - popts->ifmt = "dkvp"; - char* ofmt = "dkvp"; + popts->ifile_fmt = "dkvp"; + popts->ofile_fmt = "dkvp"; popts->use_mmap_for_read = TRUE; int left_align_pprint = TRUE; @@ -232,7 +268,8 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--rs")) { check_arg_count(argv, argi, argc, 2); - popts->ors = popts->irs = sep_from_arg(argv[argi+1], argv[0]); + popts->ors = sep_from_arg(argv[argi+1], argv[0]); + popts->irs = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--irs")) { @@ -248,7 +285,8 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--fs")) { check_arg_count(argv, argi, argc, 2); - popts->ofs = popts->ifs = sep_from_arg(argv[argi+1], argv[0]); + popts->ofs = sep_from_arg(argv[argi+1], argv[0]); + popts->ifs = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--ifs")) { @@ -266,16 +304,17 @@ cli_opts_t* parse_command_line(int argc, char** argv) { } else if (streq(argv[argi], "-p")) { - popts->ifmt = "nidx"; - ofmt = "nidx"; - popts->ifs = ' '; - popts->ofs = ' '; + popts->ifile_fmt = "nidx"; + popts->ofile_fmt = "nidx"; + popts->ifs = " "; + popts->ofs = " "; popts->allow_repeat_ifs = TRUE; } else if (streq(argv[argi], "--ps")) { check_arg_count(argv, argi, argc, 2); - popts->ops = popts->ips = sep_from_arg(argv[argi+1], argv[0]); + popts->ops = sep_from_arg(argv[argi+1], argv[0]); + popts->ips = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--ips")) { @@ -289,40 +328,40 @@ cli_opts_t* parse_command_line(int argc, char** argv) { argi++; } - else if (streq(argv[argi], "--csv")) { popts->ifmt = 
ofmt = "csv"; } - else if (streq(argv[argi], "--icsv")) { popts->ifmt = "csv"; } - else if (streq(argv[argi], "--ocsv")) { ofmt = "csv"; } + else if (streq(argv[argi], "--csv")) { popts->ifile_fmt = popts->ofile_fmt = "csv"; } + else if (streq(argv[argi], "--icsv")) { popts->ifile_fmt = "csv"; } + else if (streq(argv[argi], "--ocsv")) { popts->ofile_fmt = "csv"; } - else if (streq(argv[argi], "--csvlite")) { popts->ifmt = ofmt = "csvlite"; } - else if (streq(argv[argi], "--icsvlite")) { popts->ifmt = "csvlite"; } - else if (streq(argv[argi], "--ocsvlite")) { ofmt = "csvlite"; } + else if (streq(argv[argi], "--csvlite")) { popts->ifile_fmt = popts->ofile_fmt = "csvlite"; } + else if (streq(argv[argi], "--icsvlite")) { popts->ifile_fmt = "csvlite"; } + else if (streq(argv[argi], "--ocsvlite")) { popts->ofile_fmt = "csvlite"; } - else if (streq(argv[argi], "--dkvp")) { popts->ifmt = ofmt = "dkvp"; } - else if (streq(argv[argi], "--idkvp")) { popts->ifmt = "dkvp"; } - else if (streq(argv[argi], "--odkvp")) { ofmt = "dkvp"; } + else if (streq(argv[argi], "--dkvp")) { popts->ifile_fmt = popts->ofile_fmt = "dkvp"; } + else if (streq(argv[argi], "--idkvp")) { popts->ifile_fmt = "dkvp"; } + else if (streq(argv[argi], "--odkvp")) { popts->ofile_fmt = "dkvp"; } - else if (streq(argv[argi], "--nidx")) { popts->ifmt = ofmt = "nidx"; } - else if (streq(argv[argi], "--inidx")) { popts->ifmt = "nidx"; } - else if (streq(argv[argi], "--onidx")) { ofmt = "nidx"; } + else if (streq(argv[argi], "--nidx")) { popts->ifile_fmt = popts->ofile_fmt = "nidx"; } + else if (streq(argv[argi], "--inidx")) { popts->ifile_fmt = "nidx"; } + else if (streq(argv[argi], "--onidx")) { popts->ofile_fmt = "nidx"; } - else if (streq(argv[argi], "--xtab")) { popts->ifmt = ofmt = "xtab"; } - else if (streq(argv[argi], "--ixtab")) { popts->ifmt = "xtab"; } - else if (streq(argv[argi], "--oxtab")) { ofmt = "xtab"; } + else if (streq(argv[argi], "--xtab")) { popts->ifile_fmt = popts->ofile_fmt = "xtab"; } + 
else if (streq(argv[argi], "--ixtab")) { popts->ifile_fmt = "xtab"; } + else if (streq(argv[argi], "--oxtab")) { popts->ofile_fmt = "xtab"; } else if (streq(argv[argi], "--ipprint")) { - popts->ifmt = "csvlite"; - popts->ifs = ' '; + popts->ifile_fmt = "csvlite"; + popts->ifs = " "; popts->allow_repeat_ifs = TRUE; } else if (streq(argv[argi], "--opprint")) { - ofmt = "pprint"; + popts->ofile_fmt = "pprint"; } else if (streq(argv[argi], "--pprint")) { - popts->ifmt = "csvlite"; - popts->ifs = ' '; + popts->ifile_fmt = "csvlite"; + popts->ifs = " "; popts->allow_repeat_ifs = TRUE; - ofmt = "pprint"; + popts->ofile_fmt = "pprint"; } else if (streq(argv[argi], "--right")) { left_align_pprint = FALSE; @@ -330,7 +369,7 @@ cli_opts_t* parse_command_line(int argc, char** argv) { else if (streq(argv[argi], "--ofmt")) { check_arg_count(argv, argi, argc, 2); - popts->ofmt = argv[argi+1]; + popts->ofile_fmt = argv[argi+1]; argi++; } @@ -362,12 +401,52 @@ cli_opts_t* parse_command_line(int argc, char** argv) { nusage(argv[0], argv[argi]); } - if (streq(ofmt, "dkvp")) popts->plrec_writer = lrec_writer_dkvp_alloc(popts->ors, popts->ofs, popts->ops); - else if (streq(ofmt, "csv")) popts->plrec_writer = lrec_writer_csv_alloc(popts->ors, popts->ofs, popts->oquoting); - else if (streq(ofmt, "csvlite")) popts->plrec_writer = lrec_writer_csvlite_alloc(popts->ors, popts->ofs); - else if (streq(ofmt, "nidx")) popts->plrec_writer = lrec_writer_nidx_alloc(popts->ors, popts->ofs); - else if (streq(ofmt, "xtab")) popts->plrec_writer = lrec_writer_xtab_alloc(); - else if (streq(ofmt, "pprint")) popts->plrec_writer = lrec_writer_pprint_alloc(left_align_pprint); + if (popts->irs == NULL) + popts->irs = lhmss_get(default_rses, popts->ifile_fmt); + if (popts->ifs == NULL) + popts->ifs = lhmss_get(default_fses, popts->ifile_fmt); + if (popts->ips == NULL) + popts->ips = lhmss_get(default_pses, popts->ifile_fmt); + + if (popts->ors == NULL) + popts->ors = lhmss_get(default_rses, popts->ofile_fmt); 
+ if (popts->ofs == NULL) + popts->ofs = lhmss_get(default_fses, popts->ofile_fmt); + if (popts->ops == NULL) + popts->ops = lhmss_get(default_pses, popts->ofile_fmt); + + if (popts->irs == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); + exit(1); + } + if (popts->ifs == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); + exit(1); + } + if (popts->ips == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); + exit(1); + } + + if (popts->ors == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); + exit(1); + } + if (popts->ofs == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); + exit(1); + } + if (popts->ops == NULL) { + fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); + exit(1); + } + + if (streq(popts->ofile_fmt, "dkvp")) popts->plrec_writer = lrec_writer_dkvp_alloc(popts->ors, popts->ofs, popts->ops); + else if (streq(popts->ofile_fmt, "csv")) popts->plrec_writer = lrec_writer_csv_alloc(popts->ors, popts->ofs, popts->oquoting); + else if (streq(popts->ofile_fmt, "csvlite")) popts->plrec_writer = lrec_writer_csvlite_alloc(popts->ors, popts->ofs); + else if (streq(popts->ofile_fmt, "nidx")) popts->plrec_writer = lrec_writer_nidx_alloc(popts->ors, popts->ofs); + else if (streq(popts->ofile_fmt, "xtab")) popts->plrec_writer = lrec_writer_xtab_alloc(popts->ors, popts->ofs); + else if (streq(popts->ofile_fmt, "pprint")) popts->plrec_writer = lrec_writer_pprint_alloc(popts->ors, popts->ofs, left_align_pprint); else { main_usage(argv[0], 1); } @@ -414,7 +493,7 @@ cli_opts_t* parse_command_line(int argc, char** argv) { if (argi == argc) popts->use_mmap_for_read = FALSE; - 
popts->plrec_reader = lrec_reader_alloc(popts->ifmt, popts->use_mmap_for_read, + popts->plrec_reader = lrec_reader_alloc(popts->ifile_fmt, popts->use_mmap_for_read, popts->irs, popts->ifs, popts->allow_repeat_ifs, popts->ips, popts->allow_repeat_ips); if (popts->plrec_reader == NULL) main_usage(argv[0], 1); diff --git a/c/cli/mlrcli.h b/c/cli/mlrcli.h index 63477d3a6..6f3bb9b95 100644 --- a/c/cli/mlrcli.h +++ b/c/cli/mlrcli.h @@ -17,17 +17,18 @@ #define QUOTE_NUMERIC 0xb4 typedef struct _cli_opts_t { - char irs; - char ifs; - char ips; + char* irs; + char* ifs; + char* ips; int allow_repeat_ifs; int allow_repeat_ips; int use_mmap_for_read; - char* ifmt; + char* ifile_fmt; + char* ofile_fmt; - char ors; - char ofs; - char ops; + char* ors; + char* ofs; + char* ops; char* ofmt; int oquoting; diff --git a/c/containers/dheap.c b/c/containers/dheap.c index ea46bdff0..74916ae87 100644 --- a/c/containers/dheap.c +++ b/c/containers/dheap.c @@ -120,20 +120,20 @@ void dheap_print(dheap_t *pdheap) // 4 5 6 7 // 8 9 10 11 12 13 14 15 -static void dheap_check_aux(dheap_t *pdheap, int i, char *file, int line) +static int dheap_check_aux(dheap_t *pdheap, int i, char *file, int line) { int n = pdheap->n; double *pe = pdheap->elements; if (i >= n) - return; + return TRUE; int li = dheap_left_child_index (i, pdheap->n); int ri = dheap_right_child_index(i, pdheap->n); if (li != -1) { if (pe[i] < pe[li]) { fprintf(stderr, "dheap check fail %s:%d pe[%d]=%lf < pe[%d]=%lf\n", file, line, i, pe[i], li, pe[li]); - exit(1); + return FALSE; } dheap_check_aux(pdheap, li, file, line); } @@ -141,15 +141,16 @@ static void dheap_check_aux(dheap_t *pdheap, int i, char *file, int line) if (pe[i] < pe[ri]) { fprintf(stderr, "dheap check fail %s:%d pe[%d]=%lf < pe[%d]=%lf\n", file, line, i, pe[i], ri, pe[ri]); - exit(1); + return FALSE; } dheap_check_aux(pdheap, ri, file, line); } + return TRUE; } -void dheap_check(dheap_t *pdheap, char *file, int line) +int dheap_check(dheap_t *pdheap, char *file, 
int line) { - dheap_check_aux(pdheap, 1, file, line); + return dheap_check_aux(pdheap, 1, file, line); } // ---------------------------------------------------------------- diff --git a/c/containers/dheap.h b/c/containers/dheap.h index 94a165034..e1533f1f6 100644 --- a/c/containers/dheap.h +++ b/c/containers/dheap.h @@ -19,10 +19,12 @@ dheap_t *dheap_alloc(); dheap_t *dheap_from_array(double *array, int n); void dheap_free(dheap_t *pheap); -void dheap_print(dheap_t *pdheap); -void dheap_check(dheap_t *pdheap, char *file, int line); - void dheap_add(dheap_t *pdheap, double v); double dheap_remove(dheap_t *pdheap); +// For debug +void dheap_print(dheap_t *pdheap); +// For unit test +int dheap_check(dheap_t *pdheap, char *file, int line); + #endif // DHEAP_H diff --git a/c/containers/hss.c b/c/containers/hss.c index 44af59745..19f311f16 100644 --- a/c/containers/hss.c +++ b/c/containers/hss.c @@ -231,7 +231,7 @@ static char* get_state_name(int state) { } } -void hss_dump(hss_t* pset) { +void hss_print(hss_t* pset) { for (int index = 0; index < pset->array_length; index++) { hsse_t* pe = &pset->array[index]; diff --git a/c/containers/join_bucket_keeper.c b/c/containers/join_bucket_keeper.c index aaff9e232..42d1e7277 100644 --- a/c/containers/join_bucket_keeper.c +++ b/c/containers/join_bucket_keeper.c @@ -47,10 +47,10 @@ join_bucket_keeper_t* join_bucket_keeper_alloc( char* left_file_name, char* input_file_format, int use_mmap_for_read, - char irs, - char ifs, + char* irs, + char* ifs, int allow_repeat_ifs, - char ips, + char* ips, int allow_repeat_ips, slls_t* pleft_field_names ) { diff --git a/c/containers/join_bucket_keeper.h b/c/containers/join_bucket_keeper.h index b2df8fcbb..0b249e482 100644 --- a/c/containers/join_bucket_keeper.h +++ b/c/containers/join_bucket_keeper.h @@ -35,10 +35,10 @@ join_bucket_keeper_t* join_bucket_keeper_alloc( char* left_file_name, char* input_file_format, int use_mmap_for_read, - char irs, - char ifs, + char* irs, + char* ifs, int 
allow_repeat_ifs, - char ips, + char* ips, int allow_repeat_ips, slls_t* pleft_field_names); diff --git a/c/containers/lhms2v.c b/c/containers/lhms2v.c index 5df240de6..8ee9b2162 100644 --- a/c/containers/lhms2v.c +++ b/c/containers/lhms2v.c @@ -309,7 +309,7 @@ static char* get_state_name(int state) { } } -void lhms2v_dump(lhms2v_t* pmap) { +void lhms2v_print(lhms2v_t* pmap) { for (int index = 0; index < pmap->array_length; index++) { lhms2ve_t* pe = &pmap->entries[index]; diff --git a/c/containers/lhmsi.c b/c/containers/lhmsi.c index 3553266b7..c7e390e16 100644 --- a/c/containers/lhmsi.c +++ b/c/containers/lhmsi.c @@ -325,7 +325,7 @@ static char* get_state_name(int state) { } } -void lhmsi_dump(lhmsi_t* pmap) { +void lhmsi_print(lhmsi_t* pmap) { for (int index = 0; index < pmap->array_length; index++) { lhmsie_t* pe = &pmap->entries[index]; diff --git a/c/containers/lhmslv.c b/c/containers/lhmslv.c index 77d43c3aa..aa4e7d6b3 100644 --- a/c/containers/lhmslv.c +++ b/c/containers/lhmslv.c @@ -246,6 +246,7 @@ void* lhmslv_remove(lhmslv_t* pmap, slls_t* key) { void lhmslv_clear(lhmslv_t* pmap) { for (int i = 0; i < pmap->array_length; i++) { lhmslve_clear(&pmap->entries[i]); + pmap->states[i] = EMPTY; } pmap->num_occupied = 0; pmap->num_freed = 0; @@ -271,7 +272,7 @@ static void lhmslv_enlarge(lhmslv_t* pmap) { } // ---------------------------------------------------------------- -void lhmslv_check_counts(lhmslv_t* pmap) { +int lhmslv_check_counts(lhmslv_t* pmap) { int nocc = 0; int ndel = 0; for (int index = 0; index < pmap->array_length; index++) { @@ -284,14 +285,15 @@ void lhmslv_check_counts(lhmslv_t* pmap) { fprintf(stderr, "occupancy-count mismatch: actual %d != cached %d\n", nocc, pmap->num_occupied); - exit(1); + return FALSE; } if (ndel != pmap->num_freed) { fprintf(stderr, "freed-count mismatch: actual %d != cached %d\n", ndel, pmap->num_freed); - exit(1); + return FALSE; } + return TRUE; } // ---------------------------------------------------------------- 
@@ -304,13 +306,13 @@ static char* get_state_name(int state) { } } -void lhmslv_dump(lhmslv_t* pmap) { +void lhmslv_print(lhmslv_t* pmap) { for (int index = 0; index < pmap->array_length; index++) { lhmslve_t* pe = &pmap->entries[index]; const char* key_string = (pe == NULL) ? "none" : pe->key == NULL ? "null" : - slls_join(pe->key, ','); + slls_join(pe->key, ","); const char* value_string = (pe == NULL) ? "none" : pe->pvvalue == NULL ? "null" : pe->pvvalue; @@ -325,7 +327,7 @@ void lhmslv_dump(lhmslv_t* pmap) { for (lhmslve_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) { const char* key_string = (pe == NULL) ? "none" : pe->key == NULL ? "null" : - slls_join(pe->key, ','); + slls_join(pe->key, ","); const char* value_string = (pe == NULL) ? "none" : pe->pvvalue == NULL ? "null" : pe->pvvalue; diff --git a/c/containers/lhmslv.h b/c/containers/lhmslv.h index 91a3a6a09..eb7d43c26 100644 --- a/c/containers/lhmslv.h +++ b/c/containers/lhmslv.h @@ -49,4 +49,7 @@ void* lhmslv_remove(lhmslv_t* pmap, slls_t* key); void lhmslv_clear(lhmslv_t* pmap); int lhmslv_size(lhmslv_t* pmap); +// Unit-test hook +int lhmslv_check_counts(lhmslv_t* pmap); + #endif // LHMSLV_H diff --git a/c/containers/lhmss.c b/c/containers/lhmss.c index cfe30a489..9a732c8b3 100644 --- a/c/containers/lhmss.c +++ b/c/containers/lhmss.c @@ -257,30 +257,6 @@ static void lhmss_enlarge(lhmss_t* pmap) { free(old_states); } -// ---------------------------------------------------------------- -void lhmss_check_counts(lhmss_t* pmap) { - int nocc = 0; - int ndel = 0; - for (int index = 0; index < pmap->array_length; index++) { - if (pmap->states[index] == OCCUPIED) - nocc++; - else if (pmap->states[index] == DELETED) - ndel++; - } - if (nocc != pmap->num_occupied) { - fprintf(stderr, - "occupancy-count mismatch: actual %d != cached %d.\n", - nocc, pmap->num_occupied); - exit(1); - } - if (ndel != pmap->num_freed) { - fprintf(stderr, - "freed-count mismatch: actual %d != cached %d.\n", - ndel, pmap->num_freed); - 
exit(1); - } -} - // ---------------------------------------------------------------- static char* get_state_name(int state) { switch(state) { @@ -291,7 +267,7 @@ static char* get_state_name(int state) { } } -void lhmss_dump(lhmss_t* pmap) { +void lhmss_print(lhmss_t* pmap) { for (int index = 0; index < pmap->array_length; index++) { lhmsse_t* pe = &pmap->entries[index]; @@ -322,3 +298,28 @@ void lhmss_dump(lhmss_t* pmap) { pe->ideal_index, key_string, value_string); } } + +// ---------------------------------------------------------------- +int lhmss_check_counts(lhmss_t* pmap) { + int nocc = 0; + int ndel = 0; + for (int index = 0; index < pmap->array_length; index++) { + if (pmap->states[index] == OCCUPIED) + nocc++; + else if (pmap->states[index] == DELETED) + ndel++; + } + if (nocc != pmap->num_occupied) { + fprintf(stderr, + "occupancy-count mismatch: actual %d != cached %d.\n", + nocc, pmap->num_occupied); + return FALSE; + } + if (ndel != pmap->num_freed) { + fprintf(stderr, + "deleted-count mismatch: actual %d != cached %d.\n", + ndel, pmap->num_freed); + return FALSE; + } + return TRUE; +} diff --git a/c/containers/lhmss.h b/c/containers/lhmss.h index 0cd5bd43f..131e02c5f 100644 --- a/c/containers/lhmss.h +++ b/c/containers/lhmss.h @@ -49,4 +49,9 @@ int lhmss_has_key(lhmss_t* pmap, char* key); void lhmss_remove(lhmss_t* pmap, char* key); void lhmss_rename(lhmss_t* pmap, char* old_key, char* new_key); +void lhmss_print(lhmss_t* pmap); + +// Unit-test hook +int lhmss_check_counts(lhmss_t* pmap); + #endif // LHMSS_H diff --git a/c/containers/lhmsv.c b/c/containers/lhmsv.c index 37fe1dcf0..eeb49c052 100644 --- a/c/containers/lhmsv.c +++ b/c/containers/lhmsv.c @@ -245,7 +245,7 @@ static void lhmsv_enlarge(lhmsv_t* pmap) { } // ---------------------------------------------------------------- -void lhmsv_check_counts(lhmsv_t* pmap) { +int lhmsv_check_counts(lhmsv_t* pmap) { int nocc = 0; int ndel = 0; for (int index = 0; index < pmap->array_length; index++) { @@ 
-258,14 +258,15 @@ void lhmsv_check_counts(lhmsv_t* pmap) { fprintf(stderr, "occupancy-count mismatch: actual %d != cached %d.\n", nocc, pmap->num_occupied); - exit(1); + return FALSE; } if (ndel != pmap->num_freed) { fprintf(stderr, "deleted-count mismatch: actual %d != cached %d.\n", ndel, pmap->num_freed); - exit(1); + return FALSE; } + return TRUE; } // ---------------------------------------------------------------- @@ -278,7 +279,7 @@ static char* get_state_name(int state) { } } -void lhmsv_dump(lhmsv_t* pmap) { +void lhmsv_print(lhmsv_t* pmap) { for (int index = 0; index < pmap->array_length; index++) { lhmsve_t* pe = &pmap->entries[index]; diff --git a/c/containers/lhmsv.h b/c/containers/lhmsv.h index 3a1a6355f..273ecbe74 100644 --- a/c/containers/lhmsv.h +++ b/c/containers/lhmsv.h @@ -47,4 +47,7 @@ void* lhmsv_get(lhmsv_t* pmap, char* key); int lhmsv_has_key(lhmsv_t* pmap, char* key); void lhmsv_remove(lhmsv_t* pmap, char* key); +// Unit-test hook +int lhmsv_check_counts(lhmsv_t* pmap); + #endif // LHMSV_H diff --git a/c/containers/lrec.c b/c/containers/lrec.c index f779a68c0..f986546ce 100644 --- a/c/containers/lrec.c +++ b/c/containers/lrec.c @@ -428,17 +428,17 @@ lrec_t* lrec_literal_4(char* k1, char* v1, char* k2, char* v2, char* k3, char* v void lrec_print(lrec_t* prec) { FILE* output_stream = stdout; - char rs = '\n'; - char fs = ','; - char ps = '='; + char ors = '\n'; + char ofs = ','; + char ops = '='; int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) - fputc(fs, output_stream); + fputc(ofs, output_stream); fputs(pe->key, output_stream); - fputc(ps, output_stream); + fputc(ops, output_stream); fputs(pe->value, output_stream); nf++; } - fputc(rs, output_stream); + fputc(ors, output_stream); } diff --git a/c/containers/lrec.h b/c/containers/lrec.h index b55d7de53..60b7575c6 100644 --- a/c/containers/lrec.h +++ b/c/containers/lrec.h @@ -111,6 +111,7 @@ void lrec_move_to_tail(lrec_t* prec, char* key); void 
lrec_free(lrec_t* prec); +void lrec_print(lrec_t* prec); void lrec_dump(lrec_t* prec); void lrec_dump_titled(char* msg, lrec_t* prec); @@ -123,6 +124,4 @@ lrec_t* lrec_literal_2(char* k1, char* v1, char* k2, char* v2); lrec_t* lrec_literal_3(char* k1, char* v1, char* k2, char* v2, char* k3, char* v3); lrec_t* lrec_literal_4(char* k1, char* v1, char* k2, char* v2, char* k3, char* v3, char* k4, char* v4); -void lrec_print(lrec_t* prec); - #endif // LREC_H diff --git a/c/containers/percentile_keeper.c b/c/containers/percentile_keeper.c index a4319a7d4..e7af55c81 100644 --- a/c/containers/percentile_keeper.c +++ b/c/containers/percentile_keeper.c @@ -65,3 +65,11 @@ double percentile_keeper_emit(percentile_keeper_t* ppercentile_keeper, double pe } return ppercentile_keeper->data[compute_index(ppercentile_keeper->size, percentile)]; } + +// ---------------------------------------------------------------- +void percentile_keeper_print(percentile_keeper_t* ppercentile_keeper) { + printf("percentile_keeper dump:\n"); + for (int i = 0; i < ppercentile_keeper->size; i++) + printf("[%02d] %.8lf\n", i, ppercentile_keeper->data[i]); +} + diff --git a/c/containers/percentile_keeper.h b/c/containers/percentile_keeper.h index f737c5adc..dcfba6307 100644 --- a/c/containers/percentile_keeper.h +++ b/c/containers/percentile_keeper.h @@ -18,4 +18,7 @@ void percentile_keeper_free(percentile_keeper_t* ppercentile_keeper); void percentile_keeper_ingest(percentile_keeper_t* ppercentile_keeper, double value); double percentile_keeper_emit(percentile_keeper_t* ppercentile_keeper, double percentile); +// For debug/test +void percentile_keeper_print(percentile_keeper_t* ppercentile_keeper); + #endif // PERCENTILE_KEEPER_H diff --git a/c/containers/slls.c b/c/containers/slls.c index b1935097a..9cafa77e8 100644 --- a/c/containers/slls.c +++ b/c/containers/slls.c @@ -118,17 +118,16 @@ slls_t* slls_from_line(char* line, char ifs, int allow_repeat_ifs) { // 
---------------------------------------------------------------- // xxx cmt for debug. inefficient. or fix that. // xxx rename to slls_alloc_join -char* slls_join(slls_t* plist, char fs) { +char* slls_join(slls_t* plist, char* ofs) { int len = 0; for (sllse_t* pe = plist->phead; pe != NULL; pe = pe->pnext) - len += strlen(pe->value) + 1; // include space for fs and null-terminator + len += strlen(pe->value) + 1; // include space for ofs and null-terminator char* output = mlr_malloc_or_die(len); - char sep[2] = {fs, 0}; *output = 0; for (sllse_t* pe = plist->phead; pe != NULL; pe = pe->pnext) { strcat(output, pe->value); if (pe->pnext != NULL) { - strcat(output, sep); + strcat(output, ofs); } } diff --git a/c/containers/slls.h b/c/containers/slls.h index a3f1ddf61..b13ce4743 100644 --- a/c/containers/slls.h +++ b/c/containers/slls.h @@ -38,7 +38,7 @@ int slls_compare_lexically(slls_t* pa, slls_t* pb); void slls_sort(slls_t* plist); // Debug routines: -char* slls_join(slls_t* plist, char fs); +char* slls_join(slls_t* plist, char* ofs); void slls_print(slls_t* plist); #endif // SLLS_H diff --git a/c/containers/test_join_bucket_keeper.c b/c/containers/test_join_bucket_keeper.c index ae49d376f..19838d4b8 100644 --- a/c/containers/test_join_bucket_keeper.c +++ b/c/containers/test_join_bucket_keeper.c @@ -427,6 +427,7 @@ static char * run_all_tests() { } int main(int argc, char **argv) { + printf("TEST_JOIN_BUCKET_KEEPER ENTER\n"); if ((argc == 2) && streq(argv[1], "-v")) tjbk_verbose = TRUE; diff --git a/c/containers/test_lrec.c b/c/containers/test_lrec.c index 4ed619cea..593976d95 100644 --- a/c/containers/test_lrec.c +++ b/c/containers/test_lrec.c @@ -1,8 +1,5 @@ #include #include -#ifdef MLR_USE_MCHECK -#include -#endif // MLR_USE_MCHECK #include "lib/minunit.h" #include "lib/mlrutil.h" #include "containers/lrec.h" @@ -262,14 +259,7 @@ static char * run_all_tests() { } int main(int argc, char **argv) { -#ifdef MLR_USE_MCHECK - if (mcheck(NULL) != 0) { - printf("Could 
not set up mcheck\n"); - exit(1); - } - printf("Set up mcheck\n"); -#endif // MLR_USE_MCHECK - + printf("TEST_LREC ENTER\n"); char *result = run_all_tests(); printf("\n"); if (result != 0) { diff --git a/c/containers/test_maps_and_sets.c b/c/containers/test_maps_and_sets.c deleted file mode 100644 index 829216a06..000000000 --- a/c/containers/test_maps_and_sets.c +++ /dev/null @@ -1,478 +0,0 @@ -#include -#include -#include "lib/minunit.h" -#include "lib/mlrutil.h" -#include "containers/slls.h" -#include "containers/sllv.h" -#include "containers/hss.h" -#include "containers/lhmsi.h" -#include "containers/lhms2v.h" - -#ifdef __TEST_MAPS_AND_SETS_MAIN__ -int tests_run = 0; -int tests_failed = 0; -int assertions_run = 0; -int assertions_failed = 0; - -// ---------------------------------------------------------------- -static char* test_slls() { - slls_t* plist = slls_from_line(strdup(""), ',', FALSE); - mu_assert_lf(plist->length == 0); - - plist = slls_from_line(strdup("a"), ',', FALSE); - mu_assert_lf(plist->length == 1); - - plist = slls_from_line(strdup("c,d,a,e,b"), ',', FALSE); - mu_assert_lf(plist->length == 5); - - sllse_t* pe = plist->phead; - - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "c")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "d")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "a")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "e")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "b")); pe = pe->pnext; - mu_assert_lf(pe == NULL); - - slls_sort(plist); - - mu_assert_lf(plist->length == 5); - pe = plist->phead; - - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "a")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "b")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "c")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "d")); pe = 
pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "e")); pe = pe->pnext; - mu_assert_lf(pe == NULL); - - return NULL; -} - -// ---------------------------------------------------------------- -static char* test_sllv_append() { - mu_assert_lf(0 == 0); - - sllv_t* pa = sllv_alloc(); - sllv_add(pa, "a"); - sllv_add(pa, "b"); - sllv_add(pa, "c"); - mu_assert_lf(pa->length == 3); - - sllve_t* pe = pa->phead; - - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "a")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "b")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "c")); pe = pe->pnext; - mu_assert_lf(pe == NULL); - - sllv_t* pb = sllv_alloc(); - sllv_add(pb, "d"); - sllv_add(pb, "e"); - mu_assert_lf(pb->length == 2); - - pe = pb->phead; - - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "d")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "e")); pe = pe->pnext; - mu_assert_lf(pe == NULL); - - pa = sllv_append(pa, pb); - - mu_assert_lf(pa->length == 5); - mu_assert_lf(pb->length == 2); - - pe = pa->phead; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "a")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "b")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "c")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "d")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "e")); pe = pe->pnext; - mu_assert_lf(pe == NULL); - - pe = pb->phead; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "d")); pe = pe->pnext; - mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "e")); pe = pe->pnext; - mu_assert_lf(pe == NULL); - - return NULL; -} - -// ---------------------------------------------------------------- -static char* test_hss() { - - hss_t *pset = hss_alloc(); - mu_assert_lf(pset->num_occupied == 0); - - hss_add(pset, 
"x"); - mu_assert_lf(pset->num_occupied == 1); - mu_assert_lf(!hss_has(pset, "w")); - mu_assert_lf(hss_has(pset, "x")); - mu_assert_lf(!hss_has(pset, "y")); - mu_assert_lf(!hss_has(pset, "z")); - mu_assert_lf(hss_check_counts(pset)); - - hss_add(pset, "y"); - mu_assert_lf(pset->num_occupied == 2); - mu_assert_lf(!hss_has(pset, "w")); - mu_assert_lf(hss_has(pset, "x")); - mu_assert_lf(hss_has(pset, "y")); - mu_assert_lf(!hss_has(pset, "z")); - mu_assert_lf(hss_check_counts(pset)); - - hss_add(pset, "x"); - mu_assert_lf(pset->num_occupied == 2); - mu_assert_lf(!hss_has(pset, "w")); - mu_assert_lf(hss_has(pset, "x")); - mu_assert_lf(hss_has(pset, "y")); - mu_assert_lf(!hss_has(pset, "z")); - mu_assert_lf(hss_check_counts(pset)); - - hss_add(pset, "z"); - mu_assert_lf(pset->num_occupied == 3); - mu_assert_lf(!hss_has(pset, "w")); - mu_assert_lf(hss_has(pset, "x")); - mu_assert_lf(hss_has(pset, "y")); - mu_assert_lf(hss_has(pset, "z")); - mu_assert_lf(hss_check_counts(pset)); - - hss_remove(pset, "y"); - mu_assert_lf(pset->num_occupied == 2); - mu_assert_lf(!hss_has(pset, "w")); - mu_assert_lf(hss_has(pset, "x")); - mu_assert_lf(!hss_has(pset, "y")); - mu_assert_lf(hss_has(pset, "z")); - mu_assert_lf(hss_check_counts(pset)); - - hss_clear(pset); - mu_assert_lf(!hss_has(pset, "w")); - mu_assert_lf(!hss_has(pset, "x")); - mu_assert_lf(!hss_has(pset, "y")); - mu_assert_lf(!hss_has(pset, "z")); - mu_assert_lf(hss_check_counts(pset)); - - hss_free(pset); - - return NULL; -} - -// ---------------------------------------------------------------- -static char* test_lhmsi() { - mu_assert_lf(0 == 0); - - lhmsi_t *pmap = lhmsi_alloc(); - mu_assert_lf(pmap->num_occupied == 0); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(!lhmsi_has_key(pmap, "x")); - mu_assert_lf(!lhmsi_has_key(pmap, "y")); - mu_assert_lf(!lhmsi_has_key(pmap, "z")); - mu_assert_lf(lhmsi_check_counts(pmap)); - - lhmsi_put(pmap, "x", 3); - mu_assert_lf(pmap->num_occupied == 1); - 
mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(lhmsi_has_key(pmap, "x")); - mu_assert_lf(!lhmsi_has_key(pmap, "y")); - mu_assert_lf(!lhmsi_has_key(pmap, "z")); - mu_assert_lf(lhmsi_check_counts(pmap)); - - lhmsi_put(pmap, "y", 5); - mu_assert_lf(pmap->num_occupied == 2); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(lhmsi_has_key(pmap, "x")); - mu_assert_lf(lhmsi_has_key(pmap, "y")); - mu_assert_lf(!lhmsi_has_key(pmap, "z")); - mu_assert_lf(lhmsi_check_counts(pmap)); - - lhmsi_put(pmap, "x", 4); - mu_assert_lf(pmap->num_occupied == 2); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(lhmsi_has_key(pmap, "x")); - mu_assert_lf(lhmsi_has_key(pmap, "y")); - mu_assert_lf(!lhmsi_has_key(pmap, "z")); - mu_assert_lf(lhmsi_check_counts(pmap)); - - lhmsi_put(pmap, "z", 7); - mu_assert_lf(pmap->num_occupied == 3); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(lhmsi_has_key(pmap, "x")); - mu_assert_lf(lhmsi_has_key(pmap, "y")); - mu_assert_lf(lhmsi_has_key(pmap, "z")); - mu_assert_lf(lhmsi_check_counts(pmap)); - - lhmsi_remove(pmap, "y"); - mu_assert_lf(pmap->num_occupied == 2); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(lhmsi_has_key(pmap, "x")); - mu_assert_lf(!lhmsi_has_key(pmap, "y")); - mu_assert_lf(lhmsi_has_key(pmap, "z")); - mu_assert_lf(lhmsi_check_counts(pmap)); - - lhmsi_clear(pmap); - mu_assert_lf(pmap->num_occupied == 0); - mu_assert_lf(!lhmsi_has_key(pmap, "w")); - mu_assert_lf(!lhmsi_has_key(pmap, "x")); - mu_assert_lf(!lhmsi_has_key(pmap, "y")); - mu_assert_lf(!lhmsi_has_key(pmap, "z")); - mu_assert_lf(lhmsi_check_counts(pmap)); - - lhmsi_free(pmap); - - return NULL; -} - -// lhmsi_remove(pmap, "y"); -// printf("map size = %d\n", pmap->num_occupied); -// lhmsi_dump(pmap); -// printf("map has(\"w\") = %d\n", lhmsi_has_key(pmap, "w")); -// printf("map has(\"x\") = %d\n", lhmsi_has_key(pmap, "x")); -// printf("map has(\"y\") = %d\n", lhmsi_has_key(pmap, "y")); -// printf("map has(\"z\") = %d\n", 
lhmsi_has_key(pmap, "z")); -// lhmsi_check_counts(pmap); -// lhmsi_free(pmap); - -// ---------------------------------------------------------------- -static char* test_lhms2v() { - mu_assert_lf(0 == 0); - - lhms2v_t *pmap = lhms2v_alloc(); - mu_assert_lf(pmap->num_occupied == 0); - mu_assert_lf(lhms2v_check_counts(pmap)); - - lhms2v_put(pmap, "a", "x", "3"); - mu_assert_lf(pmap->num_occupied == 1); - mu_assert_lf(lhms2v_check_counts(pmap)); - - lhms2v_put(pmap, "a", "y", "5"); - mu_assert_lf(pmap->num_occupied == 2); - mu_assert_lf(lhms2v_check_counts(pmap)); - - lhms2v_put(pmap, "a", "x", "4"); - mu_assert_lf(pmap->num_occupied == 2); - mu_assert_lf(lhms2v_check_counts(pmap)); - - lhms2v_put(pmap, "b", "z", "7"); - mu_assert_lf(pmap->num_occupied == 3); - mu_assert_lf(lhms2v_check_counts(pmap)); - - lhms2v_remove(pmap, "a", "y"); - mu_assert_lf(pmap->num_occupied == 2); - mu_assert_lf(lhms2v_check_counts(pmap)); - - lhms2v_clear(pmap); - mu_assert_lf(pmap->num_occupied == 0); - mu_assert_lf(lhms2v_check_counts(pmap)); - - lhms2v_free(pmap); - - return NULL; -} - -// ---------------------------------------------------------------- -static char* test_lhmslv() { - mu_assert_lf(0 == 0); - - return NULL; -} - -// slls_t* ax = slls_alloc(); -// slls_add_no_free(ax, "a"); -// slls_add_no_free(ax, "x"); -// -// slls_t* ay = slls_alloc(); -// slls_add_no_free(ay, "a"); -// slls_add_no_free(ay, "y"); -// -// slls_t* bz = slls_alloc(); -// slls_add_no_free(bz, "b"); -// slls_add_no_free(bz, "z"); -// -// lhmslv_t *pmap = lhmslv_alloc(); -// lhmslv_put(pmap, ax, "3"); -// lhmslv_put(pmap, ay, "5"); -// lhmslv_put(pmap, ax, "4"); -// lhmslv_put(pmap, bz, "7"); -// lhmslv_remove(pmap, ay); -// printf("map size = %d\n", lhmslv_size(pmap)); -// lhmslv_dump(pmap); -// lhmslv_check_counts(pmap); -// lhmslv_free(pmap); - -// ---------------------------------------------------------------- -static char* test_lhmss() { - mu_assert_lf(0 == 0); - - return NULL; -} - -// lhmss_t *pmap = 
lhmss_alloc(); -// lhmss_put(pmap, "x", "3"); -// lhmss_put(pmap, "y", "5"); -// lhmss_put(pmap, "x", "4"); -// lhmss_put(pmap, "z", "7"); -// lhmss_remove(pmap, "y"); -// printf("map size = %d\n", pmap->num_occupied); -// lhmss_dump(pmap); -// lhmss_check_counts(pmap); -// lhmss_free(pmap); - -// ---------------------------------------------------------------- -static char* test_lhmsv() { - mu_assert_lf(0 == 0); - - return NULL; -} - -// int x3 = 3; -// int x5 = 5; -// int x4 = 4; -// int x7 = 7; -// lhmsv_t *pmap = lhmsv_alloc(); -// lhmsv_put(pmap, "x", &x3); -// lhmsv_put(pmap, "y", &x5); -// lhmsv_put(pmap, "x", &x4); -// lhmsv_put(pmap, "z", &x7); -// lhmsv_remove(pmap, "y"); -// printf("map size = %d\n", pmap->num_occupied); -// lhmsv_dump(pmap); -// lhmsv_check_counts(pmap); -// lhmsv_free(pmap); - -// ---------------------------------------------------------------- -static char* test_percentile_keeper() { - mu_assert_lf(0 == 0); - - return NULL; -} - -//void percentile_keeper_dump(percentile_keeper_t* ppercentile_keeper) { -// for (int i = 0; i < ppercentile_keeper->size; i++) -// printf("[%02d] %.8lf\n", i, ppercentile_keeper->data[i]); -//} - -// char buffer[1024]; -// percentile_keeper_t* ppercentile_keeper = percentile_keeper_alloc(); -// char* line; -// while ((line = fgets(buffer, sizeof(buffer), stdin)) != NULL) { -// int len = strlen(line); -// if (len >= 1) // xxx write and use a chomp() -// if (line[len-1] == '\n') -// line[len-1] = 0; -// double v; -// if (!mlr_try_double_from_string(line, &v)) { -// percentile_keeper_ingest(ppercentile_keeper, v); -// } else { -// printf("meh? 
>>%s<<\n", line); -// } -// } -// percentile_keeper_dump(ppercentile_keeper); -// printf("\n"); -// double p; -// p = 0.10; printf("%.2lf: %.6lf\n", p, percentile_keeper_emit(ppercentile_keeper, p)); -// p = 0.50; printf("%.2lf: %.6lf\n", p, percentile_keeper_emit(ppercentile_keeper, p)); -// p = 0.90; printf("%.2lf: %.6lf\n", p, percentile_keeper_emit(ppercentile_keeper, p)); -// printf("\n"); -// percentile_keeper_dump(ppercentile_keeper); - -// ---------------------------------------------------------------- -static char* test_top_keeper() { - mu_assert_lf(0 == 0); - - return NULL; -} - -//void top_keeper_dump(top_keeper_t* ptop_keeper) { -// for (int i = 0; i < ptop_keeper->size; i++) -// printf("[%02d] %.8lf\n", i, ptop_keeper->top_values[i]); -// for (int i = ptop_keeper->size; i < ptop_keeper->capacity; i++) -// printf("[%02d] ---\n", i); -//} - -// int capacity = 5; -// char buffer[1024]; -// if (argc == 2) -// (void)sscanf(argv[1], "%d", &capacity); -// top_keeper_t* ptop_keeper = top_keeper_alloc(capacity); -// char* line; -// while ((line = fgets(buffer, sizeof(buffer), stdin)) != NULL) { -// int len = strlen(line); -// if (len >= 1) // xxx write and use a chomp() -// if (line[len-1] == '\n') -// line[len-1] = 0; -// if (streq(line, "")) { -// //top_keeper_dump(ptop_keeper); -// printf("\n"); -// } else { -// double v; -// if (!mlr_try_double_from_string(line, &v)) { -// top_keeper_add(ptop_keeper, v, NULL); -// top_keeper_dump(ptop_keeper); -// printf("\n"); -// } else { -// printf("meh? 
>>%s<<\n", line); -// } -// } -// } - -// ---------------------------------------------------------------- -static char* test_dheap() { - mu_assert_lf(0 == 0); - - return NULL; -} - -// dheap_t *pdheap = dheap_alloc(); -// dheap_check(pdheap, __FILE__, __LINE__); -// dheap_add(pdheap, 4.1); -// dheap_add(pdheap, 3.1); -// dheap_add(pdheap, 2.1); -// dheap_add(pdheap, 6.1); -// dheap_add(pdheap, 5.1); -// dheap_add(pdheap, 8.1); -// dheap_add(pdheap, 7.1); -// dheap_print(pdheap); -// dheap_check(pdheap, __FILE__, __LINE__); -// -// printf("\n"); -// printf("remove %lf\n", dheap_remove(pdheap)); -// printf("remove %lf\n", dheap_remove(pdheap)); -// printf("remove %lf\n", dheap_remove(pdheap)); -// printf("remove %lf\n", dheap_remove(pdheap)); -// printf("\n"); -// -// dheap_print(pdheap); -// dheap_check(pdheap, __FILE__, __LINE__); -// -// dheap_free(pdheap); - -// ================================================================ -static char * run_all_tests() { - mu_run_test(test_slls); - mu_run_test(test_sllv_append); - mu_run_test(test_hss); - mu_run_test(test_lhmsi); - mu_run_test(test_lhms2v); - mu_run_test(test_lhmslv); - mu_run_test(test_lhmss); - mu_run_test(test_lhmsv); - mu_run_test(test_percentile_keeper); - mu_run_test(test_top_keeper); - mu_run_test(test_dheap); - return 0; -} - -int main(int argc, char **argv) { - char *result = run_all_tests(); - printf("\n"); - if (result != 0) { - printf("Not all unit tests passed\n"); - } - else { - printf("TEST_MAPS_AND_SETS: ALL UNIT TESTS PASSED\n"); - } - printf("Tests passed: %d of %d\n", tests_run - tests_failed, tests_run); - printf("Assertions passed: %d of %d\n", assertions_run - assertions_failed, assertions_run); - - return result != 0; -} -#endif // __TEST_MAPS_AND_SETS_MAIN__ diff --git a/c/containers/test_multiple_containers.c b/c/containers/test_multiple_containers.c new file mode 100644 index 000000000..8e9f3653f --- /dev/null +++ b/c/containers/test_multiple_containers.c @@ -0,0 +1,659 @@ +#include 
+#include +#include "lib/minunit.h" +#include "lib/mlrutil.h" +#include "containers/slls.h" +#include "containers/sllv.h" +#include "containers/hss.h" +#include "containers/lhmsi.h" +#include "containers/lhmss.h" +#include "containers/lhmsv.h" +#include "containers/lhms2v.h" +#include "containers/lhmslv.h" +#include "containers/percentile_keeper.h" +#include "containers/top_keeper.h" +#include "containers/dheap.h" + +#ifdef __TEST_MULTIPLE_CONTAINERS_MAIN__ +int tests_run = 0; +int tests_failed = 0; +int assertions_run = 0; +int assertions_failed = 0; + +// ---------------------------------------------------------------- +static char* test_slls() { + slls_t* plist = slls_from_line(strdup(""), ',', FALSE); + mu_assert_lf(plist->length == 0); + + plist = slls_from_line(strdup("a"), ',', FALSE); + mu_assert_lf(plist->length == 1); + + plist = slls_from_line(strdup("c,d,a,e,b"), ',', FALSE); + mu_assert_lf(plist->length == 5); + + sllse_t* pe = plist->phead; + + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "c")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "d")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "a")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "e")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "b")); pe = pe->pnext; + mu_assert_lf(pe == NULL); + + slls_sort(plist); + + mu_assert_lf(plist->length == 5); + pe = plist->phead; + + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "a")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "b")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "c")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "d")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "e")); pe = pe->pnext; + mu_assert_lf(pe == NULL); + + return NULL; +} + +// 
---------------------------------------------------------------- +static char* test_sllv() { + + sllv_t* pa = sllv_alloc(); + sllv_add(pa, "a"); + sllv_add(pa, "b"); + sllv_add(pa, "c"); + mu_assert_lf(pa->length == 3); + + sllve_t* pe = pa->phead; + + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "a")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "b")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "c")); pe = pe->pnext; + mu_assert_lf(pe == NULL); + + sllv_t* pb = sllv_alloc(); + sllv_add(pb, "d"); + sllv_add(pb, "e"); + mu_assert_lf(pb->length == 2); + + pe = pb->phead; + + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "d")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "e")); pe = pe->pnext; + mu_assert_lf(pe == NULL); + + pa = sllv_append(pa, pb); + + mu_assert_lf(pa->length == 5); + mu_assert_lf(pb->length == 2); + + pe = pa->phead; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "a")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "b")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "c")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "d")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "e")); pe = pe->pnext; + mu_assert_lf(pe == NULL); + + pe = pb->phead; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "d")); pe = pe->pnext; + mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "e")); pe = pe->pnext; + mu_assert_lf(pe == NULL); + + return NULL; +} + +// ---------------------------------------------------------------- +static char* test_hss() { + + hss_t *pset = hss_alloc(); + mu_assert_lf(pset->num_occupied == 0); + + hss_add(pset, "x"); + mu_assert_lf(pset->num_occupied == 1); + mu_assert_lf(!hss_has(pset, "w")); + mu_assert_lf( hss_has(pset, "x")); + mu_assert_lf(!hss_has(pset, "y")); + 
mu_assert_lf(!hss_has(pset, "z")); + mu_assert_lf(hss_check_counts(pset)); + + hss_add(pset, "y"); + mu_assert_lf(pset->num_occupied == 2); + mu_assert_lf(!hss_has(pset, "w")); + mu_assert_lf( hss_has(pset, "x")); + mu_assert_lf( hss_has(pset, "y")); + mu_assert_lf(!hss_has(pset, "z")); + mu_assert_lf(hss_check_counts(pset)); + + hss_add(pset, "x"); + mu_assert_lf(pset->num_occupied == 2); + mu_assert_lf(!hss_has(pset, "w")); + mu_assert_lf( hss_has(pset, "x")); + mu_assert_lf( hss_has(pset, "y")); + mu_assert_lf(!hss_has(pset, "z")); + mu_assert_lf(hss_check_counts(pset)); + + hss_add(pset, "z"); + mu_assert_lf(pset->num_occupied == 3); + mu_assert_lf(!hss_has(pset, "w")); + mu_assert_lf( hss_has(pset, "x")); + mu_assert_lf( hss_has(pset, "y")); + mu_assert_lf(hss_has(pset, "z")); + mu_assert_lf(hss_check_counts(pset)); + + hss_remove(pset, "y"); + mu_assert_lf(pset->num_occupied == 2); + mu_assert_lf(!hss_has(pset, "w")); + mu_assert_lf( hss_has(pset, "x")); + mu_assert_lf(!hss_has(pset, "y")); + mu_assert_lf( hss_has(pset, "z")); + mu_assert_lf(hss_check_counts(pset)); + + hss_clear(pset); + mu_assert_lf(!hss_has(pset, "w")); + mu_assert_lf(!hss_has(pset, "x")); + mu_assert_lf(!hss_has(pset, "y")); + mu_assert_lf(!hss_has(pset, "z")); + mu_assert_lf(hss_check_counts(pset)); + + hss_free(pset); + + return NULL; +} + +// ---------------------------------------------------------------- +static char* test_lhmsi() { + + lhmsi_t *pmap = lhmsi_alloc(); + mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(lhmsi_check_counts(pmap)); + + lhmsi_put(pmap, "x", 3); + mu_assert_lf(pmap->num_occupied == 1); + 
mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 3); + mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999); + mu_assert_lf(lhmsi_check_counts(pmap)); + + lhmsi_put(pmap, "y", 5); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 3); + mu_assert_lf( lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); + mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999); + mu_assert_lf(lhmsi_check_counts(pmap)); + + lhmsi_put(pmap, "x", 4); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); + mu_assert_lf( lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); + mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999); + mu_assert_lf(lhmsi_check_counts(pmap)); + + lhmsi_put(pmap, "z", 7); + mu_assert_lf(pmap->num_occupied == 3); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); + mu_assert_lf( lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5); + mu_assert_lf(lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == 7); + mu_assert_lf(lhmsi_check_counts(pmap)); + + lhmsi_remove(pmap, "y"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4); + 
mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999); + mu_assert_lf( lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == 7); + mu_assert_lf(lhmsi_check_counts(pmap)); + + lhmsi_clear(pmap); + mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999); + mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999); + mu_assert_lf(lhmsi_check_counts(pmap)); + + lhmsi_free(pmap); + + return NULL; +} + +// ---------------------------------------------------------------- +static char* test_lhmss() { + + lhmss_t *pmap = lhmss_alloc(); + mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf(!lhmss_has_key(pmap, "x")); mu_assert_lf(lhmss_get(pmap, "x") == NULL); + mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); + mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_put(pmap, "x", "3"); + mu_assert_lf(pmap->num_occupied == 1); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "3")); + mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); + mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_put(pmap, "y", "5"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "3")); + 
mu_assert_lf( lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); + mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_put(pmap, "x", "4"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); + mu_assert_lf( lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); + mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_put(pmap, "z", "7"); + mu_assert_lf(pmap->num_occupied == 3); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); + mu_assert_lf( lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); + mu_assert_lf( lhmss_has_key(pmap, "z")); mu_assert_lf(streq(lhmss_get(pmap, "z"), "7")); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_remove(pmap, "y"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); + mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); + mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); + mu_assert_lf( lhmss_has_key(pmap, "z")); mu_assert_lf(streq(lhmss_get(pmap, "z"), "7")); + mu_assert_lf(lhmss_check_counts(pmap)); + + lhmss_free(pmap); + + return NULL; +} + +// ---------------------------------------------------------------- +static char* test_lhmsv() { + + lhmsv_t *pmap = lhmsv_alloc(); + mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf(!lhmsv_has_key(pmap, "x")); 
mu_assert_lf(lhmsv_get(pmap, "x") == NULL); + mu_assert_lf(!lhmsv_has_key(pmap, "y")); mu_assert_lf(lhmsv_get(pmap, "y") == NULL); + mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_put(pmap, "x", "3"); + mu_assert_lf(pmap->num_occupied == 1); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "3")); + mu_assert_lf(!lhmsv_has_key(pmap, "y")); mu_assert_lf(lhmsv_get(pmap, "y") == NULL); + mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_put(pmap, "y", "5"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "3")); + mu_assert_lf( lhmsv_has_key(pmap, "y")); mu_assert_lf(streq(lhmsv_get(pmap, "y"), "5")); + mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_put(pmap, "x", "4"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "4")); + mu_assert_lf( lhmsv_has_key(pmap, "y")); mu_assert_lf(streq(lhmsv_get(pmap, "y"), "5")); + mu_assert_lf(!lhmsv_has_key(pmap, "z")); mu_assert_lf(lhmsv_get(pmap, "z") == NULL); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_put(pmap, "z", "7"); + mu_assert_lf(pmap->num_occupied == 3); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "4")); + mu_assert_lf( lhmsv_has_key(pmap, "y")); mu_assert_lf(streq(lhmsv_get(pmap, "y"), 
"5")); + mu_assert_lf( lhmsv_has_key(pmap, "z")); mu_assert_lf(streq(lhmsv_get(pmap, "z"), "7")); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_remove(pmap, "y"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmsv_has_key(pmap, "w")); mu_assert_lf(lhmsv_get(pmap, "w") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "x")); mu_assert_lf(streq(lhmsv_get(pmap, "x"), "4")); + mu_assert_lf(!lhmsv_has_key(pmap, "y")); mu_assert_lf(lhmsv_get(pmap, "y") == NULL); + mu_assert_lf( lhmsv_has_key(pmap, "z")); mu_assert_lf(streq(lhmsv_get(pmap, "z"), "7")); + mu_assert_lf(lhmsv_check_counts(pmap)); + + lhmsv_free(pmap); + + return NULL; +} + +// ---------------------------------------------------------------- +static char* test_lhms2v() { + + lhms2v_t *pmap = lhms2v_alloc(); + mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(lhms2v_get(pmap, "a", "x") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(lhms2v_get(pmap, "a", "y") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); + mu_assert_lf(lhms2v_check_counts(pmap)); + + lhms2v_put(pmap, "a", "x", "3"); + mu_assert_lf(pmap->num_occupied == 1); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf( lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "3")); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(lhms2v_get(pmap, "a", "y") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); + mu_assert_lf(lhms2v_check_counts(pmap)); + + lhms2v_put(pmap, "a", "y", "5"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf( 
lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "3")); + mu_assert_lf( lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); + mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); + mu_assert_lf(lhms2v_check_counts(pmap)); + + lhms2v_put(pmap, "a", "x", "4"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf( lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); + mu_assert_lf( lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); + mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); + mu_assert_lf(lhms2v_check_counts(pmap)); + + lhms2v_put(pmap, "b", "z", "7"); + mu_assert_lf(pmap->num_occupied == 3); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf( lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); + mu_assert_lf( lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "y"), "5")); + mu_assert_lf( lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(streq(lhms2v_get(pmap, "b", "z"), "7")); + mu_assert_lf(lhms2v_check_counts(pmap)); + + lhms2v_remove(pmap, "a", "y"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf( lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(streq(lhms2v_get(pmap, "a", "x"), "4")); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(lhms2v_get(pmap, "a", "y") == NULL); + mu_assert_lf( lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(streq(lhms2v_get(pmap, "b", "z"), "7")); + mu_assert_lf(lhms2v_check_counts(pmap)); + + lhms2v_clear(pmap); + mu_assert_lf(pmap->num_occupied == 0); + 
mu_assert_lf(!lhms2v_has_key(pmap, "a", "w")); mu_assert_lf(lhms2v_get(pmap, "a", "w") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "x")); mu_assert_lf(lhms2v_get(pmap, "a", "x") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "a", "y")); mu_assert_lf(lhms2v_get(pmap, "a", "y") == NULL); + mu_assert_lf(!lhms2v_has_key(pmap, "b", "z")); mu_assert_lf(lhms2v_get(pmap, "b", "z") == NULL); + mu_assert_lf(lhms2v_check_counts(pmap)); + + lhms2v_free(pmap); + + return NULL; +} + +// ---------------------------------------------------------------- +static char* test_lhmslv() { + + slls_t* aw = slls_alloc(); slls_add_no_free(aw, "a"); slls_add_no_free(aw, "w"); + slls_t* ax = slls_alloc(); slls_add_no_free(ax, "a"); slls_add_no_free(ax, "x"); + slls_t* ay = slls_alloc(); slls_add_no_free(ay, "a"); slls_add_no_free(ay, "y"); + slls_t* bz = slls_alloc(); slls_add_no_free(bz, "b"); slls_add_no_free(bz, "z"); + + lhmslv_t *pmap = lhmslv_alloc(); + mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, ax)); mu_assert_lf(lhmslv_get(pmap, ax) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); + mu_assert_lf(lhmslv_check_counts(pmap)); + + lhmslv_put(pmap, ax, "3"); + mu_assert_lf(pmap->num_occupied == 1); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); + mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); + mu_assert_lf(lhmslv_check_counts(pmap)); + + lhmslv_put(pmap, ay, "5"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) 
== NULL); + mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "3")); + mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); + mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); + mu_assert_lf(lhmslv_check_counts(pmap)); + + lhmslv_put(pmap, ax, "4"); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); + mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); + mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); + mu_assert_lf(lhmslv_check_counts(pmap)); + + lhmslv_put(pmap, bz, "7"); + mu_assert_lf(pmap->num_occupied == 3); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); + mu_assert_lf( lhmslv_has_key(pmap, ay)); mu_assert_lf(streq(lhmslv_get(pmap, ay), "5")); + mu_assert_lf( lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); + mu_assert_lf(lhmslv_check_counts(pmap)); + + lhmslv_remove(pmap, ay); + mu_assert_lf(pmap->num_occupied == 2); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf( lhmslv_has_key(pmap, ax)); mu_assert_lf(streq(lhmslv_get(pmap, ax), "4")); + mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); + mu_assert_lf( lhmslv_has_key(pmap, bz)); mu_assert_lf(streq(lhmslv_get(pmap, bz), "7")); + mu_assert_lf(lhmslv_check_counts(pmap)); + + lhmslv_clear(pmap); + mu_assert_lf(pmap->num_occupied == 0); + mu_assert_lf(!lhmslv_has_key(pmap, aw)); mu_assert_lf(lhmslv_get(pmap, aw) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, ax)); mu_assert_lf(lhmslv_get(pmap, ax) == NULL); + 
mu_assert_lf(!lhmslv_has_key(pmap, ay)); mu_assert_lf(lhmslv_get(pmap, ay) == NULL); + mu_assert_lf(!lhmslv_has_key(pmap, bz)); mu_assert_lf(lhmslv_get(pmap, bz) == NULL); + mu_assert_lf(lhmslv_check_counts(pmap)); + + lhmslv_free(pmap); + + return NULL; +} + +// ---------------------------------------------------------------- +static char* test_percentile_keeper() { + + percentile_keeper_t* ppercentile_keeper = percentile_keeper_alloc(); + percentile_keeper_ingest(ppercentile_keeper, 1.0); + percentile_keeper_ingest(ppercentile_keeper, 2.0); + percentile_keeper_ingest(ppercentile_keeper, 3.0); + percentile_keeper_ingest(ppercentile_keeper, 4.0); + percentile_keeper_ingest(ppercentile_keeper, 5.0); + percentile_keeper_print(ppercentile_keeper); + + double p, q; + p = 0.0; + q = percentile_keeper_emit(ppercentile_keeper, p); + printf("%4.2lf -> %7.4lf\n", p, q); + mu_assert_lf(q == 1.0); + + p = 10.0; + q = percentile_keeper_emit(ppercentile_keeper, p); + printf("%4.2lf -> %7.4lf\n", p, q); + mu_assert_lf(q == 1.0); + + p = 50.0; + q = percentile_keeper_emit(ppercentile_keeper, p); + printf("%4.2lf -> %7.4lf\n", p, q); + mu_assert_lf(q == 3.0); + + p = 90.0; + q = percentile_keeper_emit(ppercentile_keeper, p); + printf("%4.2lf -> %7.4lf\n", p, q); + mu_assert_lf(q == 5.0); + + p = 100.0; + q = percentile_keeper_emit(ppercentile_keeper, p); + printf("%4.2lf -> %7.4lf\n", p, q); + mu_assert_lf(q == 5.0); + + percentile_keeper_free(ppercentile_keeper); + + return NULL; +} + +// ---------------------------------------------------------------- +static char* test_top_keeper() { + int capacity = 3; + + top_keeper_t* ptop_keeper = top_keeper_alloc(capacity); + mu_assert_lf(ptop_keeper->size == 0); + + top_keeper_add(ptop_keeper, 5.0, NULL); + top_keeper_print(ptop_keeper); + mu_assert_lf(ptop_keeper->size == 1); + mu_assert_lf(ptop_keeper->top_values[0] == 5.0); + + top_keeper_add(ptop_keeper, 6.0, NULL); + top_keeper_print(ptop_keeper); + mu_assert_lf(ptop_keeper->size == 
2); + mu_assert_lf(ptop_keeper->top_values[0] == 6.0); + mu_assert_lf(ptop_keeper->top_values[1] == 5.0); + + top_keeper_add(ptop_keeper, 4.0, NULL); + top_keeper_print(ptop_keeper); + mu_assert_lf(ptop_keeper->size == 3); + mu_assert_lf(ptop_keeper->top_values[0] == 6.0); + mu_assert_lf(ptop_keeper->top_values[1] == 5.0); + mu_assert_lf(ptop_keeper->top_values[2] == 4.0); + + top_keeper_add(ptop_keeper, 2.0, NULL); + top_keeper_print(ptop_keeper); + mu_assert_lf(ptop_keeper->size == 3); + mu_assert_lf(ptop_keeper->top_values[0] == 6.0); + mu_assert_lf(ptop_keeper->top_values[1] == 5.0); + mu_assert_lf(ptop_keeper->top_values[2] == 4.0); + + top_keeper_add(ptop_keeper, 7.0, NULL); + top_keeper_print(ptop_keeper); + mu_assert_lf(ptop_keeper->size == 3); + mu_assert_lf(ptop_keeper->top_values[0] == 7.0); + mu_assert_lf(ptop_keeper->top_values[1] == 6.0); + mu_assert_lf(ptop_keeper->top_values[2] == 5.0); + + top_keeper_free(ptop_keeper); + return NULL; +} + +// ---------------------------------------------------------------- +static char* test_dheap() { + + dheap_t *pdheap = dheap_alloc(); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 0); + + dheap_add(pdheap, 4.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 1); + + dheap_add(pdheap, 3.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 2); + + dheap_add(pdheap, 2.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 3); + + dheap_add(pdheap, 6.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 4); + + dheap_add(pdheap, 5.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 5); + + dheap_add(pdheap, 8.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 6); + + dheap_add(pdheap, 7.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + 
mu_assert_lf(pdheap->n == 7); + + dheap_print(pdheap); + + mu_assert_lf(dheap_remove(pdheap) == 8.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 6); + + mu_assert_lf(dheap_remove(pdheap) == 7.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 5); + + mu_assert_lf(dheap_remove(pdheap) == 6.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 4); + + mu_assert_lf(dheap_remove(pdheap) == 5.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 3); + + mu_assert_lf(dheap_remove(pdheap) == 4.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 2); + + mu_assert_lf(dheap_remove(pdheap) == 3.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 1); + + mu_assert_lf(dheap_remove(pdheap) == 2.25); + mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__)); + mu_assert_lf(pdheap->n == 0); + + dheap_free(pdheap); + + return NULL; +} + +// ================================================================ +static char * run_all_tests() { + mu_run_test(test_slls); + mu_run_test(test_sllv); + mu_run_test(test_hss); + mu_run_test(test_lhmsi); + mu_run_test(test_lhmss); + mu_run_test(test_lhmsv); + mu_run_test(test_lhms2v); + mu_run_test(test_lhmslv); + mu_run_test(test_percentile_keeper); + mu_run_test(test_top_keeper); + mu_run_test(test_dheap); + return 0; +} + +int main(int argc, char **argv) { + printf("TEST_MULTIPLE_CONTAINERS ENTER\n"); + char *result = run_all_tests(); + printf("\n"); + if (result != 0) { + printf("Not all unit tests passed\n"); + } + else { + printf("TEST_MULTIPLE_CONTAINERS: ALL UNIT TESTS PASSED\n"); + } + printf("Tests passed: %d of %d\n", tests_run - tests_failed, tests_run); + printf("Assertions passed: %d of %d\n", assertions_run - assertions_failed, assertions_run); + + return result != 0; +} +#endif // 
__TEST_MULTIPLE_CONTAINERS_MAIN__ diff --git a/c/containers/test_parse_trie.c b/c/containers/test_parse_trie.c index ac3dec759..4d2d1c025 100644 --- a/c/containers/test_parse_trie.c +++ b/c/containers/test_parse_trie.c @@ -252,6 +252,7 @@ static char* all_tests() { } int main(int argc, char** argv) { + printf("TEST_PARSE_TRIE ENTER\n"); char* result = all_tests(); printf("\n"); if (result != 0) { diff --git a/c/containers/top_keeper.c b/c/containers/top_keeper.c index 3eda084cf..2db8197a3 100644 --- a/c/containers/top_keeper.c +++ b/c/containers/top_keeper.c @@ -1,3 +1,4 @@ +#include #include #include "lib/mlrutil.h" #include "containers/top_keeper.h" @@ -76,3 +77,12 @@ void top_keeper_add(top_keeper_t* ptop_keeper, double value, lrec_t* prec) { ptop_keeper->top_precords[destidx] = prec; // xxx copy?? xxx free on shift-off?!? } } + +// ---------------------------------------------------------------- +void top_keeper_print(top_keeper_t* ptop_keeper) { + printf("top_keeper dump:\n"); + for (int i = 0; i < ptop_keeper->size; i++) + printf("[%02d] %.8lf\n", i, ptop_keeper->top_values[i]); + for (int i = ptop_keeper->size; i < ptop_keeper->capacity; i++) + printf("[%02d] ---\n", i); +} diff --git a/c/containers/top_keeper.h b/c/containers/top_keeper.h index c657c8a9c..69b1b6ce6 100644 --- a/c/containers/top_keeper.h +++ b/c/containers/top_keeper.h @@ -17,4 +17,7 @@ top_keeper_t* top_keeper_alloc(int capacity); void top_keeper_free(top_keeper_t* ptop_keeper); void top_keeper_add(top_keeper_t* ptop_keeper, double value, lrec_t* prec); +// For debug/test +void top_keeper_print(top_keeper_t* ptop_keeper); + #endif // TOP_KEEPER_H diff --git a/c/experimental/getlines.c b/c/experimental/getlines.c index d5e6e6848..c444613d0 100644 --- a/c/experimental/getlines.c +++ b/c/experimental/getlines.c @@ -6,7 +6,9 @@ #include "input/file_reader_mmap.h" #include "input/lrec_readers.h" #include "lib/string_builder.h" -#include "input/old_peek_file_reader.h" +#include 
"input/byte_readers.h" +#include "input/peek_file_reader.h" +#include "containers/parse_trie.h" #define PEEK_BUF_LEN 32 #define STRING_BUILDER_INIT_SIZE 1024 @@ -263,28 +265,46 @@ static int read_file_mmap_psb(char* filename, int do_write) { } // ================================================================ -static char* read_line_pfr_psb(old_peek_file_reader_t* pfr, string_builder_t* psb, char* irs, int irs_len) { +#define IRS_STRIDX 11 +#define EOF_STRIDX 22 +#define IRSEOF_STRIDX 33 + +static char* read_line_pfr_psb(peek_file_reader_t* pfr, string_builder_t* psb, parse_trie_t* ptrie) { + int rc, stridx, matchlen; while (TRUE) { - if (old_pfr_at_eof(pfr)) { - if (sb_is_empty(psb)) - return NULL; - else + pfr_buffer_by(pfr, ptrie->maxlen); + rc = parse_trie_match(ptrie, pfr->peekbuf, pfr->sob, pfr->npeeked, pfr->peekbuflenmask, + &stridx, &matchlen); + if (rc) { + pfr_advance_by(pfr, matchlen); + switch(stridx) { + case IRS_STRIDX: return sb_finish(psb); - } else if (old_pfr_next_is(pfr, irs, irs_len)) { - old_pfr_advance_by(pfr, irs_len); - return sb_finish(psb); + break; + case IRSEOF_STRIDX: + return sb_finish(psb); + break; + case EOF_STRIDX: + return NULL; + break; + } } else { - sb_append_char(psb, old_pfr_read_char(pfr)); + sb_append_char(psb, pfr_read_char(pfr)); } } } static int read_file_pfr_psb(char* filename, int do_write) { - FILE* fp = fopen_or_die(filename); - char* irs = "\n"; - int irs_len = strlen(irs); + byte_reader_t* pbr = stdio_byte_reader_alloc(); + pbr->popen_func(pbr, filename); + + peek_file_reader_t* pfr = pfr_alloc(pbr, PEEK_BUF_LEN); + + parse_trie_t* ptrie = parse_trie_alloc(); + parse_trie_add_string(ptrie, "\n", IRS_STRIDX); + parse_trie_add_string(ptrie, "\xff", EOF_STRIDX); + parse_trie_add_string(ptrie, "\n\xff", IRSEOF_STRIDX); - old_peek_file_reader_t* pfr = old_pfr_alloc(fp, PEEK_BUF_LEN); string_builder_t sb; string_builder_t* psb = &sb; sb_init(&sb, STRING_BUILDER_INIT_SIZE); @@ -292,7 +312,7 @@ static int 
read_file_pfr_psb(char* filename, int do_write) { int bc = 0; while (TRUE) { - char* line = read_line_pfr_psb(pfr, psb, irs, irs_len); + char* line = read_line_pfr_psb(pfr, psb, ptrie); if (line == NULL) break; if (do_write) { @@ -302,7 +322,7 @@ static int read_file_pfr_psb(char* filename, int do_write) { bc += strlen(line); free(line); } - fclose(fp); + pbr->pclose_func(pbr); return bc; } @@ -384,41 +404,42 @@ int main(int argc, char** argv) { // $ ./getl ../data/big.csv 5|tee x // $ mlr --opprint cat then sort -n t x -// type t n -// getdelim 0.118618 55888899 -// getdelim 0.121467 55888899 -// getdelim 0.121943 55888899 -// getdelim 0.124756 55888899 -// getdelim 0.127039 55888899 -// getc_unlocked_fixed_len 0.167563 55888899 -// getc_unlocked_fixed_len 0.167803 55888899 -// getc_unlocked_fixed_len 0.168808 55888899 -// getc_unlocked_fixed_len 0.168980 55888899 -// getc_unlocked_fixed_len 0.176187 55888899 -// getc_unlocked_psb 0.238986 55888899 -// getc_unlocked_psb 0.241325 55888899 -// getc_unlocked_psb 0.246466 55888899 -// getc_unlocked_psb 0.247592 55888899 -// getc_unlocked_psb 0.248112 55888899 -// mmap_psb 0.250021 55888899 -// mmap_psb 0.254118 55888899 -// mmap_psb 0.257428 55888899 -// mmap_psb 0.261807 55888899 -// mmap_psb 0.264367 55888899 -// pfr_psb 0.760035 55888900 -// pfr_psb 0.765121 55888900 -// pfr_psb 0.768731 55888900 -// pfr_psb 0.771937 55888900 -// pfr_psb 0.780460 55888900 -// fgetc_fixed_len 2.516459 55888899 -// fgetc_fixed_len 2.522877 55888899 -// fgetc_fixed_len 2.587373 55888899 -// fgetc_psb 2.590090 55888899 -// fgetc_psb 2.590536 55888899 -// fgetc_fixed_len 2.608356 55888899 -// fgetc_psb 2.623930 55888899 -// fgetc_fixed_len 2.624310 55888899 -// fgetc_psb 2.637269 55888899 +// type t n type t n +// getdelim 0.118618 55888899 getdelim 0.118057 55888899 +// getdelim 0.121467 55888899 getdelim 0.118727 55888899 +// getdelim 0.121943 55888899 getdelim 0.119609 55888899 +// getdelim 0.124756 55888899 getdelim 0.122506 
55888899 +// getdelim 0.127039 55888899 getdelim 0.123099 55888899 +// getc_unlocked_fixed_len 0.167563 55888899 getc_unlocked_fixed_len 0.168109 55888899 +// getc_unlocked_fixed_len 0.167803 55888899 getc_unlocked_fixed_len 0.168392 55888899 +// getc_unlocked_fixed_len 0.168808 55888899 getc_unlocked_fixed_len 0.169387 55888899 +// getc_unlocked_fixed_len 0.168980 55888899 getc_unlocked_fixed_len 0.178484 55888899 +// getc_unlocked_fixed_len 0.176187 55888899 getc_unlocked_fixed_len 0.182793 55888899 +// getc_unlocked_psb 0.238986 55888899 getc_unlocked_psb 0.293240 55888899 +// getc_unlocked_psb 0.241325 55888899 getc_unlocked_psb 0.298449 55888899 +// getc_unlocked_psb 0.246466 55888899 getc_unlocked_psb 0.298508 55888899 +// getc_unlocked_psb 0.247592 55888899 getc_unlocked_psb 0.301125 55888899 +// getc_unlocked_psb 0.248112 55888899 mmap_psb 0.313239 55888899 +// mmap_psb 0.250021 55888899 mmap_psb 0.315061 55888899 +// mmap_psb 0.254118 55888899 mmap_psb 0.315517 55888899 +// mmap_psb 0.257428 55888899 mmap_psb 0.316790 55888899 +// mmap_psb 0.261807 55888899 mmap_psb 0.320654 55888899 +// mmap_psb 0.264367 55888899 getc_unlocked_psb 0.326494 55888899 +// pfr_psb 0.760035 55888900 pfr_psb 0.417141 55888899 +// pfr_psb 0.765121 55888900 pfr_psb 0.439269 55888899 +// pfr_psb 0.768731 55888900 pfr_psb 0.439342 55888899 +// pfr_psb 0.771937 55888900 pfr_psb 0.447218 55888899 +// pfr_psb 0.780460 55888900 pfr_psb 0.453839 55888899 +// fgetc_fixed_len 2.516459 55888899 fgetc_psb 2.476543 55888899 +// fgetc_fixed_len 2.522877 55888899 fgetc_psb 2.477130 55888899 +// fgetc_fixed_len 2.587373 55888899 fgetc_psb 2.484007 55888899 +// fgetc_psb 2.590090 55888899 fgetc_psb 2.484495 55888899 +// fgetc_psb 2.590536 55888899 fgetc_fixed_len 2.493730 55888899 +// fgetc_fixed_len 2.608356 55888899 fgetc_fixed_len 2.528333 55888899 +// fgetc_psb 2.623930 55888899 fgetc_fixed_len 2.533535 55888899 +// fgetc_fixed_len 2.624310 55888899 fgetc_fixed_len 2.555377 55888899 +// 
fgetc_psb 2.637269 55888899 fgetc_fixed_len 2.736391 55888899 +// fgetc_psb 2.743828 55888899 // $ mlr --opprint cat then stats1 -a min,max,stddev,mean -f t -g type then sort -n t_mean x // type t_min t_max t_stddev t_mean @@ -430,6 +451,15 @@ int main(int argc, char** argv) { // fgetc_fixed_len 2.516459 2.624310 0.049478 2.571875 // fgetc_psb 2.590090 2.680364 0.037489 2.624438 +// type t_min t_max t_stddev t_mean +// getdelim 0.118057 0.123099 0.002271 0.120400 +// getc_unlocked_fixed_len 0.168109 0.182793 0.006768 0.173433 +// getc_unlocked_psb 0.293240 0.326494 0.013134 0.303563 +// mmap_psb 0.313239 0.320654 0.002771 0.316252 +// pfr_psb 0.417141 0.453839 0.013830 0.439362 +// fgetc_psb 2.476543 2.743828 0.117803 2.533201 +// fgetc_fixed_len 2.493730 2.736391 0.095892 2.569473 + // ---------------------------------------------------------------- // Analysis: // * getdelim is good; fatal flaw is single-char line-terminator @@ -441,4 +471,4 @@ int main(int argc, char** argv) { // * getc_unlocked vs. fgetc, no-brainer for this single-threaded code. // * string-builder is a little than fixed-length malloc, as expected // -- it's adding value. -// ! old_peek_file_reader is where the optimization opportunities are +// ! 
peek_file_reader is where the optimization opportunities are diff --git a/c/input/lrec_reader_csv.c b/c/input/lrec_reader_csv.c index f2e4f5429..098d5c1de 100644 --- a/c/input/lrec_reader_csv.c +++ b/c/input/lrec_reader_csv.c @@ -139,7 +139,7 @@ static slls_t* lrec_reader_csv_get_fields(lrec_reader_csv_state_t* pstate) { pfr->peekbuf, pfr->sob, pfr->npeeked, pfr->peekbuflenmask, &stridx, &matchlen); #ifdef DEBUG_PARSER - pfr_dump(pfr); + pfr_print(pfr); #endif if (rc) { #ifdef DEBUG_PARSER @@ -291,15 +291,15 @@ static void lrec_reader_csv_free(void* pvstate) { } // ---------------------------------------------------------------- -lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char irs, char ifs) { +lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char* irs, char* ifs) { lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t)); lrec_reader_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_csv_state_t)); pstate->ilno = 0LL; pstate->eof = "\xff"; - pstate->irs = "\r\n"; // xxx multi-byte the cli irs/ifs/etc, and integrate here - pstate->ifs = ","; // xxx multi-byte the cli irs/ifs/etc, and integrate here + pstate->irs = irs; + pstate->ifs = ifs; pstate->ifs_eof = mlr_paste_2_strings(pstate->ifs, "\xff"); pstate->dquote = "\""; diff --git a/c/input/lrec_reader_stdio_dkvp.c b/c/input/lrec_reader_stdio_dkvp.c index 39b50260c..dd9fc25ea 100644 --- a/c/input/lrec_reader_stdio_dkvp.c +++ b/c/input/lrec_reader_stdio_dkvp.c @@ -52,7 +52,7 @@ lrec_reader_t* lrec_reader_stdio_dkvp_alloc(char irs, char ifs, char ips, int al } // ---------------------------------------------------------------- -// xxx needs checking on repeated occurrences of ps between fs occurrences. don't zero-poke there. +// xxx needs checking on repeated occurrences of ps between ifs occurrences. don't zero-poke there. // // xxx needs abend on null lhs. 
// diff --git a/c/input/lrec_readers.c b/c/input/lrec_readers.c index 5d72f47f5..1cc60447b 100644 --- a/c/input/lrec_readers.c +++ b/c/input/lrec_readers.c @@ -1,35 +1,71 @@ #include "lib/mlrutil.h" +#include "lib/mlr_globals.h" #include "input/lrec_readers.h" #include "input/byte_readers.h" -lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char irs, char ifs, int allow_repeat_ifs, - char ips, int allow_repeat_ips) +static char xxx_temp_check_single_char_separator(char* name, char* value) { + if (strlen(value) != 1) { + fprintf(stderr, + "%s: multi-character separators are not yet supported for all formats. Got %s=\"%s\".\n", + MLR_GLOBALS.argv0, name, value); + exit(1); + } + return value[0]; +} + +lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char* irs, char* ifs, int allow_repeat_ifs, + char* ips, int allow_repeat_ips) { // xxx refactor for https://github.com/johnkerl/miller/issues/51 et al. byte_reader_t* pbr = use_mmap ? mmap_byte_reader_alloc() : stdio_byte_reader_alloc(); if (streq(fmtdesc, "dkvp")) { if (use_mmap) - return lrec_reader_mmap_dkvp_alloc(irs, ifs, ips, allow_repeat_ifs); + return lrec_reader_mmap_dkvp_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + xxx_temp_check_single_char_separator("ips", ips), + allow_repeat_ifs); else - return lrec_reader_stdio_dkvp_alloc(irs, ifs, ips, allow_repeat_ifs); + return lrec_reader_stdio_dkvp_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + xxx_temp_check_single_char_separator("ips", ips), + allow_repeat_ifs); } else if (streq(fmtdesc, "csv")) { return lrec_reader_csv_alloc(pbr, irs, ifs); } else if (streq(fmtdesc, "csvlite")) { if (use_mmap) - return lrec_reader_mmap_csvlite_alloc(irs, ifs, allow_repeat_ifs); + return lrec_reader_mmap_csvlite_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + 
allow_repeat_ifs); else - return lrec_reader_stdio_csvlite_alloc(irs, ifs, allow_repeat_ifs); + return lrec_reader_stdio_csvlite_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + allow_repeat_ifs); } else if (streq(fmtdesc, "nidx")) { if (use_mmap) - return lrec_reader_mmap_nidx_alloc(irs, ifs, allow_repeat_ifs); + return lrec_reader_mmap_nidx_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + allow_repeat_ifs); else - return lrec_reader_stdio_nidx_alloc(irs, ifs, allow_repeat_ifs); + return lrec_reader_stdio_nidx_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ifs", ifs), + allow_repeat_ifs); } else if (streq(fmtdesc, "xtab")) { if (use_mmap) - return lrec_reader_mmap_xtab_alloc(irs, ips, TRUE/*allow_repeat_ips*/); + return lrec_reader_mmap_xtab_alloc( + xxx_temp_check_single_char_separator("irs", irs), + xxx_temp_check_single_char_separator("ips", ips), + TRUE/*allow_repeat_ips*/); else - return lrec_reader_stdio_xtab_alloc(ips, TRUE); // xxx parameterize allow_repeat_ips + return lrec_reader_stdio_xtab_alloc( + xxx_temp_check_single_char_separator("ips", ips), + TRUE); // xxx parameterize allow_repeat_ips } else { return NULL; } diff --git a/c/input/lrec_readers.h b/c/input/lrec_readers.h index c793492aa..0ef8c28ef 100644 --- a/c/input/lrec_readers.h +++ b/c/input/lrec_readers.h @@ -6,12 +6,12 @@ // ---------------------------------------------------------------- // Primary entry points -// fmtdesc: "dkvp", "csv", "nidx", "xtab". -lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char irs, char ifs, int allow_repeat_ifs, - char ips, int allow_repeat_ips); +// Factory method. fmtdesc: "dkvp", "csv", "csvlite", "nidx", "xtab". 
+lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char* irs, char* ifs, int allow_repeat_ifs, + char* ips, int allow_repeat_ips); lrec_reader_t* lrec_reader_stdio_csvlite_alloc(char irs, char ifs, int allow_repeat_ifs); -lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char irs, char ifs); +lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char* irs, char* ifs); lrec_reader_t* lrec_reader_stdio_dkvp_alloc(char irs, char ifs, char ips, int allow_repeat_ifs); lrec_reader_t* lrec_reader_stdio_nidx_alloc(char irs, char ifs, int allow_repeat_ifs); lrec_reader_t* lrec_reader_stdio_xtab_alloc(char ips, int allow_repeat_ips); diff --git a/c/input/peek_file_reader.c b/c/input/peek_file_reader.c index 03dd154bf..ac02feb97 100644 --- a/c/input/peek_file_reader.c +++ b/c/input/peek_file_reader.c @@ -3,7 +3,7 @@ #include "input/peek_file_reader.h" // ---------------------------------------------------------------- -void pfr_dump(peek_file_reader_t* pfr) { +void pfr_print(peek_file_reader_t* pfr) { printf("======================== pfr at %p\n", pfr); printf(" peekbuflen = %d\n", pfr->peekbuflen); printf(" npeeked = %d\n", pfr->npeeked); diff --git a/c/input/peek_file_reader.h b/c/input/peek_file_reader.h index 54ef3ba20..fde1cd72c 100644 --- a/c/input/peek_file_reader.h +++ b/c/input/peek_file_reader.h @@ -93,6 +93,6 @@ static inline void pfr_advance_by(peek_file_reader_t* pfr, int len) { } // ---------------------------------------------------------------- -void pfr_dump(peek_file_reader_t* pfr); +void pfr_print(peek_file_reader_t* pfr); #endif // PEEK_FILE_READER_H diff --git a/c/input/test_byte_readers.c b/c/input/test_byte_readers.c index 9d506268e..65b6c3b79 100644 --- a/c/input/test_byte_readers.c +++ b/c/input/test_byte_readers.c @@ -197,6 +197,7 @@ static char * run_all_tests() { } int main(int argc, char **argv) { + printf("TEST_BYTE_READERS ENTER\n"); char *result = run_all_tests(); printf("\n"); if (result != 0) { diff --git 
a/c/input/test_peek_file_reader.c b/c/input/test_peek_file_reader.c index 9957ed3c3..58358c4f5 100644 --- a/c/input/test_peek_file_reader.c +++ b/c/input/test_peek_file_reader.c @@ -41,24 +41,24 @@ static char* test_non_empty() { peek_file_reader_t* pfr = pfr_alloc(pbr, 7); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'a'); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == 'a'); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'b'); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == 'b'); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'a'); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == 'a'); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'b'); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == 'b'); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == ','); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == ','); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == ','); - pfr_dump(pfr); pfr_buffer_by(pfr, 5); - pfr_dump(pfr); pfr_advance_by(pfr, 5); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == '2'); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == ','); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == ','); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == ','); + pfr_print(pfr); pfr_buffer_by(pfr, 5); + pfr_print(pfr); pfr_advance_by(pfr, 5); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '2'); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3'); - pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3'); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == '3'); - pfr_dump(pfr); pfr_buffer_by(pfr, 5); - pfr_dump(pfr); pfr_advance_by(pfr, 5); - pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == '\n'); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3'); + pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3'); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '3'); + pfr_print(pfr); pfr_buffer_by(pfr, 5); + pfr_print(pfr); pfr_advance_by(pfr, 5); + pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) 
== '\n'); pbr->pclose_func(pbr); pfr_free(pfr); @@ -74,6 +74,7 @@ static char * run_all_tests() { } int main(int argc, char **argv) { + printf("TEST_PEEK_FILE_READER ENTER\n"); char *result = run_all_tests(); printf("\n"); if (result != 0) { diff --git a/c/lib/mlrutil.c b/c/lib/mlrutil.c index bfd8081e0..5521a0090 100644 --- a/c/lib/mlrutil.c +++ b/c/lib/mlrutil.c @@ -227,10 +227,10 @@ int mlr_string_pair_hash_func(char* str1, char* str2) { } // ---------------------------------------------------------------- -char* mlr_get_line(FILE* input_stream, char rs) { +char* mlr_get_line(FILE* input_stream, char irs) { char* line = NULL; size_t linecap = 0; - ssize_t linelen = getdelim(&line, &linecap, rs, input_stream); + ssize_t linelen = getdelim(&line, &linecap, irs, input_stream); if (linelen <= 0) { return NULL; } diff --git a/c/lib/mlrutil.h b/c/lib/mlrutil.h index b166e31f3..6ce9a3d19 100644 --- a/c/lib/mlrutil.h +++ b/c/lib/mlrutil.h @@ -58,7 +58,7 @@ int mlr_string_hash_func(char *str); int mlr_string_pair_hash_func(char* str1, char* str2); // xxx cmt mem mgt -char* mlr_get_line(FILE* input_stream, char rs); +char* mlr_get_line(FILE* input_stream, char irs); // portable timegm replacement time_t mlr_timegm (struct tm *tm); diff --git a/c/lib/test_mlrutil.c b/c/lib/test_mlrutil.c index 391053af3..cc9cb3da7 100644 --- a/c/lib/test_mlrutil.c +++ b/c/lib/test_mlrutil.c @@ -58,10 +58,10 @@ static char * all_tests() { } int main(int argc, char **argv) { + printf("TEST_MLRUTIL ENTER\n"); char *result = all_tests(); printf("\n"); if (result != 0) { - //printf("%s\n", result); printf("Not all unit tests passed\n"); } else { diff --git a/c/lib/test_string_builder.c b/c/lib/test_string_builder.c index 0b0edcd82..6b9b9c726 100644 --- a/c/lib/test_string_builder.c +++ b/c/lib/test_string_builder.c @@ -78,10 +78,10 @@ static char * all_tests() { } int main(int argc, char **argv) { + printf("TEST_STRING_BUILDER ENTER\n"); char *result = all_tests(); printf("\n"); if (result != 
0) { - //printf("%s\n", result); printf("Not all unit tests passed\n"); } else { diff --git a/c/mapping/mapper_join.c b/c/mapping/mapper_join.c index b6d44f7bd..d9248335a 100644 --- a/c/mapping/mapper_join.c +++ b/c/mapping/mapper_join.c @@ -30,12 +30,12 @@ typedef struct _mapper_join_opts_t { // These allow the joiner to have its own different format/delimiter for // the left-file: char* input_file_format; - char irs; - char ifs; - char ips; + char* irs; + char* ifs; + char* ips; int allow_repeat_ifs; int allow_repeat_ips; - char* ifmt; + char* ifile_fmt; int use_mmap_for_read; } mapper_join_opts_t; @@ -237,12 +237,12 @@ static void mapper_join_free(void* pvstate) { static void merge_options(mapper_join_opts_t* popts) { if (popts->input_file_format == NULL) - popts->input_file_format = MLR_GLOBALS.popts->ifmt; - if (popts->irs == OPTION_UNSPECIFIED) + popts->input_file_format = MLR_GLOBALS.popts->ifile_fmt; + if (popts->irs == NULL) popts->irs = MLR_GLOBALS.popts->irs; - if (popts->ifs == OPTION_UNSPECIFIED) + if (popts->ifs == NULL) popts->ifs = MLR_GLOBALS.popts->ifs; - if (popts->ips == OPTION_UNSPECIFIED) + if (popts->ips == NULL) popts->ips = MLR_GLOBALS.popts->ips; if (popts->allow_repeat_ifs == OPTION_UNSPECIFIED) popts->allow_repeat_ifs = MLR_GLOBALS.popts->allow_repeat_ifs; @@ -360,9 +360,9 @@ static mapper_t* mapper_join_parse_cli(int* pargi, int argc, char** argv) { popts->emit_right_unpairables = FALSE; popts->input_file_format = NULL; - popts->irs = OPTION_UNSPECIFIED; - popts->ifs = OPTION_UNSPECIFIED; - popts->ips = OPTION_UNSPECIFIED; + popts->irs = NULL; + popts->ifs = NULL; + popts->ips = NULL; popts->allow_repeat_ifs = OPTION_UNSPECIFIED; popts->allow_repeat_ips = OPTION_UNSPECIFIED; popts->use_mmap_for_read = OPTION_UNSPECIFIED; @@ -370,25 +370,25 @@ static mapper_t* mapper_join_parse_cli(int* pargi, int argc, char** argv) { char* verb = argv[(*pargi)++]; ap_state_t* pstate = ap_alloc(); - ap_define_string_flag(pstate, "-f", 
&popts->left_file_name); - ap_define_string_list_flag(pstate, "-j", &popts->poutput_join_field_names); - ap_define_string_list_flag(pstate, "-l", &popts->pleft_join_field_names); - ap_define_string_list_flag(pstate, "-r", &popts->pright_join_field_names); - ap_define_string_flag(pstate, "--lp", &popts->left_prefix); - ap_define_string_flag(pstate, "--rp", &popts->right_prefix); - ap_define_false_flag(pstate, "--np", &popts->emit_pairables); - ap_define_true_flag(pstate, "--ul", &popts->emit_left_unpairables); - ap_define_true_flag(pstate, "--ur", &popts->emit_right_unpairables); - ap_define_true_flag(pstate, "-u", &popts->allow_unsorted_input); + ap_define_string_flag(pstate, "-f", &popts->left_file_name); + ap_define_string_list_flag(pstate, "-j", &popts->poutput_join_field_names); + ap_define_string_list_flag(pstate, "-l", &popts->pleft_join_field_names); + ap_define_string_list_flag(pstate, "-r", &popts->pright_join_field_names); + ap_define_string_flag(pstate, "--lp", &popts->left_prefix); + ap_define_string_flag(pstate, "--rp", &popts->right_prefix); + ap_define_false_flag(pstate, "--np", &popts->emit_pairables); + ap_define_true_flag(pstate, "--ul", &popts->emit_left_unpairables); + ap_define_true_flag(pstate, "--ur", &popts->emit_right_unpairables); + ap_define_true_flag(pstate, "-u", &popts->allow_unsorted_input); - ap_define_string_flag(pstate, "-i", &popts->input_file_format); - ap_define_char_flag(pstate, "--irs", &popts->irs); - ap_define_char_flag(pstate, "--ifs", &popts->ifs); - ap_define_char_flag(pstate, "--ips", &popts->ips); - ap_define_true_flag(pstate, "--repifs", &popts->allow_repeat_ifs); - ap_define_true_flag(pstate, "--repips", &popts->allow_repeat_ips); - ap_define_true_flag(pstate, "--use-mmap", &popts->use_mmap_for_read); - ap_define_false_flag(pstate, "--no-mmap", &popts->use_mmap_for_read); + ap_define_string_flag(pstate, "-i", &popts->input_file_format); + ap_define_string_flag(pstate, "--irs", &popts->irs); + 
ap_define_string_flag(pstate, "--ifs", &popts->ifs); + ap_define_string_flag(pstate, "--ips", &popts->ips); + ap_define_true_flag(pstate, "--repifs", &popts->allow_repeat_ifs); + ap_define_true_flag(pstate, "--repips", &popts->allow_repeat_ips); + ap_define_true_flag(pstate, "--use-mmap", &popts->use_mmap_for_read); + ap_define_false_flag(pstate, "--no-mmap", &popts->use_mmap_for_read); if (!ap_parse(pstate, verb, pargi, argc, argv)) { mapper_join_usage(argv[0], verb); diff --git a/c/mlrmain.c b/c/mlrmain.c index ce76c76b2..9141c5301 100644 --- a/c/mlrmain.c +++ b/c/mlrmain.c @@ -1,9 +1,6 @@ #include #include #include -#ifdef MLR_USE_MCHECK -#include -#endif #include "cli/mlrcli.h" #include "lib/mlrutil.h" @@ -16,13 +13,6 @@ #include "stream/stream.h" int main(int argc, char** argv) { -#ifdef MLR_USE_MCHECK - if (mcheck(NULL) != 0) { - fprintf(stderr, "Could not set up mcheck\n"); - exit(1); - } - fprintf(stderr, "Set up mcheck\n"); -#endif mlr_global_init(argv[0], NULL, NULL); cli_opts_t* popts = parse_command_line(argc, argv); mlr_global_init(argv[0], popts->ofmt, popts); diff --git a/c/output/lrec_writer_csv.c b/c/output/lrec_writer_csv.c index c89abe9fc..61f163810 100644 --- a/c/output/lrec_writer_csv.c +++ b/c/output/lrec_writer_csv.c @@ -13,8 +13,8 @@ static void quote_numeric_output_func(FILE* fp, char* string, char* ors, char* o typedef struct _lrec_writer_csv_state_t { int onr; - char *ors; // xxx char -> char* - char *ofs; // xxx char -> char* + char *ors; + char *ofs; int orslen; int ofslen; quoted_output_func_t* pquoted_output_func; @@ -78,15 +78,13 @@ static void lrec_writer_csv_free(void* pvstate) { } } -lrec_writer_t* lrec_writer_csv_alloc(char ors, char ofs, int oquoting) { +lrec_writer_t* lrec_writer_csv_alloc(char* ors, char* ofs, int oquoting) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_csv_state_t)); pstate->onr = 0; - //pstate->ors = ors; - 
//pstate->ofs = ofs; - pstate->ors = "\r\n"; // xxx temp - pstate->ofs = ","; // xxx temp + pstate->ors = ors; + pstate->ofs = ofs; pstate->orslen = strlen(pstate->ors); pstate->ofslen = strlen(pstate->ofs); diff --git a/c/output/lrec_writer_csvlite.c b/c/output/lrec_writer_csvlite.c index c608ffce1..cdf5538dd 100644 --- a/c/output/lrec_writer_csvlite.c +++ b/c/output/lrec_writer_csvlite.c @@ -4,9 +4,9 @@ #include "output/lrec_writers.h" typedef struct _lrec_writer_csvlite_state_t { - int onr; - char ors; - char ofs; + int onr; + char* ors; + char* ofs; long long num_header_lines_output; slls_t* plast_header_output; } lrec_writer_csvlite_state_t; @@ -18,8 +18,8 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void* if (prec == NULL) return; lrec_writer_csvlite_state_t* pstate = pvstate; - char ors = pstate->ors; - char ofs = pstate->ofs; + char* ors = pstate->ors; + char* ofs = pstate->ofs; if (pstate->plast_header_output != NULL) { // xxx make a fcn to compare these w/o copy: put it in mixutil. 
@@ -27,7 +27,7 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void* slls_free(pstate->plast_header_output); pstate->plast_header_output = NULL; if (pstate->num_header_lines_output > 0LL) - fputc(ors, output_stream); + fputs(ors, output_stream); } } @@ -35,11 +35,11 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void* int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) - fputc(ofs, output_stream); + fputs(ofs, output_stream); fputs(pe->key, output_stream); nf++; } - fputc(ors, output_stream); + fputs(ors, output_stream); pstate->plast_header_output = mlr_copy_keys_from_record(prec); pstate->num_header_lines_output++; } @@ -47,11 +47,11 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void* int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) - fputc(ofs, output_stream); + fputs(ofs, output_stream); fputs(pe->value, output_stream); nf++; } - fputc(ors, output_stream); + fputs(ors, output_stream); pstate->onr++; lrec_free(prec); // xxx cmt mem-mgmt @@ -65,7 +65,7 @@ static void lrec_writer_csvlite_free(void* pvstate) { } } -lrec_writer_t* lrec_writer_csvlite_alloc(char ors, char ofs) { +lrec_writer_t* lrec_writer_csvlite_alloc(char* ors, char* ofs) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_csvlite_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_csvlite_state_t)); diff --git a/c/output/lrec_writer_dkvp.c b/c/output/lrec_writer_dkvp.c index f49d85d2f..ef4a29fff 100644 --- a/c/output/lrec_writer_dkvp.c +++ b/c/output/lrec_writer_dkvp.c @@ -3,9 +3,9 @@ #include "output/lrec_writers.h" typedef struct _lrec_writer_dkvp_state_t { - char rs; - char fs; - char ps; + char* ors; + char* ofs; + char* ops; } lrec_writer_dkvp_state_t; // ---------------------------------------------------------------- @@ -13,33 +13,33 @@ static void lrec_writer_dkvp_process(FILE* 
output_stream, lrec_t* prec, void* pv if (prec == NULL) return; lrec_writer_dkvp_state_t* pstate = pvstate; - char rs = pstate->rs; - char fs = pstate->fs; - char ps = pstate->ps; + char* ors = pstate->ors; + char* ofs = pstate->ofs; + char* ops = pstate->ops; int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) - fputc(fs, output_stream); + fputs(ofs, output_stream); fputs(pe->key, output_stream); - fputc(ps, output_stream); + fputs(ops, output_stream); fputs(pe->value, output_stream); nf++; } - fputc(rs, output_stream); + fputs(ors, output_stream); lrec_free(prec); // xxx cmt mem-mgmt } static void lrec_writer_dkvp_free(void* pvstate) { } -lrec_writer_t* lrec_writer_dkvp_alloc(char rs, char fs, char ps) { +lrec_writer_t* lrec_writer_dkvp_alloc(char* ors, char* ofs, char* ops) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_dkvp_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_dkvp_state_t)); - pstate->rs = rs; - pstate->fs = fs; - pstate->ps = ps; + pstate->ors = ors; + pstate->ofs = ofs; + pstate->ops = ops; plrec_writer->pvstate = (void*)pstate; plrec_writer->pprocess_func = &lrec_writer_dkvp_process; diff --git a/c/output/lrec_writer_nidx.c b/c/output/lrec_writer_nidx.c index 85b476193..1ea8e6e74 100644 --- a/c/output/lrec_writer_nidx.c +++ b/c/output/lrec_writer_nidx.c @@ -3,8 +3,8 @@ #include "output/lrec_writers.h" typedef struct _lrec_writer_nidx_state_t { - char rs; - char fs; + char* ors; + char* ofs; } lrec_writer_nidx_state_t; // ---------------------------------------------------------------- @@ -12,29 +12,29 @@ static void lrec_writer_nidx_process(FILE* output_stream, lrec_t* prec, void* pv if (prec == NULL) return; lrec_writer_nidx_state_t* pstate = pvstate; - char rs = pstate->rs; - char fs = pstate->fs; + char* ors = pstate->ors; + char* ofs = pstate->ofs; int nf = 0; for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) { if (nf > 0) - fputc(fs, output_stream); + 
fputs(ofs, output_stream); fputs(pe->value, output_stream); nf++; } - fputc(rs, output_stream); + fputs(ors, output_stream); lrec_free(prec); // xxx cmt mem-mgmt } static void lrec_writer_nidx_free(void* pvstate) { } -lrec_writer_t* lrec_writer_nidx_alloc(char rs, char fs) { +lrec_writer_t* lrec_writer_nidx_alloc(char* ors, char* ofs) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_nidx_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_nidx_state_t)); - pstate->rs = rs; - pstate->fs = fs; + pstate->ors = ors; + pstate->ofs = ofs; plrec_writer->pvstate = (void*)pstate; plrec_writer->pprocess_func = &lrec_writer_nidx_process; diff --git a/c/output/lrec_writer_pprint.c b/c/output/lrec_writer_pprint.c index 2b8a7fd2c..425b4fe5e 100644 --- a/c/output/lrec_writer_pprint.c +++ b/c/output/lrec_writer_pprint.c @@ -11,9 +11,11 @@ typedef struct _lrec_writer_pprint_state_t { slls_t* pprev_keys; int left_align; long long num_blocks_written; + char* ors; + char* ofs; } lrec_writer_pprint_state_t; -static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, int left_align); +static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, char* ors, char* ofs, int left_align); // ---------------------------------------------------------------- static void lrec_writer_pprint_process(FILE* output_stream, lrec_t* prec, void* pvstate) { @@ -31,8 +33,8 @@ static void lrec_writer_pprint_process(FILE* output_stream, lrec_t* prec, void* if (drain) { if (pstate->num_blocks_written > 0LL) // xxx cmt - fputc('\n', output_stream); - print_and_free_record_list(pstate->precords, output_stream, pstate->left_align); + fputs(pstate->ors, output_stream); + print_and_free_record_list(pstate->precords, output_stream, pstate->ors, pstate->ofs, pstate->left_align); if (pstate->pprev_keys != NULL) { slls_free(pstate->pprev_keys); pstate->pprev_keys = NULL; @@ -48,7 +50,7 @@ static void lrec_writer_pprint_process(FILE* 
output_stream, lrec_t* prec, void* } // ---------------------------------------------------------------- -static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, int left_align) { +static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, char* ors, char* ofs, int left_align) { if (precords->length == 0) return; lrec_t* prec1 = precords->phead->pvdata; @@ -95,7 +97,7 @@ static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, in fprintf(output_stream, "%s", pe->key); } } - fputc('\n', output_stream); + fputs(ors, output_stream); } j = 0; @@ -122,7 +124,7 @@ static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, in fprintf(output_stream, "%s", value); } } - fputc('\n', output_stream); + fputs(ors, output_stream); lrec_free(prec); // xxx cmt mem-mgmt } @@ -143,12 +145,14 @@ static void lrec_writer_pprint_free(void* pvstate) { } } -lrec_writer_t* lrec_writer_pprint_alloc(int left_align) { +lrec_writer_t* lrec_writer_pprint_alloc(char* ors, char* ofs, int left_align) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_pprint_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_pprint_state_t)); pstate->precords = sllv_alloc(); pstate->pprev_keys = NULL; + pstate->ors = ors; + pstate->ofs = ofs; pstate->left_align = left_align; pstate->num_blocks_written = 0LL; diff --git a/c/output/lrec_writer_xtab.c b/c/output/lrec_writer_xtab.c index 39bad0a5a..8eadec118 100644 --- a/c/output/lrec_writer_xtab.c +++ b/c/output/lrec_writer_xtab.c @@ -4,6 +4,8 @@ #include "output/lrec_writers.h" typedef struct _lrec_writer_xtab_state_t { + char* ors; + char* ofs; long long record_count; } lrec_writer_xtab_state_t; @@ -13,7 +15,7 @@ static void lrec_writer_xtab_process(FILE* output_stream, lrec_t* prec, void* pv return; lrec_writer_xtab_state_t* pstate = pvstate; if (pstate->record_count > 0LL) - fprintf(output_stream, "\n"); + fputs(pstate->ors, output_stream); 
pstate->record_count++; int max_key_width = 1; @@ -28,8 +30,8 @@ static void lrec_writer_xtab_process(FILE* output_stream, lrec_t* prec, void* pv fprintf(output_stream, "%s", pe->key); int d = max_key_width - strlen_for_utf8_display(pe->key); for (int i = 0; i < d; i++) - fputc(' ', output_stream); - fprintf(output_stream, " %s\n", pe->value); + fputs(pstate->ofs, output_stream); + fprintf(output_stream, "%s%s%s", pstate->ofs, pe->value, pstate->ors); } lrec_free(prec); // xxx cmt mem-mgmt } @@ -37,10 +39,12 @@ static void lrec_writer_xtab_process(FILE* output_stream, lrec_t* prec, void* pv static void lrec_writer_xtab_free(void* pvstate) { } -lrec_writer_t* lrec_writer_xtab_alloc() { +lrec_writer_t* lrec_writer_xtab_alloc(char* ors, char* ofs) { lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t)); lrec_writer_xtab_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_xtab_state_t)); + pstate->ors = ors; + pstate->ofs = ofs; pstate->record_count = 0LL; plrec_writer->pvstate = pstate; diff --git a/c/output/lrec_writers.h b/c/output/lrec_writers.h index 737cfaf0e..f96ad2592 100644 --- a/c/output/lrec_writers.h +++ b/c/output/lrec_writers.h @@ -2,11 +2,11 @@ #define LREC_WRITERS_H #include "output/lrec_writer.h" -lrec_writer_t* lrec_writer_csv_alloc(char rs, char fs, int oquoting); -lrec_writer_t* lrec_writer_csvlite_alloc(char rs, char fs); -lrec_writer_t* lrec_writer_dkvp_alloc(char rs, char fs, char ps); -lrec_writer_t* lrec_writer_nidx_alloc(char rs, char fs); -lrec_writer_t* lrec_writer_pprint_alloc(int left_align); -lrec_writer_t* lrec_writer_xtab_alloc(); +lrec_writer_t* lrec_writer_csv_alloc(char* ors, char* ofs, int oquoting); +lrec_writer_t* lrec_writer_csvlite_alloc(char* ors, char* ofs); +lrec_writer_t* lrec_writer_dkvp_alloc(char* ors, char* ofs, char* ops); +lrec_writer_t* lrec_writer_nidx_alloc(char* ors, char* ofs); +lrec_writer_t* lrec_writer_pprint_alloc(char* ors, char*ofs, int left_align); +lrec_writer_t* 
lrec_writer_xtab_alloc(char* ors, char* ofs); #endif // LREC_WRITERS_H diff --git a/c/test/README.md b/c/test/README.md new file mode 100644 index 000000000..a12a168cb --- /dev/null +++ b/c/test/README.md @@ -0,0 +1,5 @@ +There are two classes of testing for Miller: + +* C source-file names starting with `test_` use MinUnit to **unit-test** various subsystems of interest. These are separate executables built and run by the build framework. + +* `test/run` runs the main `mlr` executable with canned inputs, comparing actual to canned outputs, to **regression-test** Miller's end-to-end operation. diff --git a/c/test/expected/out b/c/test/expected/out index d67477712..1b9823777 100644 --- a/c/test/expected/out +++ b/c/test/expected/out @@ -2208,3 +2208,11 @@ a,b,c 4,5,6 x,y"yy,z +mlr --csv --ifs semicolon --ofs pipe --irs lf --ors lflf cut -x -f b ./test/input/rfc-csv/modify-defaults.csv +a|c + +1|3 + +4|6 + + diff --git a/c/test/input/rfc-csv/modify-defaults.csv b/c/test/input/rfc-csv/modify-defaults.csv new file mode 100644 index 000000000..8278f303a --- /dev/null +++ b/c/test/input/rfc-csv/modify-defaults.csv @@ -0,0 +1,3 @@ +a;b;c +1;2;3 +4;;6 diff --git a/c/test/run b/c/test/run index 9f7dd7b6f..53012e8c5 100755 --- a/c/test/run +++ b/c/test/run @@ -462,6 +462,7 @@ run_mlr --csv cat $indir/rfc-csv/quoted-comma-truncated.csv run_mlr --csv cat $indir/rfc-csv/quoted-crlf.csv run_mlr --csv cat $indir/rfc-csv/quoted-crlf-truncated.csv run_mlr --csv cat $indir/rfc-csv/simple-truncated.csv $indir/rfc-csv/simple.csv +run_mlr --csv --ifs semicolon --ofs pipe --irs lf --ors lflf cut -x -f b $indir/rfc-csv/modify-defaults.csv # ================================================================ # A key feature of this regression script is that it can be invoked from any diff --git a/c/todo.txt b/c/todo.txt index f8c4908a9..ac33d8d43 100644 --- a/c/todo.txt +++ b/c/todo.txt @@ -1,45 +1,80 @@ ================================================================ BUGFIXES -! 
memory leak in csv reader! careful about slls data, and do not use lrec_put_no_free --> heap-fragging? --> redo inline-pasting but this time correctly weight the fragging effect --> denormalize :( pointer-copying is fine for string/mmap-backed cases in the absence of dquotes; - no struping needed *at all*. +:D ================================================================ TOP OF LIST -* v2.1.0: - o perf - o rs/fs -> csv - o multichar rs/fs/ps for all formats - o optimize csv read perf - o double-quote feature -> dkvp +---------------------------------------------------------------- +MAJOR: autoconfig - o make a profiler proggy-pair for getline vs. psb/pkr for simple cat - o rs/fs/ps from char to char* throughout - o parameterize csv rs/fs - o implement mmap-backed psb/pkr via vptr intf and profile that - o RFC "there may be a header" -- ?!? use nidx-style integer-numbered columns?? --no-header? - o DKVP double-quote support - i still need separate csvlite/csv on output since the former tolerates heterogeneity +---------------------------------------------------------------- +MAJOR: multi-char separators for file formats other than CSV +k oxs is functionally done +* need backslash-handling/parsing ... at least, \r \n \t. and, into online help. +! ixs: + o ips & ifs: needs *p==ixs with strneq(p, ixs); also double-null poke (sos&eos) + o irs for mmap: same + o irs for stdio: it all comes down to getdelim. + ! so focus on getline perf. + ! maybe best idea is to re-impl getdelim with multichar irs. + - rework csv reader to look more like csvlite (which is performant)? + ! temporary option is getdelim with final char of the multichar irs; strcmp backwards; + usually get it right; occasionally have to strcat/memcpy multiple such. this is + gross so don't do it unless multichar-getdelim doesn't pan out. 
+ +---------------------------------------------------------------- +MAJOR: csv mem-leak/read-perf +* current option runs faster w/o free, apparently due to heap-fragging + o memory leak in csv reader! careful about slls data, and do not use lrec_put_no_free + o redo inline-pasting but this time correctly weight the fragging effect +* for stdio, needs some thought ... +* ... but for mmap, it's almost always not necessary to strdup at all: + only on escaped-double-quote case. +* denormalize the pbr & make stdio pbr & ptr-backed (mmap,UT-string) pbr. +* code-dup (yes, sadly!) the CSV reader into two & do strups in stdio + but lrec_put w/ !LREC_FREE_VALUE for ptr-backed. +* or *maybe* pbr retent/free-flags for string/mmap w/o denorm, but only + if it's both elegant and fast +! experimental/getlines.c shows that even without the heap-fragging + issue, pfr+psb is 3.5x slower than getdelim. again suggesting + multi-char-terminated getdelim might be the best line of approach. + +---------------------------------------------------------------- +MINOR + +* define dkvp, nidx, etc @ cover x 2 + +? dkvp quoting ... wait until after the mmap/perf split. else, very undesirable + performance regression. + +* go back and re-apply ctype/isprint portability things to new spots + +* more dead-code mains ... lrec-eval; what else? + +* dsls/ build outside of pwd? or just lemon $(absdir)/filenamegoeshere.y? +* configure w/o autotools? likewise manpage. etc. multiple build levels. + +b mlr faq page + +* --mmap @ mlr -h +* ctype ff @ bld.out +* platform os/ccomp list to mlrdoc + +* -h vs. usage : stdout vs. stderr +* pprint join? * header-length data mismatch et. al: file/line * make an updated dependency list, esp. in light of a2x et al. * probably its own mlrdoc page ... 
at least, highlighted in build page -* trie-parse to-do: - o make a power-of-two ring buffer for pfr & trie - -* autoconf -* .deb -* homebrew - ---------------------------------------------------------------- little: +* RFC "there may be a header" -- ?!? use nidx-style integer-numbered columns?? --no-header? + * -Wall -Wextra -Wpedantic-?? Werror=unused-but-set-variable? * --mmap into online help ... diff --git a/doc/content-for-feature-comparison.html b/doc/content-for-feature-comparison.html index 7ab619426..8eecafa6e 100644 --- a/doc/content-for-feature-comparison.html +++ b/doc/content-for-feature-comparison.html @@ -2,7 +2,7 @@ POKI_PUT_TOC_HERE

File-format awareness

-Miller respects CSV headers. If you do mlr --csv-input cat *.csv then the header line is written once: +Miller respects CSV headers. If you do mlr --csv cat *.csv then the header line is written once:
diff --git a/doc/content-for-file-formats.html b/doc/content-for-file-formats.html index c039c8040..dacb910e5 100644 --- a/doc/content-for-file-formats.html +++ b/doc/content-for-file-formats.html @@ -9,14 +9,12 @@ changes of field names within a single data stream.

Miller has record separator RS and field separator FS, just as awk does. For TSV, use --fs tab; to convert TSV to -CSV, use --ifs tab --ofs , etc. (See also +CSV, use --ifs tab --ofs comma, etc. (See also POKI_PUT_LINK_FOR_PAGE(reference.html)HERE.) -

The --csvlite option supports programmable single-byte field and record separators, -e.g. you can do TSV. Meanwhile --csv supports RFC-4180 CSV ( -https://tools.ietf.org/html/rfc4180). -For more information about the current status of CSV support in Miller, please see -https://github.com/johnkerl/miller/releases/tag/v2.0.0. +

Miller’s --csv flag supports RFC-4180 CSV ( +https://tools.ietf.org/html/rfc4180). This includes CRLF line-terminators by default, regardless +of platform. You can use mlr --csv --rs lf for native Un*x (LF-terminated) CSV files.

Pretty-printed

Miller’s pretty-print format is like CSV, but column-aligned. For example, compare diff --git a/doc/content-for-record-heterogeneity.html b/doc/content-for-record-heterogeneity.html index 7f7534d05..d66321b7a 100644 --- a/doc/content-for-record-heterogeneity.html +++ b/doc/content-for-record-heterogeneity.html @@ -8,7 +8,7 @@ We think of CSV tables as rectangular: if there are 17 columns in the header the

CSV and pretty-print

-Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get standard CSV as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. For example: +Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get CSV per se as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. The difference between CSV and CSV-lite is that the former is RFC4180-compliant, while the latter readily handles heterogeneous data (which is non-compliant). For example:
POKI_RUN_COMMAND{{cat data/het.dkvp}}HERE diff --git a/doc/content-for-to-do.html b/doc/content-for-to-do.html index a64bce1fd..8fe1dd76d 100644 --- a/doc/content-for-to-do.html +++ b/doc/content-for-to-do.html @@ -6,11 +6,11 @@ announcement, by far the biggest asks were RFC-4180-compliant CSV, and packaging (Homebrew, .deb). -
  • Miller’s record, field, and pair separators can only be single +
  • Miller’s record, field, and pair separators can be single characters (e.g. newline, comma, equals sign), optionally allowing repeats on -input (e.g. multiple spaces treated as one). It would be nice if strings were -supported, e.g. "\n\n" paragraph-oriented record separation, or mix of -space and tab for field separation. +input (e.g. multiple spaces treated as one). Multi-character separator strings +(e.g. double-linefeed) are supported on input and output for CSV, and on output +for other formats. This is a work in progress.
  • String-oriented functions such as sub, and Miller’s filter, could be made far more powerful if a regular-expression diff --git a/doc/feature-comparison.html b/doc/feature-comparison.html index 0d4e412d2..2ac8223da 100644 --- a/doc/feature-comparison.html +++ b/doc/feature-comparison.html @@ -103,7 +103,7 @@ Miller commands were run with pretty-print-tabular output format.

    File-format awareness

    -Miller respects CSV headers. If you do mlr --csv-input cat *.csv then the header line is written once: +Miller respects CSV headers. If you do mlr --csv cat *.csv then the header line is written once:
    diff --git a/doc/file-formats.html b/doc/file-formats.html index 8aa725a16..33cc9c8af 100644 --- a/doc/file-formats.html +++ b/doc/file-formats.html @@ -112,14 +112,12 @@ changes of field names within a single data stream.

    Miller has record separator RS and field separator FS, just as awk does. For TSV, use --fs tab; to convert TSV to -CSV, use --ifs tab --ofs , etc. (See also +CSV, use --ifs tab --ofs comma, etc. (See also Reference.) -

    The --csvlite option supports programmable single-byte field and record separators, -e.g. you can do TSV. Meanwhile --csv supports RFC-4180 CSV ( -https://tools.ietf.org/html/rfc4180). -For more information about the current status of CSV support in Miller, please see -https://github.com/johnkerl/miller/releases/tag/v2.0.0. +

    Miller’s --csv flag supports RFC-4180 CSV ( +https://tools.ietf.org/html/rfc4180). This includes CRLF line-terminators by default, regardless +of platform. You can use mlr --csv --rs lf for native Un*x (LF-terminated) CSV files.

    Pretty-printed

    Miller’s pretty-print format is like CSV, but column-aligned. For example, compare diff --git a/doc/record-heterogeneity.html b/doc/record-heterogeneity.html index 60f121945..3910b2bf7 100644 --- a/doc/record-heterogeneity.html +++ b/doc/record-heterogeneity.html @@ -110,7 +110,7 @@ We think of CSV tables as rectangular: if there are 17 columns in the header the

    CSV and pretty-print

    -Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get standard CSV as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. For example: +Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get CSV per se as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. The difference between CSV and CSV-lite is that the former is RFC4180-compliant, while the latter readily handles heterogeneous data (which is non-compliant). For example:

    diff --git a/doc/reference.html b/doc/reference.html index f9e4910b7..24993b55b 100644 --- a/doc/reference.html +++ b/doc/reference.html @@ -249,19 +249,22 @@ Data-format options, for input, output, or both: --xtab --ixtab --oxtab Pretty-printed vertical-tabular -p is a keystroke-saver for --nidx --fs space --repifs Separator options, for input, output, or both: - --rs --irs --ors Record separators, defaulting to newline - --fs --ifs --ofs --repifs Field separators, defaulting to "," - --ps --ips --ops Pair separators, defaulting to "=" - Notes (as of Miller v2.0.0): - * RS/FS/PS are used for DKVP, NIDX, and CSVLITE formats where they must be single-character. - * For CSV, PPRINT, and XTAB formats, RS/FS/PS command-line options are ignored. + --rs --irs --ors Record separators, e.g. newline + --fs --ifs --ofs --repifs Field separators, e.g. comma + --ps --ips --ops Pair separators, e.g. equals sign + Notes (as of Miller v2.1.4): + * IRS,IFS,IPS,ORS,OFS,OPS are specifiable for all file formats. + * IRS,IFS,IPS may be multi-character for CSV; they must be single-character for other formats. + The latter restriction will be lifted in a near-future release. + * ORS,OFS,OPS may be multi-character for all formats. * DKVP, NIDX, CSVLITE, PPRINT, and XTAB formats are intended to handle platform-native text data. - In particular, this means LF line-terminators on Linux/OSX. + In particular, this means LF line-terminators by default on Linux/OSX. * CSV is intended to handle RFC-4180-compliant data. - In particular, this means it *only* handles CRLF line-terminators. - * This will change in v2.1.0, at which point there will be a (default-off) LF-termination option - for CSV, multi-char RS/FS/PS, and double-quote support for DKVP. -Double-quoting for CSV: + In particular, this means it uses CRLF line-terminators by default. + So, you can use "--csv --rs lf" for Linux-native CSV files. 
+ * You can use "--fs '|'", "--ips :", etc., or any of the following names for separators: + cr crcr lf lflf crlf crlfcrlf tab space comma newline pipe slash colon semicolon equals +Double-quoting for CSV output: --quote-all Wrap all fields in double quotes --quote-none Do not wrap any fields in double quotes, even if they have OFS or ORS in them --quote-minimal Wrap fields in double quotes only if they have OFS or ORS in them @@ -275,7 +278,7 @@ Other options: Output of one verb may be chained as input to another using "then", e.g. mlr stats1 -a min,mean,max -f flag,u,v -g color then sort -f color Please see http://johnkerl.org/miller/doc and/or http://github.com/johnkerl/miller for more information. -This is Miller version >= v2.1.1. +This is Miller version >= v2.1.4.

    diff --git a/doc/to-do.html b/doc/to-do.html index 3b874d3fd..40324e74f 100644 --- a/doc/to-do.html +++ b/doc/to-do.html @@ -101,11 +101,11 @@ Miller commands were run with pretty-print-tabular output format. announcement, by far the biggest asks were RFC-4180-compliant CSV, and packaging (Homebrew, .deb). -

  • Miller’s record, field, and pair separators can only be single +
  • Miller’s record, field, and pair separators can be single characters (e.g. newline, comma, equals sign), optionally allowing repeats on -input (e.g. multiple spaces treated as one). It would be nice if strings were -supported, e.g. "\n\n" paragraph-oriented record separation, or mix of -space and tab for field separation. +input (e.g. multiple spaces treated as one). Multi-character separator strings +(e.g. double-linefeed) are supported on input and output for CSV, and on output +for other formats. This is a work in progress.
  • String-oriented functions such as sub, and Miller’s filter, could be made far more powerful if a regular-expression