This commit is contained in:
Thomas Klausner 2015-09-14 08:59:10 +02:00
commit 9f0fb9995c
66 changed files with 1307 additions and 897 deletions

4
.gitignore vendored
View file

@ -10,7 +10,7 @@ test-byte-readers
test-peek-file-reader
test-parse-trie
test-lrec
test-maps-and-sets
test-multiple-containers
test-join-bucket-keeper
termcvt
a.out
@ -35,6 +35,8 @@ c/dsls/filter_dsl_parse.h
c/dsls/filter_dsl_parse.out
c/dsls/pdm
c/dsls/fdm
c/test/output
c/output/out
tags
*.la
*.lo

View file

@ -237,7 +237,7 @@ int main(int argc, char** argv) {
if (plist == NULL) {
printf("list is null\n");
} else {
char* out = slls_join(plist, ',');
char* out = slls_join(plist, ",");
printf("list is %s\n", out);
free(out);
}

View file

@ -44,10 +44,37 @@ static mapper_setup_t* mapper_lookup_table[] = {
static int mapper_lookup_table_length = sizeof(mapper_lookup_table) / sizeof(mapper_lookup_table[0]);
// ----------------------------------------------------------------
#define DEFAULT_RS '\n'
#define DEFAULT_FS ','
#define DEFAULT_PS '='
// Lazily-built map from separator names (e.g. "tab", "crlf") to the literal
// separator strings they denote. NULL until first requested.
static lhmss_t* pdesc_to_chars_map = NULL;

// Returns the shared name-to-separator map, allocating and filling it on the
// first call. Entries are inserted in the order of the table below; note that
// "lf" and "newline" both map to "\n".
static lhmss_t* get_desc_to_chars_map() {
	static char* names_and_seps[][2] = {
		{"cr",        "\r"},
		{"crcr",      "\r\r"},
		{"lf",        "\n"},
		{"lflf",      "\n\n"},
		{"crlf",      "\r\n"},
		{"crlfcrlf",  "\r\n\r\n"},
		{"tab",       "\t"},
		{"space",     " "},
		{"comma",     ","},
		{"newline",   "\n"},
		{"pipe",      "|"},
		{"slash",     "/"},
		{"colon",     ":"},
		{"semicolon", ";"},
		{"equals",    "="},
	};

	// Already built on an earlier call: hand back the same instance.
	if (pdesc_to_chars_map != NULL)
		return pdesc_to_chars_map;

	pdesc_to_chars_map = lhmss_alloc();
	int num_pairs = sizeof(names_and_seps) / sizeof(names_and_seps[0]);
	for (int i = 0; i < num_pairs; i++)
		lhmss_put(pdesc_to_chars_map, names_and_seps[i][0], names_and_seps[i][1]);
	return pdesc_to_chars_map;
}
// Resolves a separator command-line argument. If arg is one of the names held
// in the name-to-separator map, the mapped separator string is returned;
// otherwise arg itself is returned unchanged. argv0 is accepted but not used.
static char* sep_from_arg(char* arg, char* argv0) {
	char* named_sep = lhmss_get(get_desc_to_chars_map(), arg);
	return (named_sep == NULL) ? arg : named_sep;
}
// ----------------------------------------------------------------
#define DEFAULT_OFMT "%lf"
#define DEFAULT_OQUOTING QUOTE_MINIMAL
@ -89,19 +116,27 @@ static void main_usage(char* argv0, int exit_code) {
fprintf(o, " --xtab --ixtab --oxtab Pretty-printed vertical-tabular\n");
fprintf(o, " -p is a keystroke-saver for --nidx --fs space --repifs\n");
fprintf(o, "Separator options, for input, output, or both:\n");
fprintf(o, " --rs --irs --ors Record separators, defaulting to newline\n");
fprintf(o, " --fs --ifs --ofs --repifs Field separators, defaulting to \"%c\"\n", DEFAULT_FS);
fprintf(o, " --ps --ips --ops Pair separators, defaulting to \"%c\"\n", DEFAULT_PS);
fprintf(o, " Notes (as of Miller v2.0.0):\n");
fprintf(o, " * RS/FS/PS are used for DKVP, NIDX, and CSVLITE formats where they must be single-character.\n");
fprintf(o, " * For CSV, PPRINT, and XTAB formats, RS/FS/PS command-line options are ignored.\n");
fprintf(o, " --rs --irs --ors Record separators, e.g. newline\n");
fprintf(o, " --fs --ifs --ofs --repifs Field separators, e.g. comma\n");
fprintf(o, " --ps --ips --ops Pair separators, e.g. equals sign\n");
fprintf(o, " Notes (as of Miller v2.1.4):\n");
fprintf(o, " * IRS,IFS,IPS,ORS,OFS,OPS are specifiable for all file formats.\n");
fprintf(o, " * IRS,IFS,IPS may be multi-character for CSV; they must be single-character for other formats.\n");
fprintf(o, " The latter restriction will be lifted in a near-future release.\n");
fprintf(o, " * ORS,OFS,OPS may be multi-character for all formats.\n");
fprintf(o, " * DKVP, NIDX, CSVLITE, PPRINT, and XTAB formats are intended to handle platform-native text data.\n");
fprintf(o, " In particular, this means LF line-terminators on Linux/OSX.\n");
fprintf(o, " In particular, this means LF line-terminators by default on Linux/OSX.\n");
fprintf(o, " * CSV is intended to handle RFC-4180-compliant data.\n");
fprintf(o, " In particular, this means it *only* handles CRLF line-terminators.\n");
fprintf(o, " * This will change in v2.1.0, at which point there will be a (default-off) LF-termination option\n");
fprintf(o, " for CSV, multi-char RS/FS/PS, and double-quote support for DKVP.\n");
fprintf(o, "Double-quoting for CSV:\n");
fprintf(o, " In particular, this means it uses CRLF line-terminators by default.\n");
fprintf(o, " So, you can use \"--csv --rs lf\" for Linux-native CSV files.\n");
fprintf(o, " * You can use \"--fs '|'\", \"--ips :\", etc., or any of the following names for separators:\n");
fprintf(o, " ");
lhmss_t* pmap = get_desc_to_chars_map();
for (lhmsse_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
fprintf(o, " %s", pe->key);
}
fprintf(o, "\n");
fprintf(o, "Double-quoting for CSV output:\n");
fprintf(o, " --quote-all Wrap all fields in double quotes\n");
fprintf(o, " --quote-none Do not wrap any fields in double quotes, even if they have OFS or ORS in them\n");
fprintf(o, " --quote-minimal Wrap fields in double quotes only if they have OFS or ORS in them\n");
@ -144,30 +179,6 @@ static void check_arg_count(char** argv, int argi, int argc, int n) {
}
}
// Converts a separator command-line argument to a single character.
//
// arg:   either one of the names recognized below ("tab", "space", "comma",
//        "newline", "pipe", "slash", "colon", "semicolon", "equals") or a
//        literal one-character string.
// argv0: program name, passed to main_usage when arg is not acceptable.
//
// Returns the named character, or arg[0] for a one-character literal. Any
// other argument is reported through main_usage(argv0, 1), which is expected
// not to return (assumption: it exits with the given code).
static char sep_from_arg(char* arg, char* argv0) {
	if (streq(arg, "tab"))
		return '\t';
	if (streq(arg, "space"))
		return ' ';
	if (streq(arg, "comma"))
		return ',';
	if (streq(arg, "newline"))
		return '\n';
	if (streq(arg, "pipe"))
		return '|';
	if (streq(arg, "slash"))
		return '/';
	if (streq(arg, "colon"))
		return ':';
	// Fix: "semicolon" previously returned '|', duplicating the "pipe" case.
	if (streq(arg, "semicolon"))
		return ';';
	if (streq(arg, "equals"))
		return '=';
	// Not a known name: only a literal single character is acceptable.
	if (strlen(arg) != 1)
		main_usage(argv0, 1);
	return arg[0];
}
static mapper_setup_t* look_up_mapper_setup(char* verb) {
mapper_setup_t* pmapper_setup = NULL;
for (int i = 0; i < mapper_lookup_table_length; i++) {
@ -183,15 +194,40 @@ cli_opts_t* parse_command_line(int argc, char** argv) {
cli_opts_t* popts = mlr_malloc_or_die(sizeof(cli_opts_t));
memset(popts, 0, sizeof(*popts));
popts->irs = DEFAULT_RS;
popts->ifs = DEFAULT_FS;
popts->ips = DEFAULT_PS;
// xxx integrate these with DEFAULT_XS ...
lhmss_t* default_rses = lhmss_alloc();
lhmss_put(default_rses, "dkvp", "\n");
lhmss_put(default_rses, "csv", "\r\n");
lhmss_put(default_rses, "csvlite", "\n");
lhmss_put(default_rses, "nidx", "\n");
lhmss_put(default_rses, "xtab", "\n");
lhmss_put(default_rses, "pprint", "\n");
lhmss_t* default_fses = lhmss_alloc();
lhmss_put(default_fses, "dkvp", ",");
lhmss_put(default_fses, "csv", ",");
lhmss_put(default_fses, "csvlite", ",");
lhmss_put(default_fses, "nidx", ","); // xxx update to space at version bump
lhmss_put(default_fses, "xtab", " ");
lhmss_put(default_fses, "pprint", " ");
lhmss_t* default_pses = lhmss_alloc();
lhmss_put(default_pses, "dkvp", "=");
lhmss_put(default_pses, "csv", "X");
lhmss_put(default_pses, "csvlite", "X");
lhmss_put(default_pses, "nidx", "X");
lhmss_put(default_pses, "xtab", "X");
lhmss_put(default_pses, "pprint", "X");
popts->irs = NULL;
popts->ifs = NULL;
popts->ips = NULL;
popts->allow_repeat_ifs = FALSE;
popts->allow_repeat_ips = FALSE;
popts->ors = DEFAULT_RS;
popts->ofs = DEFAULT_FS;
popts->ops = DEFAULT_PS;
popts->ors = NULL;
popts->ofs = NULL;
popts->ops = NULL;
popts->ofmt = DEFAULT_OFMT;
popts->oquoting = DEFAULT_OQUOTING;
@ -199,8 +235,8 @@ cli_opts_t* parse_command_line(int argc, char** argv) {
popts->plrec_writer = NULL;
popts->filenames = NULL;
popts->ifmt = "dkvp";
char* ofmt = "dkvp";
popts->ifile_fmt = "dkvp";
popts->ofile_fmt = "dkvp";
popts->use_mmap_for_read = TRUE;
int left_align_pprint = TRUE;
@ -232,7 +268,8 @@ cli_opts_t* parse_command_line(int argc, char** argv) {
else if (streq(argv[argi], "--rs")) {
check_arg_count(argv, argi, argc, 2);
popts->ors = popts->irs = sep_from_arg(argv[argi+1], argv[0]);
popts->ors = sep_from_arg(argv[argi+1], argv[0]);
popts->irs = sep_from_arg(argv[argi+1], argv[0]);
argi++;
}
else if (streq(argv[argi], "--irs")) {
@ -248,7 +285,8 @@ cli_opts_t* parse_command_line(int argc, char** argv) {
else if (streq(argv[argi], "--fs")) {
check_arg_count(argv, argi, argc, 2);
popts->ofs = popts->ifs = sep_from_arg(argv[argi+1], argv[0]);
popts->ofs = sep_from_arg(argv[argi+1], argv[0]);
popts->ifs = sep_from_arg(argv[argi+1], argv[0]);
argi++;
}
else if (streq(argv[argi], "--ifs")) {
@ -266,16 +304,17 @@ cli_opts_t* parse_command_line(int argc, char** argv) {
}
else if (streq(argv[argi], "-p")) {
popts->ifmt = "nidx";
ofmt = "nidx";
popts->ifs = ' ';
popts->ofs = ' ';
popts->ifile_fmt = "nidx";
popts->ofile_fmt = "nidx";
popts->ifs = " ";
popts->ofs = " ";
popts->allow_repeat_ifs = TRUE;
}
else if (streq(argv[argi], "--ps")) {
check_arg_count(argv, argi, argc, 2);
popts->ops = popts->ips = sep_from_arg(argv[argi+1], argv[0]);
popts->ops = sep_from_arg(argv[argi+1], argv[0]);
popts->ips = sep_from_arg(argv[argi+1], argv[0]);
argi++;
}
else if (streq(argv[argi], "--ips")) {
@ -289,40 +328,40 @@ cli_opts_t* parse_command_line(int argc, char** argv) {
argi++;
}
else if (streq(argv[argi], "--csv")) { popts->ifmt = ofmt = "csv"; }
else if (streq(argv[argi], "--icsv")) { popts->ifmt = "csv"; }
else if (streq(argv[argi], "--ocsv")) { ofmt = "csv"; }
else if (streq(argv[argi], "--csv")) { popts->ifile_fmt = popts->ofile_fmt = "csv"; }
else if (streq(argv[argi], "--icsv")) { popts->ifile_fmt = "csv"; }
else if (streq(argv[argi], "--ocsv")) { popts->ofile_fmt = "csv"; }
else if (streq(argv[argi], "--csvlite")) { popts->ifmt = ofmt = "csvlite"; }
else if (streq(argv[argi], "--icsvlite")) { popts->ifmt = "csvlite"; }
else if (streq(argv[argi], "--ocsvlite")) { ofmt = "csvlite"; }
else if (streq(argv[argi], "--csvlite")) { popts->ifile_fmt = popts->ofile_fmt = "csvlite"; }
else if (streq(argv[argi], "--icsvlite")) { popts->ifile_fmt = "csvlite"; }
else if (streq(argv[argi], "--ocsvlite")) { popts->ofile_fmt = "csvlite"; }
else if (streq(argv[argi], "--dkvp")) { popts->ifmt = ofmt = "dkvp"; }
else if (streq(argv[argi], "--idkvp")) { popts->ifmt = "dkvp"; }
else if (streq(argv[argi], "--odkvp")) { ofmt = "dkvp"; }
else if (streq(argv[argi], "--dkvp")) { popts->ifile_fmt = popts->ofile_fmt = "dkvp"; }
else if (streq(argv[argi], "--idkvp")) { popts->ifile_fmt = "dkvp"; }
else if (streq(argv[argi], "--odkvp")) { popts->ofile_fmt = "dkvp"; }
else if (streq(argv[argi], "--nidx")) { popts->ifmt = ofmt = "nidx"; }
else if (streq(argv[argi], "--inidx")) { popts->ifmt = "nidx"; }
else if (streq(argv[argi], "--onidx")) { ofmt = "nidx"; }
else if (streq(argv[argi], "--nidx")) { popts->ifile_fmt = popts->ofile_fmt = "nidx"; }
else if (streq(argv[argi], "--inidx")) { popts->ifile_fmt = "nidx"; }
else if (streq(argv[argi], "--onidx")) { popts->ofile_fmt = "nidx"; }
else if (streq(argv[argi], "--xtab")) { popts->ifmt = ofmt = "xtab"; }
else if (streq(argv[argi], "--ixtab")) { popts->ifmt = "xtab"; }
else if (streq(argv[argi], "--oxtab")) { ofmt = "xtab"; }
else if (streq(argv[argi], "--xtab")) { popts->ifile_fmt = popts->ofile_fmt = "xtab"; }
else if (streq(argv[argi], "--ixtab")) { popts->ifile_fmt = "xtab"; }
else if (streq(argv[argi], "--oxtab")) { popts->ofile_fmt = "xtab"; }
else if (streq(argv[argi], "--ipprint")) {
popts->ifmt = "csvlite";
popts->ifs = ' ';
popts->ifile_fmt = "csvlite";
popts->ifs = " ";
popts->allow_repeat_ifs = TRUE;
}
else if (streq(argv[argi], "--opprint")) {
ofmt = "pprint";
popts->ofile_fmt = "pprint";
}
else if (streq(argv[argi], "--pprint")) {
popts->ifmt = "csvlite";
popts->ifs = ' ';
popts->ifile_fmt = "csvlite";
popts->ifs = " ";
popts->allow_repeat_ifs = TRUE;
ofmt = "pprint";
popts->ofile_fmt = "pprint";
}
else if (streq(argv[argi], "--right")) {
left_align_pprint = FALSE;
@ -330,7 +369,7 @@ cli_opts_t* parse_command_line(int argc, char** argv) {
else if (streq(argv[argi], "--ofmt")) {
check_arg_count(argv, argi, argc, 2);
popts->ofmt = argv[argi+1];
popts->ofile_fmt = argv[argi+1];
argi++;
}
@ -362,12 +401,52 @@ cli_opts_t* parse_command_line(int argc, char** argv) {
nusage(argv[0], argv[argi]);
}
if (streq(ofmt, "dkvp")) popts->plrec_writer = lrec_writer_dkvp_alloc(popts->ors, popts->ofs, popts->ops);
else if (streq(ofmt, "csv")) popts->plrec_writer = lrec_writer_csv_alloc(popts->ors, popts->ofs, popts->oquoting);
else if (streq(ofmt, "csvlite")) popts->plrec_writer = lrec_writer_csvlite_alloc(popts->ors, popts->ofs);
else if (streq(ofmt, "nidx")) popts->plrec_writer = lrec_writer_nidx_alloc(popts->ors, popts->ofs);
else if (streq(ofmt, "xtab")) popts->plrec_writer = lrec_writer_xtab_alloc();
else if (streq(ofmt, "pprint")) popts->plrec_writer = lrec_writer_pprint_alloc(left_align_pprint);
if (popts->irs == NULL)
popts->irs = lhmss_get(default_rses, popts->ifile_fmt);
if (popts->ifs == NULL)
popts->ifs = lhmss_get(default_fses, popts->ifile_fmt);
if (popts->ips == NULL)
popts->ips = lhmss_get(default_pses, popts->ifile_fmt);
if (popts->ors == NULL)
popts->ors = lhmss_get(default_rses, popts->ofile_fmt);
if (popts->ofs == NULL)
popts->ofs = lhmss_get(default_fses, popts->ofile_fmt);
if (popts->ops == NULL)
popts->ops = lhmss_get(default_pses, popts->ofile_fmt);
if (popts->irs == NULL) {
fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
exit(1);
}
if (popts->ifs == NULL) {
fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
exit(1);
}
if (popts->ips == NULL) {
fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
exit(1);
}
if (popts->ors == NULL) {
fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
exit(1);
}
if (popts->ofs == NULL) {
fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
exit(1);
}
if (popts->ops == NULL) {
fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
exit(1);
}
if (streq(popts->ofile_fmt, "dkvp")) popts->plrec_writer = lrec_writer_dkvp_alloc(popts->ors, popts->ofs, popts->ops);
else if (streq(popts->ofile_fmt, "csv")) popts->plrec_writer = lrec_writer_csv_alloc(popts->ors, popts->ofs, popts->oquoting);
else if (streq(popts->ofile_fmt, "csvlite")) popts->plrec_writer = lrec_writer_csvlite_alloc(popts->ors, popts->ofs);
else if (streq(popts->ofile_fmt, "nidx")) popts->plrec_writer = lrec_writer_nidx_alloc(popts->ors, popts->ofs);
else if (streq(popts->ofile_fmt, "xtab")) popts->plrec_writer = lrec_writer_xtab_alloc(popts->ors, popts->ofs);
else if (streq(popts->ofile_fmt, "pprint")) popts->plrec_writer = lrec_writer_pprint_alloc(popts->ors, popts->ofs, left_align_pprint);
else {
main_usage(argv[0], 1);
}
@ -414,7 +493,7 @@ cli_opts_t* parse_command_line(int argc, char** argv) {
if (argi == argc)
popts->use_mmap_for_read = FALSE;
popts->plrec_reader = lrec_reader_alloc(popts->ifmt, popts->use_mmap_for_read,
popts->plrec_reader = lrec_reader_alloc(popts->ifile_fmt, popts->use_mmap_for_read,
popts->irs, popts->ifs, popts->allow_repeat_ifs, popts->ips, popts->allow_repeat_ips);
if (popts->plrec_reader == NULL)
main_usage(argv[0], 1);

View file

@ -17,17 +17,18 @@
#define QUOTE_NUMERIC 0xb4
typedef struct _cli_opts_t {
char irs;
char ifs;
char ips;
char* irs;
char* ifs;
char* ips;
int allow_repeat_ifs;
int allow_repeat_ips;
int use_mmap_for_read;
char* ifmt;
char* ifile_fmt;
char* ofile_fmt;
char ors;
char ofs;
char ops;
char* ors;
char* ofs;
char* ops;
char* ofmt;
int oquoting;

View file

@ -120,20 +120,20 @@ void dheap_print(dheap_t *pdheap)
// 4 5 6 7
// 8 9 10 11 12 13 14 15
static void dheap_check_aux(dheap_t *pdheap, int i, char *file, int line)
static int dheap_check_aux(dheap_t *pdheap, int i, char *file, int line)
{
int n = pdheap->n;
double *pe = pdheap->elements;
if (i >= n)
return;
return TRUE;
int li = dheap_left_child_index (i, pdheap->n);
int ri = dheap_right_child_index(i, pdheap->n);
if (li != -1) {
if (pe[i] < pe[li]) {
fprintf(stderr, "dheap check fail %s:%d pe[%d]=%lf < pe[%d]=%lf\n",
file, line, i, pe[i], li, pe[li]);
exit(1);
return FALSE;
}
dheap_check_aux(pdheap, li, file, line);
}
@ -141,15 +141,16 @@ static void dheap_check_aux(dheap_t *pdheap, int i, char *file, int line)
if (pe[i] < pe[ri]) {
fprintf(stderr, "dheap check fail %s:%d pe[%d]=%lf < pe[%d]=%lf\n",
file, line, i, pe[i], ri, pe[ri]);
exit(1);
return FALSE;
}
dheap_check_aux(pdheap, ri, file, line);
}
return TRUE;
}
void dheap_check(dheap_t *pdheap, char *file, int line)
int dheap_check(dheap_t *pdheap, char *file, int line)
{
dheap_check_aux(pdheap, 1, file, line);
return dheap_check_aux(pdheap, 1, file, line);
}
// ----------------------------------------------------------------

View file

@ -19,10 +19,12 @@ dheap_t *dheap_alloc();
dheap_t *dheap_from_array(double *array, int n);
void dheap_free(dheap_t *pheap);
void dheap_print(dheap_t *pdheap);
void dheap_check(dheap_t *pdheap, char *file, int line);
void dheap_add(dheap_t *pdheap, double v);
double dheap_remove(dheap_t *pdheap);
// For debug
void dheap_print(dheap_t *pdheap);
// For unit test
int dheap_check(dheap_t *pdheap, char *file, int line);
#endif // DHEAP_H

View file

@ -231,7 +231,7 @@ static char* get_state_name(int state) {
}
}
void hss_dump(hss_t* pset) {
void hss_print(hss_t* pset) {
for (int index = 0; index < pset->array_length; index++) {
hsse_t* pe = &pset->array[index];

View file

@ -47,10 +47,10 @@ join_bucket_keeper_t* join_bucket_keeper_alloc(
char* left_file_name,
char* input_file_format,
int use_mmap_for_read,
char irs,
char ifs,
char* irs,
char* ifs,
int allow_repeat_ifs,
char ips,
char* ips,
int allow_repeat_ips,
slls_t* pleft_field_names
) {

View file

@ -35,10 +35,10 @@ join_bucket_keeper_t* join_bucket_keeper_alloc(
char* left_file_name,
char* input_file_format,
int use_mmap_for_read,
char irs,
char ifs,
char* irs,
char* ifs,
int allow_repeat_ifs,
char ips,
char* ips,
int allow_repeat_ips,
slls_t* pleft_field_names);

View file

@ -309,7 +309,7 @@ static char* get_state_name(int state) {
}
}
void lhms2v_dump(lhms2v_t* pmap) {
void lhms2v_print(lhms2v_t* pmap) {
for (int index = 0; index < pmap->array_length; index++) {
lhms2ve_t* pe = &pmap->entries[index];

View file

@ -325,7 +325,7 @@ static char* get_state_name(int state) {
}
}
void lhmsi_dump(lhmsi_t* pmap) {
void lhmsi_print(lhmsi_t* pmap) {
for (int index = 0; index < pmap->array_length; index++) {
lhmsie_t* pe = &pmap->entries[index];

View file

@ -246,6 +246,7 @@ void* lhmslv_remove(lhmslv_t* pmap, slls_t* key) {
void lhmslv_clear(lhmslv_t* pmap) {
for (int i = 0; i < pmap->array_length; i++) {
lhmslve_clear(&pmap->entries[i]);
pmap->states[i] = EMPTY;
}
pmap->num_occupied = 0;
pmap->num_freed = 0;
@ -271,7 +272,7 @@ static void lhmslv_enlarge(lhmslv_t* pmap) {
}
// ----------------------------------------------------------------
void lhmslv_check_counts(lhmslv_t* pmap) {
int lhmslv_check_counts(lhmslv_t* pmap) {
int nocc = 0;
int ndel = 0;
for (int index = 0; index < pmap->array_length; index++) {
@ -284,14 +285,15 @@ void lhmslv_check_counts(lhmslv_t* pmap) {
fprintf(stderr,
"occupancy-count mismatch: actual %d != cached %d\n",
nocc, pmap->num_occupied);
exit(1);
return FALSE;
}
if (ndel != pmap->num_freed) {
fprintf(stderr,
"freed-count mismatch: actual %d != cached %d\n",
ndel, pmap->num_freed);
exit(1);
return FALSE;
}
return TRUE;
}
// ----------------------------------------------------------------
@ -304,13 +306,13 @@ static char* get_state_name(int state) {
}
}
void lhmslv_dump(lhmslv_t* pmap) {
void lhmslv_print(lhmslv_t* pmap) {
for (int index = 0; index < pmap->array_length; index++) {
lhmslve_t* pe = &pmap->entries[index];
const char* key_string = (pe == NULL) ? "none" :
pe->key == NULL ? "null" :
slls_join(pe->key, ',');
slls_join(pe->key, ",");
const char* value_string = (pe == NULL) ? "none" :
pe->pvvalue == NULL ? "null" :
pe->pvvalue;
@ -325,7 +327,7 @@ void lhmslv_dump(lhmslv_t* pmap) {
for (lhmslve_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) {
const char* key_string = (pe == NULL) ? "none" :
pe->key == NULL ? "null" :
slls_join(pe->key, ',');
slls_join(pe->key, ",");
const char* value_string = (pe == NULL) ? "none" :
pe->pvvalue == NULL ? "null" :
pe->pvvalue;

View file

@ -49,4 +49,7 @@ void* lhmslv_remove(lhmslv_t* pmap, slls_t* key);
void lhmslv_clear(lhmslv_t* pmap);
int lhmslv_size(lhmslv_t* pmap);
// Unit-test hook
int lhmslv_check_counts(lhmslv_t* pmap);
#endif // LHMSLV_H

View file

@ -257,30 +257,6 @@ static void lhmss_enlarge(lhmss_t* pmap) {
free(old_states);
}
// ----------------------------------------------------------------
// Consistency check: recounts the OCCUPIED and DELETED slots in the state
// array and compares them with the cached num_occupied and num_freed fields.
// On the first mismatch, prints a diagnostic to stderr and exits with status 1.
void lhmss_check_counts(lhmss_t* pmap) {
	int occupied_seen = 0;
	int deleted_seen = 0;

	int slot = 0;
	while (slot < pmap->array_length) {
		if (pmap->states[slot] == OCCUPIED) {
			occupied_seen++;
		} else if (pmap->states[slot] == DELETED) {
			deleted_seen++;
		}
		slot++;
	}

	if (pmap->num_occupied != occupied_seen) {
		fprintf(stderr,
			"occupancy-count mismatch: actual %d != cached %d.\n",
			occupied_seen, pmap->num_occupied);
		exit(1);
	}

	if (pmap->num_freed != deleted_seen) {
		fprintf(stderr,
			"freed-count mismatch: actual %d != cached %d.\n",
			deleted_seen, pmap->num_freed);
		exit(1);
	}
}
// ----------------------------------------------------------------
static char* get_state_name(int state) {
switch(state) {
@ -291,7 +267,7 @@ static char* get_state_name(int state) {
}
}
void lhmss_dump(lhmss_t* pmap) {
void lhmss_print(lhmss_t* pmap) {
for (int index = 0; index < pmap->array_length; index++) {
lhmsse_t* pe = &pmap->entries[index];
@ -322,3 +298,28 @@ void lhmss_dump(lhmss_t* pmap) {
pe->ideal_index, key_string, value_string);
}
}
// ----------------------------------------------------------------
// Unit-test hook: recounts the OCCUPIED and DELETED slots in the state array
// and compares them with the cached num_occupied and num_freed fields.
// Returns TRUE when both agree. On the first mismatch, prints a diagnostic to
// stderr and returns FALSE.
int lhmss_check_counts(lhmss_t* pmap) {
	int occupied_seen = 0;
	int deleted_seen = 0;

	int slot = 0;
	while (slot < pmap->array_length) {
		if (pmap->states[slot] == OCCUPIED) {
			occupied_seen++;
		} else if (pmap->states[slot] == DELETED) {
			deleted_seen++;
		}
		slot++;
	}

	if (pmap->num_occupied != occupied_seen) {
		fprintf(stderr,
			"occupancy-count mismatch: actual %d != cached %d.\n",
			occupied_seen, pmap->num_occupied);
		return FALSE;
	}

	if (pmap->num_freed != deleted_seen) {
		fprintf(stderr,
			"deleted-count mismatch: actual %d != cached %d.\n",
			deleted_seen, pmap->num_freed);
		return FALSE;
	}

	return TRUE;
}

View file

@ -49,4 +49,9 @@ int lhmss_has_key(lhmss_t* pmap, char* key);
void lhmss_remove(lhmss_t* pmap, char* key);
void lhmss_rename(lhmss_t* pmap, char* old_key, char* new_key);
void lhmss_print(lhmss_t* pmap);
// Unit-test hook
int lhmss_check_counts(lhmss_t* pmap);
#endif // LHMSS_H

View file

@ -245,7 +245,7 @@ static void lhmsv_enlarge(lhmsv_t* pmap) {
}
// ----------------------------------------------------------------
void lhmsv_check_counts(lhmsv_t* pmap) {
int lhmsv_check_counts(lhmsv_t* pmap) {
int nocc = 0;
int ndel = 0;
for (int index = 0; index < pmap->array_length; index++) {
@ -258,14 +258,15 @@ void lhmsv_check_counts(lhmsv_t* pmap) {
fprintf(stderr,
"occupancy-count mismatch: actual %d != cached %d.\n",
nocc, pmap->num_occupied);
exit(1);
return FALSE;
}
if (ndel != pmap->num_freed) {
fprintf(stderr,
"deleted-count mismatch: actual %d != cached %d.\n",
ndel, pmap->num_freed);
exit(1);
return FALSE;
}
return TRUE;
}
// ----------------------------------------------------------------
@ -278,7 +279,7 @@ static char* get_state_name(int state) {
}
}
void lhmsv_dump(lhmsv_t* pmap) {
void lhmsv_print(lhmsv_t* pmap) {
for (int index = 0; index < pmap->array_length; index++) {
lhmsve_t* pe = &pmap->entries[index];

View file

@ -47,4 +47,7 @@ void* lhmsv_get(lhmsv_t* pmap, char* key);
int lhmsv_has_key(lhmsv_t* pmap, char* key);
void lhmsv_remove(lhmsv_t* pmap, char* key);
// Unit-test hook
int lhmsv_check_counts(lhmsv_t* pmap);
#endif // LHMSV_H

View file

@ -428,17 +428,17 @@ lrec_t* lrec_literal_4(char* k1, char* v1, char* k2, char* v2, char* k3, char* v
void lrec_print(lrec_t* prec) {
FILE* output_stream = stdout;
char rs = '\n';
char fs = ',';
char ps = '=';
char ors = '\n';
char ofs = ',';
char ops = '=';
int nf = 0;
for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
if (nf > 0)
fputc(fs, output_stream);
fputc(ofs, output_stream);
fputs(pe->key, output_stream);
fputc(ps, output_stream);
fputc(ops, output_stream);
fputs(pe->value, output_stream);
nf++;
}
fputc(rs, output_stream);
fputc(ors, output_stream);
}

View file

@ -111,6 +111,7 @@ void lrec_move_to_tail(lrec_t* prec, char* key);
void lrec_free(lrec_t* prec);
void lrec_print(lrec_t* prec);
void lrec_dump(lrec_t* prec);
void lrec_dump_titled(char* msg, lrec_t* prec);
@ -123,6 +124,4 @@ lrec_t* lrec_literal_2(char* k1, char* v1, char* k2, char* v2);
lrec_t* lrec_literal_3(char* k1, char* v1, char* k2, char* v2, char* k3, char* v3);
lrec_t* lrec_literal_4(char* k1, char* v1, char* k2, char* v2, char* k3, char* v3, char* k4, char* v4);
void lrec_print(lrec_t* prec);
#endif // LREC_H

View file

@ -65,3 +65,11 @@ double percentile_keeper_emit(percentile_keeper_t* ppercentile_keeper, double pe
}
return ppercentile_keeper->data[compute_index(ppercentile_keeper->size, percentile)];
}
// ----------------------------------------------------------------
// Debug/test aid: prints a header line to stdout, then one line per stored
// value in index order, formatted as a two-digit index and the value with
// eight decimal places.
void percentile_keeper_print(percentile_keeper_t* ppercentile_keeper) {
	printf("percentile_keeper dump:\n");

	int idx = 0;
	while (idx < ppercentile_keeper->size) {
		printf("[%02d] %.8lf\n", idx, ppercentile_keeper->data[idx]);
		idx++;
	}
}

View file

@ -18,4 +18,7 @@ void percentile_keeper_free(percentile_keeper_t* ppercentile_keeper);
void percentile_keeper_ingest(percentile_keeper_t* ppercentile_keeper, double value);
double percentile_keeper_emit(percentile_keeper_t* ppercentile_keeper, double percentile);
// For debug/test
void percentile_keeper_print(percentile_keeper_t* ppercentile_keeper);
#endif // PERCENTILE_KEEPER_H

View file

@ -118,17 +118,16 @@ slls_t* slls_from_line(char* line, char ifs, int allow_repeat_ifs) {
// ----------------------------------------------------------------
// xxx cmt for debug. inefficient. or fix that.
// xxx rename to slls_alloc_join
char* slls_join(slls_t* plist, char fs) {
char* slls_join(slls_t* plist, char* ofs) {
int len = 0;
for (sllse_t* pe = plist->phead; pe != NULL; pe = pe->pnext)
len += strlen(pe->value) + 1; // include space for fs and null-terminator
len += strlen(pe->value) + 1; // include space for ofs and null-terminator
char* output = mlr_malloc_or_die(len);
char sep[2] = {fs, 0};
*output = 0;
for (sllse_t* pe = plist->phead; pe != NULL; pe = pe->pnext) {
strcat(output, pe->value);
if (pe->pnext != NULL) {
strcat(output, sep);
strcat(output, ofs);
}
}

View file

@ -38,7 +38,7 @@ int slls_compare_lexically(slls_t* pa, slls_t* pb);
void slls_sort(slls_t* plist);
// Debug routines:
char* slls_join(slls_t* plist, char fs);
char* slls_join(slls_t* plist, char* ofs);
void slls_print(slls_t* plist);
#endif // SLLS_H

View file

@ -427,6 +427,7 @@ static char * run_all_tests() {
}
int main(int argc, char **argv) {
printf("TEST_JOIN_BUCKET_KEEPER ENTER\n");
if ((argc == 2) && streq(argv[1], "-v"))
tjbk_verbose = TRUE;

View file

@ -1,8 +1,5 @@
#include <stdio.h>
#include <string.h>
#ifdef MLR_USE_MCHECK
#include <mcheck.h>
#endif // MLR_USE_MCHECK
#include "lib/minunit.h"
#include "lib/mlrutil.h"
#include "containers/lrec.h"
@ -262,14 +259,7 @@ static char * run_all_tests() {
}
int main(int argc, char **argv) {
#ifdef MLR_USE_MCHECK
if (mcheck(NULL) != 0) {
printf("Could not set up mcheck\n");
exit(1);
}
printf("Set up mcheck\n");
#endif // MLR_USE_MCHECK
printf("TEST_LREC ENTER\n");
char *result = run_all_tests();
printf("\n");
if (result != 0) {

View file

@ -1,478 +0,0 @@
#include <stdio.h>
#include <string.h>
#include "lib/minunit.h"
#include "lib/mlrutil.h"
#include "containers/slls.h"
#include "containers/sllv.h"
#include "containers/hss.h"
#include "containers/lhmsi.h"
#include "containers/lhms2v.h"
#ifdef __TEST_MAPS_AND_SETS_MAIN__
int tests_run = 0;
int tests_failed = 0;
int assertions_run = 0;
int assertions_failed = 0;
// ----------------------------------------------------------------
// Unit test for slls: building a string list from a comma-separated line and
// sorting it. Returns NULL when the end of the test is reached.
// NOTE(review): mu_assert_lf comes from lib/minunit.h; what it does on a failed
// assertion is not visible in this file -- confirm before restructuring.
static char* test_slls() {
// An empty line is expected to produce an empty list.
slls_t* plist = slls_from_line(strdup(""), ',', FALSE);
mu_assert_lf(plist->length == 0);
// A line with no separator is expected to produce a single-element list.
plist = slls_from_line(strdup("a"), ',', FALSE);
mu_assert_lf(plist->length == 1);
// Five comma-separated fields: expect five elements, in input order.
plist = slls_from_line(strdup("c,d,a,e,b"), ',', FALSE);
mu_assert_lf(plist->length == 5);
sllse_t* pe = plist->phead;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "c")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "d")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "a")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "e")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "b")); pe = pe->pnext;
mu_assert_lf(pe == NULL);
// After sorting: same length, elements expected in the order a, b, c, d, e.
slls_sort(plist);
mu_assert_lf(plist->length == 5);
pe = plist->phead;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "a")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "b")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "c")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "d")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->value, "e")); pe = pe->pnext;
mu_assert_lf(pe == NULL);
return NULL;
}
// ----------------------------------------------------------------
// Unit test for sllv_append: appends list pb onto list pa, then checks the
// contents of both lists. Returns NULL when the end of the test is reached.
// NOTE(review): mu_assert_lf comes from lib/minunit.h; what it does on a failed
// assertion is not visible in this file -- confirm before restructuring.
static char* test_sllv_append() {
mu_assert_lf(0 == 0);
// Build pa = [a, b, c] and verify its length and element order.
sllv_t* pa = sllv_alloc();
sllv_add(pa, "a");
sllv_add(pa, "b");
sllv_add(pa, "c");
mu_assert_lf(pa->length == 3);
sllve_t* pe = pa->phead;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "a")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "b")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "c")); pe = pe->pnext;
mu_assert_lf(pe == NULL);
// Build pb = [d, e] and verify its length and element order.
sllv_t* pb = sllv_alloc();
sllv_add(pb, "d");
sllv_add(pb, "e");
mu_assert_lf(pb->length == 2);
pe = pb->phead;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "d")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "e")); pe = pe->pnext;
mu_assert_lf(pe == NULL);
// Append pb to pa: pa is expected to hold all five elements in order, while
// pb is expected to still report length 2.
pa = sllv_append(pa, pb);
mu_assert_lf(pa->length == 5);
mu_assert_lf(pb->length == 2);
pe = pa->phead;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "a")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "b")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "c")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "d")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "e")); pe = pe->pnext;
mu_assert_lf(pe == NULL);
// pb, walked from its own head, is expected to still yield d then e.
pe = pb->phead;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "d")); pe = pe->pnext;
mu_assert_lf(pe != NULL); mu_assert_lf(streq(pe->pvdata, "e")); pe = pe->pnext;
mu_assert_lf(pe == NULL);
return NULL;
}
// ----------------------------------------------------------------
// Unit test for the hss string set: exercises add, duplicate add, remove and
// clear, checking membership of "w", "x", "y", "z" and the result of
// hss_check_counts after each step. Returns NULL when the end is reached.
// NOTE(review): mu_assert_lf comes from lib/minunit.h; what it does on a failed
// assertion is not visible in this file -- confirm before restructuring.
static char* test_hss() {
// A freshly allocated set is expected to have no occupied slots.
hss_t *pset = hss_alloc();
mu_assert_lf(pset->num_occupied == 0);
// Add "x": only "x" is expected to be present.
hss_add(pset, "x");
mu_assert_lf(pset->num_occupied == 1);
mu_assert_lf(!hss_has(pset, "w"));
mu_assert_lf(hss_has(pset, "x"));
mu_assert_lf(!hss_has(pset, "y"));
mu_assert_lf(!hss_has(pset, "z"));
mu_assert_lf(hss_check_counts(pset));
// Add "y": "x" and "y" are expected to be present.
hss_add(pset, "y");
mu_assert_lf(pset->num_occupied == 2);
mu_assert_lf(!hss_has(pset, "w"));
mu_assert_lf(hss_has(pset, "x"));
mu_assert_lf(hss_has(pset, "y"));
mu_assert_lf(!hss_has(pset, "z"));
mu_assert_lf(hss_check_counts(pset));
// Add "x" again: the occupied count is expected to stay at 2.
hss_add(pset, "x");
mu_assert_lf(pset->num_occupied == 2);
mu_assert_lf(!hss_has(pset, "w"));
mu_assert_lf(hss_has(pset, "x"));
mu_assert_lf(hss_has(pset, "y"));
mu_assert_lf(!hss_has(pset, "z"));
mu_assert_lf(hss_check_counts(pset));
// Add "z": "x", "y" and "z" are expected to be present.
hss_add(pset, "z");
mu_assert_lf(pset->num_occupied == 3);
mu_assert_lf(!hss_has(pset, "w"));
mu_assert_lf(hss_has(pset, "x"));
mu_assert_lf(hss_has(pset, "y"));
mu_assert_lf(hss_has(pset, "z"));
mu_assert_lf(hss_check_counts(pset));
// Remove "y": the occupied count is expected to drop to 2, "x" and "z" remain.
hss_remove(pset, "y");
mu_assert_lf(pset->num_occupied == 2);
mu_assert_lf(!hss_has(pset, "w"));
mu_assert_lf(hss_has(pset, "x"));
mu_assert_lf(!hss_has(pset, "y"));
mu_assert_lf(hss_has(pset, "z"));
mu_assert_lf(hss_check_counts(pset));
// Clear: no key is expected to be present afterwards.
hss_clear(pset);
mu_assert_lf(!hss_has(pset, "w"));
mu_assert_lf(!hss_has(pset, "x"));
mu_assert_lf(!hss_has(pset, "y"));
mu_assert_lf(!hss_has(pset, "z"));
mu_assert_lf(hss_check_counts(pset));
hss_free(pset);
return NULL;
}
// ----------------------------------------------------------------
// Walks an lhmsi map (string keys, int values) through put, overwrite,
// remove and clear. After each step it asserts the occupancy count, which of
// the probe keys "w", "x", "y", "z" are present, and lhmsi_check_counts().
// Returns NULL once every step has run.
static char* test_lhmsi() {
	mu_assert_lf(0 == 0);

	// Freshly allocated map: nothing present.
	lhmsi_t *pcounts = lhmsi_alloc();
	mu_assert_lf(pcounts->num_occupied == 0);
	mu_assert_lf(!lhmsi_has_key(pcounts, "w"));
	mu_assert_lf(!lhmsi_has_key(pcounts, "x"));
	mu_assert_lf(!lhmsi_has_key(pcounts, "y"));
	mu_assert_lf(!lhmsi_has_key(pcounts, "z"));
	mu_assert_lf(lhmsi_check_counts(pcounts));

	// Insert "x".
	lhmsi_put(pcounts, "x", 3);
	mu_assert_lf(pcounts->num_occupied == 1);
	mu_assert_lf(!lhmsi_has_key(pcounts, "w"));
	mu_assert_lf(lhmsi_has_key(pcounts, "x"));
	mu_assert_lf(!lhmsi_has_key(pcounts, "y"));
	mu_assert_lf(!lhmsi_has_key(pcounts, "z"));
	mu_assert_lf(lhmsi_check_counts(pcounts));

	// Insert "y".
	lhmsi_put(pcounts, "y", 5);
	mu_assert_lf(pcounts->num_occupied == 2);
	mu_assert_lf(!lhmsi_has_key(pcounts, "w"));
	mu_assert_lf(lhmsi_has_key(pcounts, "x"));
	mu_assert_lf(lhmsi_has_key(pcounts, "y"));
	mu_assert_lf(!lhmsi_has_key(pcounts, "z"));
	mu_assert_lf(lhmsi_check_counts(pcounts));

	// Put to an existing key: the count stays at 2.
	lhmsi_put(pcounts, "x", 4);
	mu_assert_lf(pcounts->num_occupied == 2);
	mu_assert_lf(!lhmsi_has_key(pcounts, "w"));
	mu_assert_lf(lhmsi_has_key(pcounts, "x"));
	mu_assert_lf(lhmsi_has_key(pcounts, "y"));
	mu_assert_lf(!lhmsi_has_key(pcounts, "z"));
	mu_assert_lf(lhmsi_check_counts(pcounts));

	// Insert "z".
	lhmsi_put(pcounts, "z", 7);
	mu_assert_lf(pcounts->num_occupied == 3);
	mu_assert_lf(!lhmsi_has_key(pcounts, "w"));
	mu_assert_lf(lhmsi_has_key(pcounts, "x"));
	mu_assert_lf(lhmsi_has_key(pcounts, "y"));
	mu_assert_lf(lhmsi_has_key(pcounts, "z"));
	mu_assert_lf(lhmsi_check_counts(pcounts));

	// Remove "y".
	lhmsi_remove(pcounts, "y");
	mu_assert_lf(pcounts->num_occupied == 2);
	mu_assert_lf(!lhmsi_has_key(pcounts, "w"));
	mu_assert_lf(lhmsi_has_key(pcounts, "x"));
	mu_assert_lf(!lhmsi_has_key(pcounts, "y"));
	mu_assert_lf(lhmsi_has_key(pcounts, "z"));
	mu_assert_lf(lhmsi_check_counts(pcounts));

	// Clear everything.
	lhmsi_clear(pcounts);
	mu_assert_lf(pcounts->num_occupied == 0);
	mu_assert_lf(!lhmsi_has_key(pcounts, "w"));
	mu_assert_lf(!lhmsi_has_key(pcounts, "x"));
	mu_assert_lf(!lhmsi_has_key(pcounts, "y"));
	mu_assert_lf(!lhmsi_has_key(pcounts, "z"));
	mu_assert_lf(lhmsi_check_counts(pcounts));

	lhmsi_free(pcounts);
	return NULL;
}
// lhmsi_remove(pmap, "y");
// printf("map size = %d\n", pmap->num_occupied);
// lhmsi_dump(pmap);
// printf("map has(\"w\") = %d\n", lhmsi_has_key(pmap, "w"));
// printf("map has(\"x\") = %d\n", lhmsi_has_key(pmap, "x"));
// printf("map has(\"y\") = %d\n", lhmsi_has_key(pmap, "y"));
// printf("map has(\"z\") = %d\n", lhmsi_has_key(pmap, "z"));
// lhmsi_check_counts(pmap);
// lhmsi_free(pmap);
// ----------------------------------------------------------------
// Walks an lhms2v map (two string keys per entry) through put, overwrite,
// remove and clear, asserting the occupancy count and lhms2v_check_counts()
// after each step. Returns NULL once every step has run.
static char* test_lhms2v() {
	mu_assert_lf(0 == 0);

	// Freshly allocated map.
	lhms2v_t *ptwo_level = lhms2v_alloc();
	mu_assert_lf(ptwo_level->num_occupied == 0);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Insert ("a", "x").
	lhms2v_put(ptwo_level, "a", "x", "3");
	mu_assert_lf(ptwo_level->num_occupied == 1);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Insert ("a", "y").
	lhms2v_put(ptwo_level, "a", "y", "5");
	mu_assert_lf(ptwo_level->num_occupied == 2);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Put to an existing key pair: the count stays at 2.
	lhms2v_put(ptwo_level, "a", "x", "4");
	mu_assert_lf(ptwo_level->num_occupied == 2);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Insert ("b", "z").
	lhms2v_put(ptwo_level, "b", "z", "7");
	mu_assert_lf(ptwo_level->num_occupied == 3);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Remove ("a", "y").
	lhms2v_remove(ptwo_level, "a", "y");
	mu_assert_lf(ptwo_level->num_occupied == 2);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Clear everything.
	lhms2v_clear(ptwo_level);
	mu_assert_lf(ptwo_level->num_occupied == 0);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	lhms2v_free(ptwo_level);
	return NULL;
}
// ----------------------------------------------------------------
// Placeholder test for lhmslv: it makes one always-true assertion and returns
// NULL, so it exercises no lhmslv function.
// NOTE(review): the commented-out code after this function looks like the
// intended body -- confirm before porting it to assertions.
static char* test_lhmslv() {
mu_assert_lf(0 == 0);
return NULL;
}
// slls_t* ax = slls_alloc();
// slls_add_no_free(ax, "a");
// slls_add_no_free(ax, "x");
//
// slls_t* ay = slls_alloc();
// slls_add_no_free(ay, "a");
// slls_add_no_free(ay, "y");
//
// slls_t* bz = slls_alloc();
// slls_add_no_free(bz, "b");
// slls_add_no_free(bz, "z");
//
// lhmslv_t *pmap = lhmslv_alloc();
// lhmslv_put(pmap, ax, "3");
// lhmslv_put(pmap, ay, "5");
// lhmslv_put(pmap, ax, "4");
// lhmslv_put(pmap, bz, "7");
// lhmslv_remove(pmap, ay);
// printf("map size = %d\n", lhmslv_size(pmap));
// lhmslv_dump(pmap);
// lhmslv_check_counts(pmap);
// lhmslv_free(pmap);
// ----------------------------------------------------------------
// Placeholder test for lhmss: it makes one always-true assertion and returns
// NULL, so it exercises no lhmss function.
// NOTE(review): the commented-out code after this function looks like the
// intended body -- confirm before porting it to assertions.
static char* test_lhmss() {
mu_assert_lf(0 == 0);
return NULL;
}
// lhmss_t *pmap = lhmss_alloc();
// lhmss_put(pmap, "x", "3");
// lhmss_put(pmap, "y", "5");
// lhmss_put(pmap, "x", "4");
// lhmss_put(pmap, "z", "7");
// lhmss_remove(pmap, "y");
// printf("map size = %d\n", pmap->num_occupied);
// lhmss_dump(pmap);
// lhmss_check_counts(pmap);
// lhmss_free(pmap);
// ----------------------------------------------------------------
// Placeholder test for lhmsv: it makes one always-true assertion and returns
// NULL, so it exercises no lhmsv function.
// NOTE(review): the commented-out code after this function looks like the
// intended body -- confirm before porting it to assertions.
static char* test_lhmsv() {
mu_assert_lf(0 == 0);
return NULL;
}
// int x3 = 3;
// int x5 = 5;
// int x4 = 4;
// int x7 = 7;
// lhmsv_t *pmap = lhmsv_alloc();
// lhmsv_put(pmap, "x", &x3);
// lhmsv_put(pmap, "y", &x5);
// lhmsv_put(pmap, "x", &x4);
// lhmsv_put(pmap, "z", &x7);
// lhmsv_remove(pmap, "y");
// printf("map size = %d\n", pmap->num_occupied);
// lhmsv_dump(pmap);
// lhmsv_check_counts(pmap);
// lhmsv_free(pmap);
// ----------------------------------------------------------------
// Placeholder test for percentile_keeper: it makes one always-true assertion
// and returns NULL, so it exercises no percentile_keeper function.
// NOTE(review): the commented-out code after this function reads values from
// stdin and prints results rather than asserting; it looks like an earlier
// interactive driver -- confirm before reusing it.
static char* test_percentile_keeper() {
mu_assert_lf(0 == 0);
return NULL;
}
//void percentile_keeper_dump(percentile_keeper_t* ppercentile_keeper) {
// for (int i = 0; i < ppercentile_keeper->size; i++)
// printf("[%02d] %.8lf\n", i, ppercentile_keeper->data[i]);
//}
// char buffer[1024];
// percentile_keeper_t* ppercentile_keeper = percentile_keeper_alloc();
// char* line;
// while ((line = fgets(buffer, sizeof(buffer), stdin)) != NULL) {
// int len = strlen(line);
// if (len >= 1) // xxx write and use a chomp()
// if (line[len-1] == '\n')
// line[len-1] = 0;
// double v;
// if (!mlr_try_double_from_string(line, &v)) {
// percentile_keeper_ingest(ppercentile_keeper, v);
// } else {
// printf("meh? >>%s<<\n", line);
// }
// }
// percentile_keeper_dump(ppercentile_keeper);
// printf("\n");
// double p;
// p = 0.10; printf("%.2lf: %.6lf\n", p, percentile_keeper_emit(ppercentile_keeper, p));
// p = 0.50; printf("%.2lf: %.6lf\n", p, percentile_keeper_emit(ppercentile_keeper, p));
// p = 0.90; printf("%.2lf: %.6lf\n", p, percentile_keeper_emit(ppercentile_keeper, p));
// printf("\n");
// percentile_keeper_dump(ppercentile_keeper);
// ----------------------------------------------------------------
// Placeholder test for top_keeper: it makes one always-true assertion and
// returns NULL, so it exercises no top_keeper function.
// NOTE(review): the commented-out code after this function reads values from
// stdin and uses argc/argv, which this function does not receive; it looks
// like an earlier interactive driver -- confirm before reusing it.
static char* test_top_keeper() {
mu_assert_lf(0 == 0);
return NULL;
}
//void top_keeper_dump(top_keeper_t* ptop_keeper) {
// for (int i = 0; i < ptop_keeper->size; i++)
// printf("[%02d] %.8lf\n", i, ptop_keeper->top_values[i]);
// for (int i = ptop_keeper->size; i < ptop_keeper->capacity; i++)
// printf("[%02d] ---\n", i);
//}
// int capacity = 5;
// char buffer[1024];
// if (argc == 2)
// (void)sscanf(argv[1], "%d", &capacity);
// top_keeper_t* ptop_keeper = top_keeper_alloc(capacity);
// char* line;
// while ((line = fgets(buffer, sizeof(buffer), stdin)) != NULL) {
// int len = strlen(line);
// if (len >= 1) // xxx write and use a chomp()
// if (line[len-1] == '\n')
// line[len-1] = 0;
// if (streq(line, "")) {
// //top_keeper_dump(ptop_keeper);
// printf("\n");
// } else {
// double v;
// if (!mlr_try_double_from_string(line, &v)) {
// top_keeper_add(ptop_keeper, v, NULL);
// top_keeper_dump(ptop_keeper);
// printf("\n");
// } else {
// printf("meh? >>%s<<\n", line);
// }
// }
// }
// ----------------------------------------------------------------
// Placeholder test for dheap: it makes one always-true assertion and returns
// NULL, so it exercises no dheap function.
// NOTE(review): the commented-out code after this function looks like the
// intended body, but it prints instead of asserting -- confirm before porting.
static char* test_dheap() {
mu_assert_lf(0 == 0);
return NULL;
}
// dheap_t *pdheap = dheap_alloc();
// dheap_check(pdheap, __FILE__, __LINE__);
// dheap_add(pdheap, 4.1);
// dheap_add(pdheap, 3.1);
// dheap_add(pdheap, 2.1);
// dheap_add(pdheap, 6.1);
// dheap_add(pdheap, 5.1);
// dheap_add(pdheap, 8.1);
// dheap_add(pdheap, 7.1);
// dheap_print(pdheap);
// dheap_check(pdheap, __FILE__, __LINE__);
//
// printf("\n");
// printf("remove %lf\n", dheap_remove(pdheap));
// printf("remove %lf\n", dheap_remove(pdheap));
// printf("remove %lf\n", dheap_remove(pdheap));
// printf("remove %lf\n", dheap_remove(pdheap));
// printf("\n");
//
// dheap_print(pdheap);
// dheap_check(pdheap, __FILE__, __LINE__);
//
// dheap_free(pdheap);
// ================================================================
// Runs every test function of this file through mu_run_test, in a fixed order.
// Returns NULL after the last mu_run_test.
// NOTE(review): mu_run_test presumably returns early from this function when a
// test fails -- confirm against lib/minunit.h.
static char * run_all_tests() {
	// Lists.
	mu_run_test(test_slls);
	mu_run_test(test_sllv_append);

	// Sets and maps.
	mu_run_test(test_hss);
	mu_run_test(test_lhmsi);
	mu_run_test(test_lhms2v);
	mu_run_test(test_lhmslv);
	mu_run_test(test_lhmss);
	mu_run_test(test_lhmsv);

	// Other containers.
	mu_run_test(test_percentile_keeper);
	mu_run_test(test_top_keeper);
	mu_run_test(test_dheap);

	return NULL;
}
int main(int argc, char **argv) {
char *result = run_all_tests();
printf("\n");
if (result != 0) {
printf("Not all unit tests passed\n");
}
else {
printf("TEST_MAPS_AND_SETS: ALL UNIT TESTS PASSED\n");
}
printf("Tests passed: %d of %d\n", tests_run - tests_failed, tests_run);
printf("Assertions passed: %d of %d\n", assertions_run - assertions_failed, assertions_run);
return result != 0;
}
#endif // __TEST_MAPS_AND_SETS_MAIN__

View file

@ -0,0 +1,659 @@
#include <stdio.h>
#include <string.h>
#include "lib/minunit.h"
#include "lib/mlrutil.h"
#include "containers/slls.h"
#include "containers/sllv.h"
#include "containers/hss.h"
#include "containers/lhmsi.h"
#include "containers/lhmss.h"
#include "containers/lhmsv.h"
#include "containers/lhms2v.h"
#include "containers/lhmslv.h"
#include "containers/percentile_keeper.h"
#include "containers/top_keeper.h"
#include "containers/dheap.h"
#ifdef __TEST_MULTIPLE_CONTAINERS_MAIN__
// Run-wide tallies, defined (zero-initialized) here for this test executable.
// NOTE(review): presumably the mu_* macros from lib/minunit.h update these
// counters -- confirm against that header.
int tests_run = 0;
int tests_failed = 0;
int assertions_run = 0;
int assertions_failed = 0;
// ----------------------------------------------------------------
// Checks slls_from_line on an empty line, a single field and five
// comma-separated fields, then checks that slls_sort reorders the five-element
// list into ascending order. Each traversal also verifies that the list ends
// exactly after the expected number of nodes. Returns NULL once every step
// has run.
static char* test_slls() {
	// Expected node values before and after sorting.
	char* unsorted[] = { "c", "d", "a", "e", "b" };
	char* sorted[]   = { "a", "b", "c", "d", "e" };
	int nexpected = sizeof(unsorted) / sizeof(unsorted[0]);

	slls_t* pstrings = slls_from_line(strdup(""), ',', FALSE);
	mu_assert_lf(pstrings->length == 0);

	pstrings = slls_from_line(strdup("a"), ',', FALSE);
	mu_assert_lf(pstrings->length == 1);

	pstrings = slls_from_line(strdup("c,d,a,e,b"), ',', FALSE);
	mu_assert_lf(pstrings->length == 5);

	// Fields come back in input order.
	sllse_t* pnode = pstrings->phead;
	for (int i = 0; i < nexpected; i++) {
		mu_assert_lf(pnode != NULL);
		mu_assert_lf(streq(pnode->value, unsorted[i]));
		pnode = pnode->pnext;
	}
	mu_assert_lf(pnode == NULL);

	// Sorting keeps the length and orders the values.
	slls_sort(pstrings);
	mu_assert_lf(pstrings->length == 5);
	pnode = pstrings->phead;
	for (int i = 0; i < nexpected; i++) {
		mu_assert_lf(pnode != NULL);
		mu_assert_lf(streq(pnode->value, sorted[i]));
		pnode = pnode->pnext;
	}
	mu_assert_lf(pnode == NULL);

	return NULL;
}
// ----------------------------------------------------------------
// Builds two sllv lists ("a","b","c" and "d","e"), verifies each one, then
// appends the second to the first with sllv_append and verifies that the
// result holds all five elements in order while the second list still reports
// its own two elements. Returns NULL once every step has run.
static char* test_sllv() {
	char* letters[] = { "a", "b", "c", "d", "e" };
	int nfirst = 3;
	int nsecond = 2;
	int nall = nfirst + nsecond;

	// First list: letters[0..2].
	sllv_t* pfirst = sllv_alloc();
	for (int i = 0; i < nfirst; i++) {
		sllv_add(pfirst, letters[i]);
	}
	mu_assert_lf(pfirst->length == 3);
	sllve_t* pnode = pfirst->phead;
	for (int i = 0; i < nfirst; i++) {
		mu_assert_lf(pnode != NULL);
		mu_assert_lf(streq(pnode->pvdata, letters[i]));
		pnode = pnode->pnext;
	}
	mu_assert_lf(pnode == NULL);

	// Second list: letters[3..4].
	sllv_t* psecond = sllv_alloc();
	for (int i = 0; i < nsecond; i++) {
		sllv_add(psecond, letters[nfirst + i]);
	}
	mu_assert_lf(psecond->length == 2);
	pnode = psecond->phead;
	for (int i = 0; i < nsecond; i++) {
		mu_assert_lf(pnode != NULL);
		mu_assert_lf(streq(pnode->pvdata, letters[nfirst + i]));
		pnode = pnode->pnext;
	}
	mu_assert_lf(pnode == NULL);

	// Append: the first list grows to five, the second keeps its length.
	pfirst = sllv_append(pfirst, psecond);
	mu_assert_lf(pfirst->length == 5);
	mu_assert_lf(psecond->length == 2);
	pnode = pfirst->phead;
	for (int i = 0; i < nall; i++) {
		mu_assert_lf(pnode != NULL);
		mu_assert_lf(streq(pnode->pvdata, letters[i]));
		pnode = pnode->pnext;
	}
	mu_assert_lf(pnode == NULL);

	// The second list is still traversable from its own head.
	pnode = psecond->phead;
	for (int i = 0; i < nsecond; i++) {
		mu_assert_lf(pnode != NULL);
		mu_assert_lf(streq(pnode->pvdata, letters[nfirst + i]));
		pnode = pnode->pnext;
	}
	mu_assert_lf(pnode == NULL);

	return NULL;
}
// ----------------------------------------------------------------
// Exercises the hss string set: add (including re-adding an existing element),
// membership queries, remove, and clear. After every mutation the test checks
// the occupancy count, the membership of the probe strings "w", "x", "y" and
// "z", and hss_check_counts().
// Returns NULL once every step has run.
// NOTE(review): mu_assert_lf presumably bails out of this function on a failed
// assertion -- confirm against lib/minunit.h.
static char* test_hss() {
	hss_t *pset = hss_alloc();
	mu_assert_lf(pset->num_occupied == 0);

	// First element.
	hss_add(pset, "x");
	mu_assert_lf(pset->num_occupied == 1);
	mu_assert_lf(!hss_has(pset, "w"));
	mu_assert_lf( hss_has(pset, "x"));
	mu_assert_lf(!hss_has(pset, "y"));
	mu_assert_lf(!hss_has(pset, "z"));
	mu_assert_lf(hss_check_counts(pset));

	// Second, distinct element.
	hss_add(pset, "y");
	mu_assert_lf(pset->num_occupied == 2);
	mu_assert_lf(!hss_has(pset, "w"));
	mu_assert_lf( hss_has(pset, "x"));
	mu_assert_lf( hss_has(pset, "y"));
	mu_assert_lf(!hss_has(pset, "z"));
	mu_assert_lf(hss_check_counts(pset));

	// Re-adding an element that is already present must not change the count.
	hss_add(pset, "x");
	mu_assert_lf(pset->num_occupied == 2);
	mu_assert_lf(!hss_has(pset, "w"));
	mu_assert_lf( hss_has(pset, "x"));
	mu_assert_lf( hss_has(pset, "y"));
	mu_assert_lf(!hss_has(pset, "z"));
	mu_assert_lf(hss_check_counts(pset));

	// Third distinct element.
	hss_add(pset, "z");
	mu_assert_lf(pset->num_occupied == 3);
	mu_assert_lf(!hss_has(pset, "w"));
	mu_assert_lf( hss_has(pset, "x"));
	mu_assert_lf( hss_has(pset, "y"));
	mu_assert_lf( hss_has(pset, "z"));
	mu_assert_lf(hss_check_counts(pset));

	// Removing one element leaves the others in place.
	hss_remove(pset, "y");
	mu_assert_lf(pset->num_occupied == 2);
	mu_assert_lf(!hss_has(pset, "w"));
	mu_assert_lf( hss_has(pset, "x"));
	mu_assert_lf(!hss_has(pset, "y"));
	mu_assert_lf( hss_has(pset, "z"));
	mu_assert_lf(hss_check_counts(pset));

	// Clearing must leave the set empty, including its occupancy count.
	hss_clear(pset);
	mu_assert_lf(pset->num_occupied == 0);
	mu_assert_lf(!hss_has(pset, "w"));
	mu_assert_lf(!hss_has(pset, "x"));
	mu_assert_lf(!hss_has(pset, "y"));
	mu_assert_lf(!hss_has(pset, "z"));
	mu_assert_lf(hss_check_counts(pset));

	hss_free(pset);
	return NULL;
}
// ----------------------------------------------------------------
// Walks an lhmsi map (string keys, int values) through put, overwrite,
// remove and clear. After each step it asserts the occupancy count, the
// presence and value of each probe key "w", "x", "y", "z", and
// lhmsi_check_counts(). The assertions expect lhmsi_get to yield -999 for a
// key that is not present. Returns NULL once every step has run.
static char* test_lhmsi() {
	lhmsi_t *pmap = lhmsi_alloc();

	// Freshly allocated map: every probe key is absent. Each lookup uses the
	// same key as the has_key check beside it.
	mu_assert_lf(pmap->num_occupied == 0);
	mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999);
	mu_assert_lf(!lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == -999);
	mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999);
	mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999);
	mu_assert_lf(lhmsi_check_counts(pmap));

	// Insert "x".
	lhmsi_put(pmap, "x", 3);
	mu_assert_lf(pmap->num_occupied == 1);
	mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999);
	mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 3);
	mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999);
	mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999);
	mu_assert_lf(lhmsi_check_counts(pmap));

	// Insert "y".
	lhmsi_put(pmap, "y", 5);
	mu_assert_lf(pmap->num_occupied == 2);
	mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999);
	mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 3);
	mu_assert_lf( lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5);
	mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999);
	mu_assert_lf(lhmsi_check_counts(pmap));

	// Overwrite "x": the count stays at 2 and the value changes.
	lhmsi_put(pmap, "x", 4);
	mu_assert_lf(pmap->num_occupied == 2);
	mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999);
	mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4);
	mu_assert_lf( lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5);
	mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999);
	mu_assert_lf(lhmsi_check_counts(pmap));

	// Insert "z".
	lhmsi_put(pmap, "z", 7);
	mu_assert_lf(pmap->num_occupied == 3);
	mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999);
	mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4);
	mu_assert_lf( lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == 5);
	mu_assert_lf( lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == 7);
	mu_assert_lf(lhmsi_check_counts(pmap));

	// Remove "y".
	lhmsi_remove(pmap, "y");
	mu_assert_lf(pmap->num_occupied == 2);
	mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999);
	mu_assert_lf( lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == 4);
	mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999);
	mu_assert_lf( lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == 7);
	mu_assert_lf(lhmsi_check_counts(pmap));

	// Clear everything.
	lhmsi_clear(pmap);
	mu_assert_lf(pmap->num_occupied == 0);
	mu_assert_lf(!lhmsi_has_key(pmap, "w")); mu_assert_lf(lhmsi_get(pmap, "w") == -999);
	mu_assert_lf(!lhmsi_has_key(pmap, "x")); mu_assert_lf(lhmsi_get(pmap, "x") == -999);
	mu_assert_lf(!lhmsi_has_key(pmap, "y")); mu_assert_lf(lhmsi_get(pmap, "y") == -999);
	mu_assert_lf(!lhmsi_has_key(pmap, "z")); mu_assert_lf(lhmsi_get(pmap, "z") == -999);
	mu_assert_lf(lhmsi_check_counts(pmap));

	lhmsi_free(pmap);
	return NULL;
}
// ----------------------------------------------------------------
// Walks an lhmss map (string keys, string values) through put, overwrite and
// remove. After each step it asserts the occupancy count, the presence and
// value of each probe key "w", "x", "y", "z" (lhmss_get is expected to yield
// NULL for an absent key), and lhmss_check_counts(). Returns NULL once every
// step has run.
static char* test_lhmss() {
	lhmss_t *pstrings = lhmss_alloc();

	// Freshly allocated map: every probe key is absent.
	mu_assert_lf(pstrings->num_occupied == 0);
	mu_assert_lf(!lhmss_has_key(pstrings, "w")); mu_assert_lf(lhmss_get(pstrings, "w") == NULL);
	mu_assert_lf(!lhmss_has_key(pstrings, "x")); mu_assert_lf(lhmss_get(pstrings, "x") == NULL);
	mu_assert_lf(!lhmss_has_key(pstrings, "y")); mu_assert_lf(lhmss_get(pstrings, "y") == NULL);
	mu_assert_lf(!lhmss_has_key(pstrings, "z")); mu_assert_lf(lhmss_get(pstrings, "z") == NULL);
	mu_assert_lf(lhmss_check_counts(pstrings));

	// Insert "x".
	lhmss_put(pstrings, "x", "3");
	mu_assert_lf(pstrings->num_occupied == 1);
	mu_assert_lf(!lhmss_has_key(pstrings, "w")); mu_assert_lf(lhmss_get(pstrings, "w") == NULL);
	mu_assert_lf( lhmss_has_key(pstrings, "x")); mu_assert_lf(streq(lhmss_get(pstrings, "x"), "3"));
	mu_assert_lf(!lhmss_has_key(pstrings, "y")); mu_assert_lf(lhmss_get(pstrings, "y") == NULL);
	mu_assert_lf(!lhmss_has_key(pstrings, "z")); mu_assert_lf(lhmss_get(pstrings, "z") == NULL);
	mu_assert_lf(lhmss_check_counts(pstrings));

	// Insert "y".
	lhmss_put(pstrings, "y", "5");
	mu_assert_lf(pstrings->num_occupied == 2);
	mu_assert_lf(!lhmss_has_key(pstrings, "w")); mu_assert_lf(lhmss_get(pstrings, "w") == NULL);
	mu_assert_lf( lhmss_has_key(pstrings, "x")); mu_assert_lf(streq(lhmss_get(pstrings, "x"), "3"));
	mu_assert_lf( lhmss_has_key(pstrings, "y")); mu_assert_lf(streq(lhmss_get(pstrings, "y"), "5"));
	mu_assert_lf(!lhmss_has_key(pstrings, "z")); mu_assert_lf(lhmss_get(pstrings, "z") == NULL);
	mu_assert_lf(lhmss_check_counts(pstrings));

	// Overwrite "x": the count stays at 2 and the value changes.
	lhmss_put(pstrings, "x", "4");
	mu_assert_lf(pstrings->num_occupied == 2);
	mu_assert_lf(!lhmss_has_key(pstrings, "w")); mu_assert_lf(lhmss_get(pstrings, "w") == NULL);
	mu_assert_lf( lhmss_has_key(pstrings, "x")); mu_assert_lf(streq(lhmss_get(pstrings, "x"), "4"));
	mu_assert_lf( lhmss_has_key(pstrings, "y")); mu_assert_lf(streq(lhmss_get(pstrings, "y"), "5"));
	mu_assert_lf(!lhmss_has_key(pstrings, "z")); mu_assert_lf(lhmss_get(pstrings, "z") == NULL);
	mu_assert_lf(lhmss_check_counts(pstrings));

	// Insert "z".
	lhmss_put(pstrings, "z", "7");
	mu_assert_lf(pstrings->num_occupied == 3);
	mu_assert_lf(!lhmss_has_key(pstrings, "w")); mu_assert_lf(lhmss_get(pstrings, "w") == NULL);
	mu_assert_lf( lhmss_has_key(pstrings, "x")); mu_assert_lf(streq(lhmss_get(pstrings, "x"), "4"));
	mu_assert_lf( lhmss_has_key(pstrings, "y")); mu_assert_lf(streq(lhmss_get(pstrings, "y"), "5"));
	mu_assert_lf( lhmss_has_key(pstrings, "z")); mu_assert_lf(streq(lhmss_get(pstrings, "z"), "7"));
	mu_assert_lf(lhmss_check_counts(pstrings));

	// Remove "y".
	lhmss_remove(pstrings, "y");
	mu_assert_lf(pstrings->num_occupied == 2);
	mu_assert_lf(!lhmss_has_key(pstrings, "w")); mu_assert_lf(lhmss_get(pstrings, "w") == NULL);
	mu_assert_lf( lhmss_has_key(pstrings, "x")); mu_assert_lf(streq(lhmss_get(pstrings, "x"), "4"));
	mu_assert_lf(!lhmss_has_key(pstrings, "y")); mu_assert_lf(lhmss_get(pstrings, "y") == NULL);
	mu_assert_lf( lhmss_has_key(pstrings, "z")); mu_assert_lf(streq(lhmss_get(pstrings, "z"), "7"));
	mu_assert_lf(lhmss_check_counts(pstrings));

	lhmss_free(pstrings);
	return NULL;
}
// ----------------------------------------------------------------
// Walks an lhmsv map (string keys, pointer values; string literals are used
// as the values here) through put, overwrite and remove. After each step it
// asserts the occupancy count, the presence and value of each probe key "w",
// "x", "y", "z" (lhmsv_get is expected to yield NULL for an absent key), and
// lhmsv_check_counts(). Returns NULL once every step has run.
static char* test_lhmsv() {
	lhmsv_t *pvalues = lhmsv_alloc();

	// Freshly allocated map: every probe key is absent.
	mu_assert_lf(pvalues->num_occupied == 0);
	mu_assert_lf(!lhmsv_has_key(pvalues, "w")); mu_assert_lf(lhmsv_get(pvalues, "w") == NULL);
	mu_assert_lf(!lhmsv_has_key(pvalues, "x")); mu_assert_lf(lhmsv_get(pvalues, "x") == NULL);
	mu_assert_lf(!lhmsv_has_key(pvalues, "y")); mu_assert_lf(lhmsv_get(pvalues, "y") == NULL);
	mu_assert_lf(!lhmsv_has_key(pvalues, "z")); mu_assert_lf(lhmsv_get(pvalues, "z") == NULL);
	mu_assert_lf(lhmsv_check_counts(pvalues));

	// Insert "x".
	lhmsv_put(pvalues, "x", "3");
	mu_assert_lf(pvalues->num_occupied == 1);
	mu_assert_lf(!lhmsv_has_key(pvalues, "w")); mu_assert_lf(lhmsv_get(pvalues, "w") == NULL);
	mu_assert_lf( lhmsv_has_key(pvalues, "x")); mu_assert_lf(streq(lhmsv_get(pvalues, "x"), "3"));
	mu_assert_lf(!lhmsv_has_key(pvalues, "y")); mu_assert_lf(lhmsv_get(pvalues, "y") == NULL);
	mu_assert_lf(!lhmsv_has_key(pvalues, "z")); mu_assert_lf(lhmsv_get(pvalues, "z") == NULL);
	mu_assert_lf(lhmsv_check_counts(pvalues));

	// Insert "y".
	lhmsv_put(pvalues, "y", "5");
	mu_assert_lf(pvalues->num_occupied == 2);
	mu_assert_lf(!lhmsv_has_key(pvalues, "w")); mu_assert_lf(lhmsv_get(pvalues, "w") == NULL);
	mu_assert_lf( lhmsv_has_key(pvalues, "x")); mu_assert_lf(streq(lhmsv_get(pvalues, "x"), "3"));
	mu_assert_lf( lhmsv_has_key(pvalues, "y")); mu_assert_lf(streq(lhmsv_get(pvalues, "y"), "5"));
	mu_assert_lf(!lhmsv_has_key(pvalues, "z")); mu_assert_lf(lhmsv_get(pvalues, "z") == NULL);
	mu_assert_lf(lhmsv_check_counts(pvalues));

	// Overwrite "x": the count stays at 2 and the value changes.
	lhmsv_put(pvalues, "x", "4");
	mu_assert_lf(pvalues->num_occupied == 2);
	mu_assert_lf(!lhmsv_has_key(pvalues, "w")); mu_assert_lf(lhmsv_get(pvalues, "w") == NULL);
	mu_assert_lf( lhmsv_has_key(pvalues, "x")); mu_assert_lf(streq(lhmsv_get(pvalues, "x"), "4"));
	mu_assert_lf( lhmsv_has_key(pvalues, "y")); mu_assert_lf(streq(lhmsv_get(pvalues, "y"), "5"));
	mu_assert_lf(!lhmsv_has_key(pvalues, "z")); mu_assert_lf(lhmsv_get(pvalues, "z") == NULL);
	mu_assert_lf(lhmsv_check_counts(pvalues));

	// Insert "z".
	lhmsv_put(pvalues, "z", "7");
	mu_assert_lf(pvalues->num_occupied == 3);
	mu_assert_lf(!lhmsv_has_key(pvalues, "w")); mu_assert_lf(lhmsv_get(pvalues, "w") == NULL);
	mu_assert_lf( lhmsv_has_key(pvalues, "x")); mu_assert_lf(streq(lhmsv_get(pvalues, "x"), "4"));
	mu_assert_lf( lhmsv_has_key(pvalues, "y")); mu_assert_lf(streq(lhmsv_get(pvalues, "y"), "5"));
	mu_assert_lf( lhmsv_has_key(pvalues, "z")); mu_assert_lf(streq(lhmsv_get(pvalues, "z"), "7"));
	mu_assert_lf(lhmsv_check_counts(pvalues));

	// Remove "y".
	lhmsv_remove(pvalues, "y");
	mu_assert_lf(pvalues->num_occupied == 2);
	mu_assert_lf(!lhmsv_has_key(pvalues, "w")); mu_assert_lf(lhmsv_get(pvalues, "w") == NULL);
	mu_assert_lf( lhmsv_has_key(pvalues, "x")); mu_assert_lf(streq(lhmsv_get(pvalues, "x"), "4"));
	mu_assert_lf(!lhmsv_has_key(pvalues, "y")); mu_assert_lf(lhmsv_get(pvalues, "y") == NULL);
	mu_assert_lf( lhmsv_has_key(pvalues, "z")); mu_assert_lf(streq(lhmsv_get(pvalues, "z"), "7"));
	mu_assert_lf(lhmsv_check_counts(pvalues));

	lhmsv_free(pvalues);
	return NULL;
}
// ----------------------------------------------------------------
// Walks an lhms2v map (two string keys per entry, pointer values) through
// put, overwrite, remove and clear. After each step it asserts the occupancy
// count, the presence and value of the probe key pairs ("a","w"), ("a","x"),
// ("a","y") and ("b","z") (lhms2v_get is expected to yield NULL for an absent
// pair), and lhms2v_check_counts(). Returns NULL once every step has run.
static char* test_lhms2v() {
	lhms2v_t *ptwo_level = lhms2v_alloc();

	// Freshly allocated map: every probe pair is absent.
	mu_assert_lf(ptwo_level->num_occupied == 0);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "w")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "w") == NULL);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "x")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "x") == NULL);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "y")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "y") == NULL);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "b", "z")); mu_assert_lf(lhms2v_get(ptwo_level, "b", "z") == NULL);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Insert ("a", "x").
	lhms2v_put(ptwo_level, "a", "x", "3");
	mu_assert_lf(ptwo_level->num_occupied == 1);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "w")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "w") == NULL);
	mu_assert_lf( lhms2v_has_key(ptwo_level, "a", "x")); mu_assert_lf(streq(lhms2v_get(ptwo_level, "a", "x"), "3"));
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "y")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "y") == NULL);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "b", "z")); mu_assert_lf(lhms2v_get(ptwo_level, "b", "z") == NULL);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Insert ("a", "y").
	lhms2v_put(ptwo_level, "a", "y", "5");
	mu_assert_lf(ptwo_level->num_occupied == 2);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "w")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "w") == NULL);
	mu_assert_lf( lhms2v_has_key(ptwo_level, "a", "x")); mu_assert_lf(streq(lhms2v_get(ptwo_level, "a", "x"), "3"));
	mu_assert_lf( lhms2v_has_key(ptwo_level, "a", "y")); mu_assert_lf(streq(lhms2v_get(ptwo_level, "a", "y"), "5"));
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "b", "z")); mu_assert_lf(lhms2v_get(ptwo_level, "b", "z") == NULL);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Overwrite ("a", "x"): the count stays at 2 and the value changes.
	lhms2v_put(ptwo_level, "a", "x", "4");
	mu_assert_lf(ptwo_level->num_occupied == 2);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "w")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "w") == NULL);
	mu_assert_lf( lhms2v_has_key(ptwo_level, "a", "x")); mu_assert_lf(streq(lhms2v_get(ptwo_level, "a", "x"), "4"));
	mu_assert_lf( lhms2v_has_key(ptwo_level, "a", "y")); mu_assert_lf(streq(lhms2v_get(ptwo_level, "a", "y"), "5"));
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "b", "z")); mu_assert_lf(lhms2v_get(ptwo_level, "b", "z") == NULL);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Insert ("b", "z").
	lhms2v_put(ptwo_level, "b", "z", "7");
	mu_assert_lf(ptwo_level->num_occupied == 3);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "w")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "w") == NULL);
	mu_assert_lf( lhms2v_has_key(ptwo_level, "a", "x")); mu_assert_lf(streq(lhms2v_get(ptwo_level, "a", "x"), "4"));
	mu_assert_lf( lhms2v_has_key(ptwo_level, "a", "y")); mu_assert_lf(streq(lhms2v_get(ptwo_level, "a", "y"), "5"));
	mu_assert_lf( lhms2v_has_key(ptwo_level, "b", "z")); mu_assert_lf(streq(lhms2v_get(ptwo_level, "b", "z"), "7"));
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Remove ("a", "y").
	lhms2v_remove(ptwo_level, "a", "y");
	mu_assert_lf(ptwo_level->num_occupied == 2);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "w")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "w") == NULL);
	mu_assert_lf( lhms2v_has_key(ptwo_level, "a", "x")); mu_assert_lf(streq(lhms2v_get(ptwo_level, "a", "x"), "4"));
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "y")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "y") == NULL);
	mu_assert_lf( lhms2v_has_key(ptwo_level, "b", "z")); mu_assert_lf(streq(lhms2v_get(ptwo_level, "b", "z"), "7"));
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	// Clear everything.
	lhms2v_clear(ptwo_level);
	mu_assert_lf(ptwo_level->num_occupied == 0);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "w")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "w") == NULL);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "x")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "x") == NULL);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "a", "y")); mu_assert_lf(lhms2v_get(ptwo_level, "a", "y") == NULL);
	mu_assert_lf(!lhms2v_has_key(ptwo_level, "b", "z")); mu_assert_lf(lhms2v_get(ptwo_level, "b", "z") == NULL);
	mu_assert_lf(lhms2v_check_counts(ptwo_level));

	lhms2v_free(ptwo_level);
	return NULL;
}
// ----------------------------------------------------------------
// Walks an lhmslv map (string-list keys, pointer values) through put,
// overwrite, remove and clear. Four two-element list keys are built up front:
// ["a","w"], ["a","x"], ["a","y"] and ["b","z"]; the first is never inserted.
// After each step the test asserts the occupancy count, the presence and value
// for each key (lhmslv_get is expected to yield NULL for an absent key), and
// lhmslv_check_counts(). Returns NULL once every step has run.
static char* test_lhmslv() {
	slls_t* key_aw = slls_alloc();
	slls_add_no_free(key_aw, "a");
	slls_add_no_free(key_aw, "w");

	slls_t* key_ax = slls_alloc();
	slls_add_no_free(key_ax, "a");
	slls_add_no_free(key_ax, "x");

	slls_t* key_ay = slls_alloc();
	slls_add_no_free(key_ay, "a");
	slls_add_no_free(key_ay, "y");

	slls_t* key_bz = slls_alloc();
	slls_add_no_free(key_bz, "b");
	slls_add_no_free(key_bz, "z");

	// Freshly allocated map: every key is absent.
	lhmslv_t *pkeyed = lhmslv_alloc();
	mu_assert_lf(pkeyed->num_occupied == 0);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_aw)); mu_assert_lf(lhmslv_get(pkeyed, key_aw) == NULL);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_ax)); mu_assert_lf(lhmslv_get(pkeyed, key_ax) == NULL);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_ay)); mu_assert_lf(lhmslv_get(pkeyed, key_ay) == NULL);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_bz)); mu_assert_lf(lhmslv_get(pkeyed, key_bz) == NULL);
	mu_assert_lf(lhmslv_check_counts(pkeyed));

	// Insert ["a","x"].
	lhmslv_put(pkeyed, key_ax, "3");
	mu_assert_lf(pkeyed->num_occupied == 1);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_aw)); mu_assert_lf(lhmslv_get(pkeyed, key_aw) == NULL);
	mu_assert_lf( lhmslv_has_key(pkeyed, key_ax)); mu_assert_lf(streq(lhmslv_get(pkeyed, key_ax), "3"));
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_ay)); mu_assert_lf(lhmslv_get(pkeyed, key_ay) == NULL);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_bz)); mu_assert_lf(lhmslv_get(pkeyed, key_bz) == NULL);
	mu_assert_lf(lhmslv_check_counts(pkeyed));

	// Insert ["a","y"].
	lhmslv_put(pkeyed, key_ay, "5");
	mu_assert_lf(pkeyed->num_occupied == 2);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_aw)); mu_assert_lf(lhmslv_get(pkeyed, key_aw) == NULL);
	mu_assert_lf( lhmslv_has_key(pkeyed, key_ax)); mu_assert_lf(streq(lhmslv_get(pkeyed, key_ax), "3"));
	mu_assert_lf( lhmslv_has_key(pkeyed, key_ay)); mu_assert_lf(streq(lhmslv_get(pkeyed, key_ay), "5"));
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_bz)); mu_assert_lf(lhmslv_get(pkeyed, key_bz) == NULL);
	mu_assert_lf(lhmslv_check_counts(pkeyed));

	// Overwrite ["a","x"]: the count stays at 2 and the value changes.
	lhmslv_put(pkeyed, key_ax, "4");
	mu_assert_lf(pkeyed->num_occupied == 2);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_aw)); mu_assert_lf(lhmslv_get(pkeyed, key_aw) == NULL);
	mu_assert_lf( lhmslv_has_key(pkeyed, key_ax)); mu_assert_lf(streq(lhmslv_get(pkeyed, key_ax), "4"));
	mu_assert_lf( lhmslv_has_key(pkeyed, key_ay)); mu_assert_lf(streq(lhmslv_get(pkeyed, key_ay), "5"));
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_bz)); mu_assert_lf(lhmslv_get(pkeyed, key_bz) == NULL);
	mu_assert_lf(lhmslv_check_counts(pkeyed));

	// Insert ["b","z"].
	lhmslv_put(pkeyed, key_bz, "7");
	mu_assert_lf(pkeyed->num_occupied == 3);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_aw)); mu_assert_lf(lhmslv_get(pkeyed, key_aw) == NULL);
	mu_assert_lf( lhmslv_has_key(pkeyed, key_ax)); mu_assert_lf(streq(lhmslv_get(pkeyed, key_ax), "4"));
	mu_assert_lf( lhmslv_has_key(pkeyed, key_ay)); mu_assert_lf(streq(lhmslv_get(pkeyed, key_ay), "5"));
	mu_assert_lf( lhmslv_has_key(pkeyed, key_bz)); mu_assert_lf(streq(lhmslv_get(pkeyed, key_bz), "7"));
	mu_assert_lf(lhmslv_check_counts(pkeyed));

	// Remove ["a","y"].
	lhmslv_remove(pkeyed, key_ay);
	mu_assert_lf(pkeyed->num_occupied == 2);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_aw)); mu_assert_lf(lhmslv_get(pkeyed, key_aw) == NULL);
	mu_assert_lf( lhmslv_has_key(pkeyed, key_ax)); mu_assert_lf(streq(lhmslv_get(pkeyed, key_ax), "4"));
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_ay)); mu_assert_lf(lhmslv_get(pkeyed, key_ay) == NULL);
	mu_assert_lf( lhmslv_has_key(pkeyed, key_bz)); mu_assert_lf(streq(lhmslv_get(pkeyed, key_bz), "7"));
	mu_assert_lf(lhmslv_check_counts(pkeyed));

	// Clear everything.
	lhmslv_clear(pkeyed);
	mu_assert_lf(pkeyed->num_occupied == 0);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_aw)); mu_assert_lf(lhmslv_get(pkeyed, key_aw) == NULL);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_ax)); mu_assert_lf(lhmslv_get(pkeyed, key_ax) == NULL);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_ay)); mu_assert_lf(lhmslv_get(pkeyed, key_ay) == NULL);
	mu_assert_lf(!lhmslv_has_key(pkeyed, key_bz)); mu_assert_lf(lhmslv_get(pkeyed, key_bz) == NULL);
	mu_assert_lf(lhmslv_check_counts(pkeyed));

	lhmslv_free(pkeyed);
	return NULL;
}
// ----------------------------------------------------------------
// Ingests the values 1.0 through 5.0 into a percentile keeper, prints the
// keeper, then for each percentile in a small table prints the emitted value
// and asserts that it equals the expected one. Returns NULL once every case
// has run.
static char* test_percentile_keeper() {
	double samples[] = { 1.0, 2.0, 3.0, 4.0, 5.0 };
	int nsamples = sizeof(samples) / sizeof(samples[0]);

	// Percentile requested -> value expected from the five samples above.
	struct {
		double percentile;
		double expected;
	} cases[] = {
		{   0.0, 1.0 },
		{  10.0, 1.0 },
		{  50.0, 3.0 },
		{  90.0, 5.0 },
		{ 100.0, 5.0 },
	};
	int ncases = sizeof(cases) / sizeof(cases[0]);

	percentile_keeper_t* pkeeper = percentile_keeper_alloc();
	for (int i = 0; i < nsamples; i++) {
		percentile_keeper_ingest(pkeeper, samples[i]);
	}
	percentile_keeper_print(pkeeper);

	for (int i = 0; i < ncases; i++) {
		double p = cases[i].percentile;
		double q = percentile_keeper_emit(pkeeper, p);
		printf("%4.2lf -> %7.4lf\n", p, q);
		mu_assert_lf(q == cases[i].expected);
	}

	percentile_keeper_free(pkeeper);
	return NULL;
}
// ----------------------------------------------------------------
// Feeds five values into a top keeper of capacity 3. After each add the keeper
// is printed and the test asserts its size and the contents of top_values,
// which are expected in descending order and capped at the capacity.
// Returns NULL once every step has run.
static char* test_top_keeper() {
	int capacity = 3;

	// One row per add: the value added, the expected size afterwards, and the
	// expected leading entries of top_values (only the first `size` are checked).
	struct {
		double added;
		int    size;
		double tops[3];
	} steps[] = {
		{ 5.0, 1, { 5.0 } },
		{ 6.0, 2, { 6.0, 5.0 } },
		{ 4.0, 3, { 6.0, 5.0, 4.0 } },
		{ 2.0, 3, { 6.0, 5.0, 4.0 } },
		{ 7.0, 3, { 7.0, 6.0, 5.0 } },
	};
	int nsteps = sizeof(steps) / sizeof(steps[0]);

	top_keeper_t* pkeeper = top_keeper_alloc(capacity);
	mu_assert_lf(pkeeper->size == 0);

	for (int i = 0; i < nsteps; i++) {
		top_keeper_add(pkeeper, steps[i].added, NULL);
		top_keeper_print(pkeeper);
		mu_assert_lf(pkeeper->size == steps[i].size);
		for (int j = 0; j < steps[i].size; j++) {
			mu_assert_lf(pkeeper->top_values[j] == steps[i].tops[j]);
		}
	}

	top_keeper_free(pkeeper);
	return NULL;
}
// ----------------------------------------------------------------
// Exercises dheap add/remove. Seven distinct values are added one at a time;
// after every add and every remove the test asserts that dheap_check passes
// and that the element count n has the expected value. The removal phase
// asserts that successive dheap_remove calls return the values in descending
// order (8.25 down to 2.25). Returns NULL at the end.
static char* test_dheap() {
dheap_t *pdheap = dheap_alloc();
// A freshly allocated heap must check out and be empty.
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 0);
// Add phase: values are inserted in non-sorted order; n grows by one each time.
dheap_add(pdheap, 4.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 1);
dheap_add(pdheap, 3.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 2);
dheap_add(pdheap, 2.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 3);
dheap_add(pdheap, 6.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 4);
dheap_add(pdheap, 5.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 5);
dheap_add(pdheap, 8.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 6);
dheap_add(pdheap, 7.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 7);
dheap_print(pdheap);
// Remove phase: each remove must yield the largest remaining value, and n
// shrinks by one each time until the heap is empty again.
mu_assert_lf(dheap_remove(pdheap) == 8.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 6);
mu_assert_lf(dheap_remove(pdheap) == 7.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 5);
mu_assert_lf(dheap_remove(pdheap) == 6.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 4);
mu_assert_lf(dheap_remove(pdheap) == 5.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 3);
mu_assert_lf(dheap_remove(pdheap) == 4.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 2);
mu_assert_lf(dheap_remove(pdheap) == 3.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 1);
mu_assert_lf(dheap_remove(pdheap) == 2.25);
mu_assert_lf(dheap_check(pdheap, __FILE__, __LINE__));
mu_assert_lf(pdheap->n == 0);
dheap_free(pdheap);
return NULL;
}
// ================================================================
// Runs every container test in this file through mu_run_test, in the order
// listed, and returns 0 when control reaches the end of the list.
// NOTE(review): mu_run_test is a macro defined outside this view; presumably
// it updates the tests_run/tests_failed counters that main() reports --
// confirm against the unit-test header.
static char * run_all_tests() {
mu_run_test(test_slls);
mu_run_test(test_sllv);
mu_run_test(test_hss);
mu_run_test(test_lhmsi);
mu_run_test(test_lhmss);
mu_run_test(test_lhmsv);
mu_run_test(test_lhms2v);
mu_run_test(test_lhmslv);
mu_run_test(test_percentile_keeper);
mu_run_test(test_top_keeper);
mu_run_test(test_dheap);
return 0;
}
int main(int argc, char **argv) {
printf("TEST_MULTIPLE_CONTAINERS ENTER\n");
char *result = run_all_tests();
printf("\n");
if (result != 0) {
printf("Not all unit tests passed\n");
}
else {
printf("TEST_MULTIPLE_CONTAINERS: ALL UNIT TESTS PASSED\n");
}
printf("Tests passed: %d of %d\n", tests_run - tests_failed, tests_run);
printf("Assertions passed: %d of %d\n", assertions_run - assertions_failed, assertions_run);
return result != 0;
}
#endif // __TEST_MULTIPLE_CONTAINERS_MAIN__

View file

@ -252,6 +252,7 @@ static char* all_tests() {
}
int main(int argc, char** argv) {
printf("TEST_PARSE_TRIE ENTER\n");
char* result = all_tests();
printf("\n");
if (result != 0) {

View file

@ -1,3 +1,4 @@
#include <stdio.h>
#include <string.h>
#include "lib/mlrutil.h"
#include "containers/top_keeper.h"
@ -76,3 +77,12 @@ void top_keeper_add(top_keeper_t* ptop_keeper, double value, lrec_t* prec) {
ptop_keeper->top_precords[destidx] = prec; // xxx copy?? xxx free on shift-off?!?
}
}
// ----------------------------------------------------------------
// Debug/test dump of a top-keeper to stdout: a header line, then one line per
// slot up to capacity. Slots below size show the stored value; the remaining
// slots are shown as "---".
void top_keeper_print(top_keeper_t* ptop_keeper) {
	const int num_filled = ptop_keeper->size;
	const int num_slots = ptop_keeper->capacity;

	printf("top_keeper dump:\n");

	// Occupied slots.
	int slot = 0;
	while (slot < num_filled) {
		printf("[%02d] %.8lf\n", slot, ptop_keeper->top_values[slot]);
		slot++;
	}

	// Unoccupied slots, from size up to capacity.
	for (slot = num_filled; slot < num_slots; slot++) {
		printf("[%02d] ---\n", slot);
	}
}

View file

@ -17,4 +17,7 @@ top_keeper_t* top_keeper_alloc(int capacity);
void top_keeper_free(top_keeper_t* ptop_keeper);
void top_keeper_add(top_keeper_t* ptop_keeper, double value, lrec_t* prec);
// For debug/test
void top_keeper_print(top_keeper_t* ptop_keeper);
#endif // TOP_KEEPER_H

View file

@ -6,7 +6,9 @@
#include "input/file_reader_mmap.h"
#include "input/lrec_readers.h"
#include "lib/string_builder.h"
#include "input/old_peek_file_reader.h"
#include "input/byte_readers.h"
#include "input/peek_file_reader.h"
#include "containers/parse_trie.h"
#define PEEK_BUF_LEN 32
#define STRING_BUILDER_INIT_SIZE 1024
@ -263,28 +265,46 @@ static int read_file_mmap_psb(char* filename, int do_write) {
}
// ================================================================
static char* read_line_pfr_psb(old_peek_file_reader_t* pfr, string_builder_t* psb, char* irs, int irs_len) {
#define IRS_STRIDX 11
#define EOF_STRIDX 22
#define IRSEOF_STRIDX 33
static char* read_line_pfr_psb(peek_file_reader_t* pfr, string_builder_t* psb, parse_trie_t* ptrie) {
int rc, stridx, matchlen;
while (TRUE) {
if (old_pfr_at_eof(pfr)) {
if (sb_is_empty(psb))
return NULL;
else
pfr_buffer_by(pfr, ptrie->maxlen);
rc = parse_trie_match(ptrie, pfr->peekbuf, pfr->sob, pfr->npeeked, pfr->peekbuflenmask,
&stridx, &matchlen);
if (rc) {
pfr_advance_by(pfr, matchlen);
switch(stridx) {
case IRS_STRIDX:
return sb_finish(psb);
} else if (old_pfr_next_is(pfr, irs, irs_len)) {
old_pfr_advance_by(pfr, irs_len);
return sb_finish(psb);
break;
case IRSEOF_STRIDX:
return sb_finish(psb);
break;
case EOF_STRIDX:
return NULL;
break;
}
} else {
sb_append_char(psb, old_pfr_read_char(pfr));
sb_append_char(psb, pfr_read_char(pfr));
}
}
}
static int read_file_pfr_psb(char* filename, int do_write) {
FILE* fp = fopen_or_die(filename);
char* irs = "\n";
int irs_len = strlen(irs);
byte_reader_t* pbr = stdio_byte_reader_alloc();
pbr->popen_func(pbr, filename);
peek_file_reader_t* pfr = pfr_alloc(pbr, PEEK_BUF_LEN);
parse_trie_t* ptrie = parse_trie_alloc();
parse_trie_add_string(ptrie, "\n", IRS_STRIDX);
parse_trie_add_string(ptrie, "\xff", EOF_STRIDX);
parse_trie_add_string(ptrie, "\n\xff", IRSEOF_STRIDX);
old_peek_file_reader_t* pfr = old_pfr_alloc(fp, PEEK_BUF_LEN);
string_builder_t sb;
string_builder_t* psb = &sb;
sb_init(&sb, STRING_BUILDER_INIT_SIZE);
@ -292,7 +312,7 @@ static int read_file_pfr_psb(char* filename, int do_write) {
int bc = 0;
while (TRUE) {
char* line = read_line_pfr_psb(pfr, psb, irs, irs_len);
char* line = read_line_pfr_psb(pfr, psb, ptrie);
if (line == NULL)
break;
if (do_write) {
@ -302,7 +322,7 @@ static int read_file_pfr_psb(char* filename, int do_write) {
bc += strlen(line);
free(line);
}
fclose(fp);
pbr->pclose_func(pbr);
return bc;
}
@ -384,41 +404,42 @@ int main(int argc, char** argv) {
// $ ./getl ../data/big.csv 5|tee x
// $ mlr --opprint cat then sort -n t x
// type t n
// getdelim 0.118618 55888899
// getdelim 0.121467 55888899
// getdelim 0.121943 55888899
// getdelim 0.124756 55888899
// getdelim 0.127039 55888899
// getc_unlocked_fixed_len 0.167563 55888899
// getc_unlocked_fixed_len 0.167803 55888899
// getc_unlocked_fixed_len 0.168808 55888899
// getc_unlocked_fixed_len 0.168980 55888899
// getc_unlocked_fixed_len 0.176187 55888899
// getc_unlocked_psb 0.238986 55888899
// getc_unlocked_psb 0.241325 55888899
// getc_unlocked_psb 0.246466 55888899
// getc_unlocked_psb 0.247592 55888899
// getc_unlocked_psb 0.248112 55888899
// mmap_psb 0.250021 55888899
// mmap_psb 0.254118 55888899
// mmap_psb 0.257428 55888899
// mmap_psb 0.261807 55888899
// mmap_psb 0.264367 55888899
// pfr_psb 0.760035 55888900
// pfr_psb 0.765121 55888900
// pfr_psb 0.768731 55888900
// pfr_psb 0.771937 55888900
// pfr_psb 0.780460 55888900
// fgetc_fixed_len 2.516459 55888899
// fgetc_fixed_len 2.522877 55888899
// fgetc_fixed_len 2.587373 55888899
// fgetc_psb 2.590090 55888899
// fgetc_psb 2.590536 55888899
// fgetc_fixed_len 2.608356 55888899
// fgetc_psb 2.623930 55888899
// fgetc_fixed_len 2.624310 55888899
// fgetc_psb 2.637269 55888899
// type t n type t n
// getdelim 0.118618 55888899 getdelim 0.118057 55888899
// getdelim 0.121467 55888899 getdelim 0.118727 55888899
// getdelim 0.121943 55888899 getdelim 0.119609 55888899
// getdelim 0.124756 55888899 getdelim 0.122506 55888899
// getdelim 0.127039 55888899 getdelim 0.123099 55888899
// getc_unlocked_fixed_len 0.167563 55888899 getc_unlocked_fixed_len 0.168109 55888899
// getc_unlocked_fixed_len 0.167803 55888899 getc_unlocked_fixed_len 0.168392 55888899
// getc_unlocked_fixed_len 0.168808 55888899 getc_unlocked_fixed_len 0.169387 55888899
// getc_unlocked_fixed_len 0.168980 55888899 getc_unlocked_fixed_len 0.178484 55888899
// getc_unlocked_fixed_len 0.176187 55888899 getc_unlocked_fixed_len 0.182793 55888899
// getc_unlocked_psb 0.238986 55888899 getc_unlocked_psb 0.293240 55888899
// getc_unlocked_psb 0.241325 55888899 getc_unlocked_psb 0.298449 55888899
// getc_unlocked_psb 0.246466 55888899 getc_unlocked_psb 0.298508 55888899
// getc_unlocked_psb 0.247592 55888899 getc_unlocked_psb 0.301125 55888899
// getc_unlocked_psb 0.248112 55888899 mmap_psb 0.313239 55888899
// mmap_psb 0.250021 55888899 mmap_psb 0.315061 55888899
// mmap_psb 0.254118 55888899 mmap_psb 0.315517 55888899
// mmap_psb 0.257428 55888899 mmap_psb 0.316790 55888899
// mmap_psb 0.261807 55888899 mmap_psb 0.320654 55888899
// mmap_psb 0.264367 55888899 getc_unlocked_psb 0.326494 55888899
// pfr_psb 0.760035 55888900 pfr_psb 0.417141 55888899
// pfr_psb 0.765121 55888900 pfr_psb 0.439269 55888899
// pfr_psb 0.768731 55888900 pfr_psb 0.439342 55888899
// pfr_psb 0.771937 55888900 pfr_psb 0.447218 55888899
// pfr_psb 0.780460 55888900 pfr_psb 0.453839 55888899
// fgetc_fixed_len 2.516459 55888899 fgetc_psb 2.476543 55888899
// fgetc_fixed_len 2.522877 55888899 fgetc_psb 2.477130 55888899
// fgetc_fixed_len 2.587373 55888899 fgetc_psb 2.484007 55888899
// fgetc_psb 2.590090 55888899 fgetc_psb 2.484495 55888899
// fgetc_psb 2.590536 55888899 fgetc_fixed_len 2.493730 55888899
// fgetc_fixed_len 2.608356 55888899 fgetc_fixed_len 2.528333 55888899
// fgetc_psb 2.623930 55888899 fgetc_fixed_len 2.533535 55888899
// fgetc_fixed_len 2.624310 55888899 fgetc_fixed_len 2.555377 55888899
// fgetc_psb 2.637269 55888899 fgetc_fixed_len 2.736391 55888899
// fgetc_psb 2.743828 55888899
// $ mlr --opprint cat then stats1 -a min,max,stddev,mean -f t -g type then sort -n t_mean x
// type t_min t_max t_stddev t_mean
@ -430,6 +451,15 @@ int main(int argc, char** argv) {
// fgetc_fixed_len 2.516459 2.624310 0.049478 2.571875
// fgetc_psb 2.590090 2.680364 0.037489 2.624438
// type t_min t_max t_stddev t_mean
// getdelim 0.118057 0.123099 0.002271 0.120400
// getc_unlocked_fixed_len 0.168109 0.182793 0.006768 0.173433
// getc_unlocked_psb 0.293240 0.326494 0.013134 0.303563
// mmap_psb 0.313239 0.320654 0.002771 0.316252
// pfr_psb 0.417141 0.453839 0.013830 0.439362
// fgetc_psb 2.476543 2.743828 0.117803 2.533201
// fgetc_fixed_len 2.493730 2.736391 0.095892 2.569473
// ----------------------------------------------------------------
// Analysis:
// * getdelim is good; fatal flaw is single-char line-terminator
@ -441,4 +471,4 @@ int main(int argc, char** argv) {
// * getc_unlocked vs. fgetc, no-brainer for this single-threaded code.
// * string-builder is a little slower than fixed-length malloc, as expected
// -- it's adding value.
// ! old_peek_file_reader is where the optimization opportunities are
// ! peek_file_reader is where the optimization opportunities are

View file

@ -139,7 +139,7 @@ static slls_t* lrec_reader_csv_get_fields(lrec_reader_csv_state_t* pstate) {
pfr->peekbuf, pfr->sob, pfr->npeeked, pfr->peekbuflenmask,
&stridx, &matchlen);
#ifdef DEBUG_PARSER
pfr_dump(pfr);
pfr_print(pfr);
#endif
if (rc) {
#ifdef DEBUG_PARSER
@ -291,15 +291,15 @@ static void lrec_reader_csv_free(void* pvstate) {
}
// ----------------------------------------------------------------
lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char irs, char ifs) {
lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char* irs, char* ifs) {
lrec_reader_t* plrec_reader = mlr_malloc_or_die(sizeof(lrec_reader_t));
lrec_reader_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_reader_csv_state_t));
pstate->ilno = 0LL;
pstate->eof = "\xff";
pstate->irs = "\r\n"; // xxx multi-byte the cli irs/ifs/etc, and integrate here
pstate->ifs = ","; // xxx multi-byte the cli irs/ifs/etc, and integrate here
pstate->irs = irs;
pstate->ifs = ifs;
pstate->ifs_eof = mlr_paste_2_strings(pstate->ifs, "\xff");
pstate->dquote = "\"";

View file

@ -52,7 +52,7 @@ lrec_reader_t* lrec_reader_stdio_dkvp_alloc(char irs, char ifs, char ips, int al
}
// ----------------------------------------------------------------
// xxx needs checking on repeated occurrences of ps between fs occurrences. don't zero-poke there.
// xxx needs checking on repeated occurrences of ps between ifs occurrences. don't zero-poke there.
//
// xxx needs abend on null lhs.
//

View file

@ -1,35 +1,71 @@
#include "lib/mlrutil.h"
#include "lib/mlr_globals.h"
#include "input/lrec_readers.h"
#include "input/byte_readers.h"
lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char irs, char ifs, int allow_repeat_ifs,
char ips, int allow_repeat_ips)
// Temporary shim for readers that still take single-character separators:
// returns the sole character of value when value is exactly one character
// long. Otherwise prints an error naming the separator (name) and its value
// to stderr, prefixed with the program name, and exits with status 1.
static char xxx_temp_check_single_char_separator(char* name, char* value) {
	size_t value_length = strlen(value);
	if (value_length == 1)
		return value[0];

	fprintf(stderr,
		"%s: multi-character separators are not yet supported for all formats. Got %s=\"%s\".\n",
		MLR_GLOBALS.argv0, name, value);
	exit(1);
}
lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char* irs, char* ifs, int allow_repeat_ifs,
char* ips, int allow_repeat_ips)
{
// xxx refactor for https://github.com/johnkerl/miller/issues/51 et al.
byte_reader_t* pbr = use_mmap ? mmap_byte_reader_alloc() : stdio_byte_reader_alloc();
if (streq(fmtdesc, "dkvp")) {
if (use_mmap)
return lrec_reader_mmap_dkvp_alloc(irs, ifs, ips, allow_repeat_ifs);
return lrec_reader_mmap_dkvp_alloc(
xxx_temp_check_single_char_separator("irs", irs),
xxx_temp_check_single_char_separator("ifs", ifs),
xxx_temp_check_single_char_separator("ips", ips),
allow_repeat_ifs);
else
return lrec_reader_stdio_dkvp_alloc(irs, ifs, ips, allow_repeat_ifs);
return lrec_reader_stdio_dkvp_alloc(
xxx_temp_check_single_char_separator("irs", irs),
xxx_temp_check_single_char_separator("ifs", ifs),
xxx_temp_check_single_char_separator("ips", ips),
allow_repeat_ifs);
} else if (streq(fmtdesc, "csv")) {
return lrec_reader_csv_alloc(pbr, irs, ifs);
} else if (streq(fmtdesc, "csvlite")) {
if (use_mmap)
return lrec_reader_mmap_csvlite_alloc(irs, ifs, allow_repeat_ifs);
return lrec_reader_mmap_csvlite_alloc(
xxx_temp_check_single_char_separator("irs", irs),
xxx_temp_check_single_char_separator("ifs", ifs),
allow_repeat_ifs);
else
return lrec_reader_stdio_csvlite_alloc(irs, ifs, allow_repeat_ifs);
return lrec_reader_stdio_csvlite_alloc(
xxx_temp_check_single_char_separator("irs", irs),
xxx_temp_check_single_char_separator("ifs", ifs),
allow_repeat_ifs);
} else if (streq(fmtdesc, "nidx")) {
if (use_mmap)
return lrec_reader_mmap_nidx_alloc(irs, ifs, allow_repeat_ifs);
return lrec_reader_mmap_nidx_alloc(
xxx_temp_check_single_char_separator("irs", irs),
xxx_temp_check_single_char_separator("ifs", ifs),
allow_repeat_ifs);
else
return lrec_reader_stdio_nidx_alloc(irs, ifs, allow_repeat_ifs);
return lrec_reader_stdio_nidx_alloc(
xxx_temp_check_single_char_separator("irs", irs),
xxx_temp_check_single_char_separator("ifs", ifs),
allow_repeat_ifs);
} else if (streq(fmtdesc, "xtab")) {
if (use_mmap)
return lrec_reader_mmap_xtab_alloc(irs, ips, TRUE/*allow_repeat_ips*/);
return lrec_reader_mmap_xtab_alloc(
xxx_temp_check_single_char_separator("irs", irs),
xxx_temp_check_single_char_separator("ips", ips),
TRUE/*allow_repeat_ips*/);
else
return lrec_reader_stdio_xtab_alloc(ips, TRUE); // xxx parameterize allow_repeat_ips
return lrec_reader_stdio_xtab_alloc(
xxx_temp_check_single_char_separator("ips", ips),
TRUE); // xxx parameterize allow_repeat_ips
} else {
return NULL;
}

View file

@ -6,12 +6,12 @@
// ----------------------------------------------------------------
// Primary entry points
// fmtdesc: "dkvp", "csv", "nidx", "xtab".
lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char irs, char ifs, int allow_repeat_ifs,
char ips, int allow_repeat_ips);
// Factory method. fmtdesc: "dkvp", "csv", "csvlite", "nidx", "xtab".
lrec_reader_t* lrec_reader_alloc(char* fmtdesc, int use_mmap, char* irs, char* ifs, int allow_repeat_ifs,
char* ips, int allow_repeat_ips);
lrec_reader_t* lrec_reader_stdio_csvlite_alloc(char irs, char ifs, int allow_repeat_ifs);
lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char irs, char ifs);
lrec_reader_t* lrec_reader_csv_alloc(byte_reader_t* pbr, char* irs, char* ifs);
lrec_reader_t* lrec_reader_stdio_dkvp_alloc(char irs, char ifs, char ips, int allow_repeat_ifs);
lrec_reader_t* lrec_reader_stdio_nidx_alloc(char irs, char ifs, int allow_repeat_ifs);
lrec_reader_t* lrec_reader_stdio_xtab_alloc(char ips, int allow_repeat_ips);

View file

@ -3,7 +3,7 @@
#include "input/peek_file_reader.h"
// ----------------------------------------------------------------
void pfr_dump(peek_file_reader_t* pfr) {
void pfr_print(peek_file_reader_t* pfr) {
printf("======================== pfr at %p\n", pfr);
printf(" peekbuflen = %d\n", pfr->peekbuflen);
printf(" npeeked = %d\n", pfr->npeeked);

View file

@ -93,6 +93,6 @@ static inline void pfr_advance_by(peek_file_reader_t* pfr, int len) {
}
// ----------------------------------------------------------------
void pfr_dump(peek_file_reader_t* pfr);
void pfr_print(peek_file_reader_t* pfr);
#endif // PEEK_FILE_READER_H

View file

@ -197,6 +197,7 @@ static char * run_all_tests() {
}
int main(int argc, char **argv) {
printf("TEST_BYTE_READERS ENTER\n");
char *result = run_all_tests();
printf("\n");
if (result != 0) {

View file

@ -41,24 +41,24 @@ static char* test_non_empty() {
peek_file_reader_t* pfr = pfr_alloc(pbr, 7);
pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'a');
pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == 'a');
pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'b');
pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == 'b');
pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'a');
pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == 'a');
pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == 'b');
pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == 'b');
pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == ',');
pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == ',');
pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == ',');
pfr_dump(pfr); pfr_buffer_by(pfr, 5);
pfr_dump(pfr); pfr_advance_by(pfr, 5);
pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == '2');
pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == ',');
pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == ',');
pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == ',');
pfr_print(pfr); pfr_buffer_by(pfr, 5);
pfr_print(pfr); pfr_advance_by(pfr, 5);
pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '2');
pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3');
pfr_dump(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3');
pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == '3');
pfr_dump(pfr); pfr_buffer_by(pfr, 5);
pfr_dump(pfr); pfr_advance_by(pfr, 5);
pfr_dump(pfr); mu_assert_lf(pfr_read_char(pfr) == '\n');
pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3');
pfr_print(pfr); mu_assert_lf(pfr_peek_char(pfr) == '3');
pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '3');
pfr_print(pfr); pfr_buffer_by(pfr, 5);
pfr_print(pfr); pfr_advance_by(pfr, 5);
pfr_print(pfr); mu_assert_lf(pfr_read_char(pfr) == '\n');
pbr->pclose_func(pbr);
pfr_free(pfr);
@ -74,6 +74,7 @@ static char * run_all_tests() {
}
int main(int argc, char **argv) {
printf("TEST_PEEK_FILE_READER ENTER\n");
char *result = run_all_tests();
printf("\n");
if (result != 0) {

View file

@ -227,10 +227,10 @@ int mlr_string_pair_hash_func(char* str1, char* str2) {
}
// ----------------------------------------------------------------
char* mlr_get_line(FILE* input_stream, char rs) {
char* mlr_get_line(FILE* input_stream, char irs) {
char* line = NULL;
size_t linecap = 0;
ssize_t linelen = getdelim(&line, &linecap, rs, input_stream);
ssize_t linelen = getdelim(&line, &linecap, irs, input_stream);
if (linelen <= 0) {
return NULL;
}

View file

@ -58,7 +58,7 @@ int mlr_string_hash_func(char *str);
int mlr_string_pair_hash_func(char* str1, char* str2);
// xxx cmt mem mgt
char* mlr_get_line(FILE* input_stream, char rs);
char* mlr_get_line(FILE* input_stream, char irs);
// portable timegm replacement
time_t mlr_timegm (struct tm *tm);

View file

@ -58,10 +58,10 @@ static char * all_tests() {
}
int main(int argc, char **argv) {
printf("TEST_MLRUTIL ENTER\n");
char *result = all_tests();
printf("\n");
if (result != 0) {
//printf("%s\n", result);
printf("Not all unit tests passed\n");
}
else {

View file

@ -78,10 +78,10 @@ static char * all_tests() {
}
int main(int argc, char **argv) {
printf("TEST_STRING_BUILDER ENTER\n");
char *result = all_tests();
printf("\n");
if (result != 0) {
//printf("%s\n", result);
printf("Not all unit tests passed\n");
}
else {

View file

@ -30,12 +30,12 @@ typedef struct _mapper_join_opts_t {
// These allow the joiner to have its own different format/delimiter for
// the left-file:
char* input_file_format;
char irs;
char ifs;
char ips;
char* irs;
char* ifs;
char* ips;
int allow_repeat_ifs;
int allow_repeat_ips;
char* ifmt;
char* ifile_fmt;
int use_mmap_for_read;
} mapper_join_opts_t;
@ -237,12 +237,12 @@ static void mapper_join_free(void* pvstate) {
static void merge_options(mapper_join_opts_t* popts) {
if (popts->input_file_format == NULL)
popts->input_file_format = MLR_GLOBALS.popts->ifmt;
if (popts->irs == OPTION_UNSPECIFIED)
popts->input_file_format = MLR_GLOBALS.popts->ifile_fmt;
if (popts->irs == NULL)
popts->irs = MLR_GLOBALS.popts->irs;
if (popts->ifs == OPTION_UNSPECIFIED)
if (popts->ifs == NULL)
popts->ifs = MLR_GLOBALS.popts->ifs;
if (popts->ips == OPTION_UNSPECIFIED)
if (popts->ips == NULL)
popts->ips = MLR_GLOBALS.popts->ips;
if (popts->allow_repeat_ifs == OPTION_UNSPECIFIED)
popts->allow_repeat_ifs = MLR_GLOBALS.popts->allow_repeat_ifs;
@ -360,9 +360,9 @@ static mapper_t* mapper_join_parse_cli(int* pargi, int argc, char** argv) {
popts->emit_right_unpairables = FALSE;
popts->input_file_format = NULL;
popts->irs = OPTION_UNSPECIFIED;
popts->ifs = OPTION_UNSPECIFIED;
popts->ips = OPTION_UNSPECIFIED;
popts->irs = NULL;
popts->ifs = NULL;
popts->ips = NULL;
popts->allow_repeat_ifs = OPTION_UNSPECIFIED;
popts->allow_repeat_ips = OPTION_UNSPECIFIED;
popts->use_mmap_for_read = OPTION_UNSPECIFIED;
@ -370,25 +370,25 @@ static mapper_t* mapper_join_parse_cli(int* pargi, int argc, char** argv) {
char* verb = argv[(*pargi)++];
ap_state_t* pstate = ap_alloc();
ap_define_string_flag(pstate, "-f", &popts->left_file_name);
ap_define_string_list_flag(pstate, "-j", &popts->poutput_join_field_names);
ap_define_string_list_flag(pstate, "-l", &popts->pleft_join_field_names);
ap_define_string_list_flag(pstate, "-r", &popts->pright_join_field_names);
ap_define_string_flag(pstate, "--lp", &popts->left_prefix);
ap_define_string_flag(pstate, "--rp", &popts->right_prefix);
ap_define_false_flag(pstate, "--np", &popts->emit_pairables);
ap_define_true_flag(pstate, "--ul", &popts->emit_left_unpairables);
ap_define_true_flag(pstate, "--ur", &popts->emit_right_unpairables);
ap_define_true_flag(pstate, "-u", &popts->allow_unsorted_input);
ap_define_string_flag(pstate, "-f", &popts->left_file_name);
ap_define_string_list_flag(pstate, "-j", &popts->poutput_join_field_names);
ap_define_string_list_flag(pstate, "-l", &popts->pleft_join_field_names);
ap_define_string_list_flag(pstate, "-r", &popts->pright_join_field_names);
ap_define_string_flag(pstate, "--lp", &popts->left_prefix);
ap_define_string_flag(pstate, "--rp", &popts->right_prefix);
ap_define_false_flag(pstate, "--np", &popts->emit_pairables);
ap_define_true_flag(pstate, "--ul", &popts->emit_left_unpairables);
ap_define_true_flag(pstate, "--ur", &popts->emit_right_unpairables);
ap_define_true_flag(pstate, "-u", &popts->allow_unsorted_input);
ap_define_string_flag(pstate, "-i", &popts->input_file_format);
ap_define_char_flag(pstate, "--irs", &popts->irs);
ap_define_char_flag(pstate, "--ifs", &popts->ifs);
ap_define_char_flag(pstate, "--ips", &popts->ips);
ap_define_true_flag(pstate, "--repifs", &popts->allow_repeat_ifs);
ap_define_true_flag(pstate, "--repips", &popts->allow_repeat_ips);
ap_define_true_flag(pstate, "--use-mmap", &popts->use_mmap_for_read);
ap_define_false_flag(pstate, "--no-mmap", &popts->use_mmap_for_read);
ap_define_string_flag(pstate, "-i", &popts->input_file_format);
ap_define_string_flag(pstate, "--irs", &popts->irs);
ap_define_string_flag(pstate, "--ifs", &popts->ifs);
ap_define_string_flag(pstate, "--ips", &popts->ips);
ap_define_true_flag(pstate, "--repifs", &popts->allow_repeat_ifs);
ap_define_true_flag(pstate, "--repips", &popts->allow_repeat_ips);
ap_define_true_flag(pstate, "--use-mmap", &popts->use_mmap_for_read);
ap_define_false_flag(pstate, "--no-mmap", &popts->use_mmap_for_read);
if (!ap_parse(pstate, verb, pargi, argc, argv)) {
mapper_join_usage(argv[0], verb);

View file

@ -1,9 +1,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef MLR_USE_MCHECK
#include <mcheck.h>
#endif
#include "cli/mlrcli.h"
#include "lib/mlrutil.h"
@ -16,13 +13,6 @@
#include "stream/stream.h"
int main(int argc, char** argv) {
#ifdef MLR_USE_MCHECK
if (mcheck(NULL) != 0) {
fprintf(stderr, "Could not set up mcheck\n");
exit(1);
}
fprintf(stderr, "Set up mcheck\n");
#endif
mlr_global_init(argv[0], NULL, NULL);
cli_opts_t* popts = parse_command_line(argc, argv);
mlr_global_init(argv[0], popts->ofmt, popts);

View file

@ -13,8 +13,8 @@ static void quote_numeric_output_func(FILE* fp, char* string, char* ors, char* o
typedef struct _lrec_writer_csv_state_t {
int onr;
char *ors; // xxx char -> char*
char *ofs; // xxx char -> char*
char *ors;
char *ofs;
int orslen;
int ofslen;
quoted_output_func_t* pquoted_output_func;
@ -78,15 +78,13 @@ static void lrec_writer_csv_free(void* pvstate) {
}
}
lrec_writer_t* lrec_writer_csv_alloc(char ors, char ofs, int oquoting) {
lrec_writer_t* lrec_writer_csv_alloc(char* ors, char* ofs, int oquoting) {
lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t));
lrec_writer_csv_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_csv_state_t));
pstate->onr = 0;
//pstate->ors = ors;
//pstate->ofs = ofs;
pstate->ors = "\r\n"; // xxx temp
pstate->ofs = ","; // xxx temp
pstate->ors = ors;
pstate->ofs = ofs;
pstate->orslen = strlen(pstate->ors);
pstate->ofslen = strlen(pstate->ofs);

View file

@ -4,9 +4,9 @@
#include "output/lrec_writers.h"
typedef struct _lrec_writer_csvlite_state_t {
int onr;
char ors;
char ofs;
int onr;
char* ors;
char* ofs;
long long num_header_lines_output;
slls_t* plast_header_output;
} lrec_writer_csvlite_state_t;
@ -18,8 +18,8 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void*
if (prec == NULL)
return;
lrec_writer_csvlite_state_t* pstate = pvstate;
char ors = pstate->ors;
char ofs = pstate->ofs;
char* ors = pstate->ors;
char* ofs = pstate->ofs;
if (pstate->plast_header_output != NULL) {
// xxx make a fcn to compare these w/o copy: put it in mixutil.
@ -27,7 +27,7 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void*
slls_free(pstate->plast_header_output);
pstate->plast_header_output = NULL;
if (pstate->num_header_lines_output > 0LL)
fputc(ors, output_stream);
fputs(ors, output_stream);
}
}
@ -35,11 +35,11 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void*
int nf = 0;
for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
if (nf > 0)
fputc(ofs, output_stream);
fputs(ofs, output_stream);
fputs(pe->key, output_stream);
nf++;
}
fputc(ors, output_stream);
fputs(ors, output_stream);
pstate->plast_header_output = mlr_copy_keys_from_record(prec);
pstate->num_header_lines_output++;
}
@ -47,11 +47,11 @@ static void lrec_writer_csvlite_process(FILE* output_stream, lrec_t* prec, void*
int nf = 0;
for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
if (nf > 0)
fputc(ofs, output_stream);
fputs(ofs, output_stream);
fputs(pe->value, output_stream);
nf++;
}
fputc(ors, output_stream);
fputs(ors, output_stream);
pstate->onr++;
lrec_free(prec); // xxx cmt mem-mgmt
@ -65,7 +65,7 @@ static void lrec_writer_csvlite_free(void* pvstate) {
}
}
lrec_writer_t* lrec_writer_csvlite_alloc(char ors, char ofs) {
lrec_writer_t* lrec_writer_csvlite_alloc(char* ors, char* ofs) {
lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t));
lrec_writer_csvlite_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_csvlite_state_t));

View file

@ -3,9 +3,9 @@
#include "output/lrec_writers.h"
typedef struct _lrec_writer_dkvp_state_t {
char rs;
char fs;
char ps;
char* ors;
char* ofs;
char* ops;
} lrec_writer_dkvp_state_t;
// ----------------------------------------------------------------
@ -13,33 +13,33 @@ static void lrec_writer_dkvp_process(FILE* output_stream, lrec_t* prec, void* pv
if (prec == NULL)
return;
lrec_writer_dkvp_state_t* pstate = pvstate;
char rs = pstate->rs;
char fs = pstate->fs;
char ps = pstate->ps;
char* ors = pstate->ors;
char* ofs = pstate->ofs;
char* ops = pstate->ops;
int nf = 0;
for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
if (nf > 0)
fputc(fs, output_stream);
fputs(ofs, output_stream);
fputs(pe->key, output_stream);
fputc(ps, output_stream);
fputs(ops, output_stream);
fputs(pe->value, output_stream);
nf++;
}
fputc(rs, output_stream);
fputs(ors, output_stream);
lrec_free(prec); // xxx cmt mem-mgmt
}
static void lrec_writer_dkvp_free(void* pvstate) {
}
lrec_writer_t* lrec_writer_dkvp_alloc(char rs, char fs, char ps) {
lrec_writer_t* lrec_writer_dkvp_alloc(char* ors, char* ofs, char* ops) {
lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t));
lrec_writer_dkvp_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_dkvp_state_t));
pstate->rs = rs;
pstate->fs = fs;
pstate->ps = ps;
pstate->ors = ors;
pstate->ofs = ofs;
pstate->ops = ops;
plrec_writer->pvstate = (void*)pstate;
plrec_writer->pprocess_func = &lrec_writer_dkvp_process;

View file

@ -3,8 +3,8 @@
#include "output/lrec_writers.h"
typedef struct _lrec_writer_nidx_state_t {
char rs;
char fs;
char* ors;
char* ofs;
} lrec_writer_nidx_state_t;
// ----------------------------------------------------------------
@ -12,29 +12,29 @@ static void lrec_writer_nidx_process(FILE* output_stream, lrec_t* prec, void* pv
if (prec == NULL)
return;
lrec_writer_nidx_state_t* pstate = pvstate;
char rs = pstate->rs;
char fs = pstate->fs;
char* ors = pstate->ors;
char* ofs = pstate->ofs;
int nf = 0;
for (lrece_t* pe = prec->phead; pe != NULL; pe = pe->pnext) {
if (nf > 0)
fputc(fs, output_stream);
fputs(ofs, output_stream);
fputs(pe->value, output_stream);
nf++;
}
fputc(rs, output_stream);
fputs(ors, output_stream);
lrec_free(prec); // xxx cmt mem-mgmt
}
static void lrec_writer_nidx_free(void* pvstate) {
}
lrec_writer_t* lrec_writer_nidx_alloc(char rs, char fs) {
lrec_writer_t* lrec_writer_nidx_alloc(char* ors, char* ofs) {
lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t));
lrec_writer_nidx_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_nidx_state_t));
pstate->rs = rs;
pstate->fs = fs;
pstate->ors = ors;
pstate->ofs = ofs;
plrec_writer->pvstate = (void*)pstate;
plrec_writer->pprocess_func = &lrec_writer_nidx_process;

View file

@ -11,9 +11,11 @@ typedef struct _lrec_writer_pprint_state_t {
slls_t* pprev_keys;
int left_align;
long long num_blocks_written;
char* ors;
char* ofs;
} lrec_writer_pprint_state_t;
static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, int left_align);
static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, char* ors, char* ofs, int left_align);
// ----------------------------------------------------------------
static void lrec_writer_pprint_process(FILE* output_stream, lrec_t* prec, void* pvstate) {
@ -31,8 +33,8 @@ static void lrec_writer_pprint_process(FILE* output_stream, lrec_t* prec, void*
if (drain) {
if (pstate->num_blocks_written > 0LL) // xxx cmt
fputc('\n', output_stream);
print_and_free_record_list(pstate->precords, output_stream, pstate->left_align);
fputs(pstate->ors, output_stream);
print_and_free_record_list(pstate->precords, output_stream, pstate->ors, pstate->ofs, pstate->left_align);
if (pstate->pprev_keys != NULL) {
slls_free(pstate->pprev_keys);
pstate->pprev_keys = NULL;
@ -48,7 +50,7 @@ static void lrec_writer_pprint_process(FILE* output_stream, lrec_t* prec, void*
}
// ----------------------------------------------------------------
static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, int left_align) {
static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, char* ors, char* ofs, int left_align) {
if (precords->length == 0)
return;
lrec_t* prec1 = precords->phead->pvdata;
@ -95,7 +97,7 @@ static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, in
fprintf(output_stream, "%s", pe->key);
}
}
fputc('\n', output_stream);
fputs(ors, output_stream);
}
j = 0;
@ -122,7 +124,7 @@ static void print_and_free_record_list(sllv_t* precords, FILE* output_stream, in
fprintf(output_stream, "%s", value);
}
}
fputc('\n', output_stream);
fputs(ors, output_stream);
lrec_free(prec); // xxx cmt mem-mgmt
}
@ -143,12 +145,14 @@ static void lrec_writer_pprint_free(void* pvstate) {
}
}
lrec_writer_t* lrec_writer_pprint_alloc(int left_align) {
lrec_writer_t* lrec_writer_pprint_alloc(char* ors, char* ofs, int left_align) {
lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t));
lrec_writer_pprint_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_pprint_state_t));
pstate->precords = sllv_alloc();
pstate->pprev_keys = NULL;
pstate->ors = ors;
pstate->ofs = ofs;
pstate->left_align = left_align;
pstate->num_blocks_written = 0LL;

View file

@ -4,6 +4,8 @@
#include "output/lrec_writers.h"
typedef struct _lrec_writer_xtab_state_t {
char* ors;
char* ofs;
long long record_count;
} lrec_writer_xtab_state_t;
@ -13,7 +15,7 @@ static void lrec_writer_xtab_process(FILE* output_stream, lrec_t* prec, void* pv
return;
lrec_writer_xtab_state_t* pstate = pvstate;
if (pstate->record_count > 0LL)
fprintf(output_stream, "\n");
fputs(pstate->ors, output_stream);
pstate->record_count++;
int max_key_width = 1;
@ -28,8 +30,8 @@ static void lrec_writer_xtab_process(FILE* output_stream, lrec_t* prec, void* pv
fprintf(output_stream, "%s", pe->key);
int d = max_key_width - strlen_for_utf8_display(pe->key);
for (int i = 0; i < d; i++)
fputc(' ', output_stream);
fprintf(output_stream, " %s\n", pe->value);
fputs(pstate->ofs, output_stream);
fprintf(output_stream, "%s%s%s", pstate->ofs, pe->value, pstate->ors);
}
lrec_free(prec); // xxx cmt mem-mgmt
}
@ -37,10 +39,12 @@ static void lrec_writer_xtab_process(FILE* output_stream, lrec_t* prec, void* pv
static void lrec_writer_xtab_free(void* pvstate) {
}
lrec_writer_t* lrec_writer_xtab_alloc() {
lrec_writer_t* lrec_writer_xtab_alloc(char* ors, char* ofs) {
lrec_writer_t* plrec_writer = mlr_malloc_or_die(sizeof(lrec_writer_t));
lrec_writer_xtab_state_t* pstate = mlr_malloc_or_die(sizeof(lrec_writer_xtab_state_t));
pstate->ors = ors;
pstate->ofs = ofs;
pstate->record_count = 0LL;
plrec_writer->pvstate = pstate;

View file

@ -2,11 +2,11 @@
#define LREC_WRITERS_H
#include "output/lrec_writer.h"
lrec_writer_t* lrec_writer_csv_alloc(char rs, char fs, int oquoting);
lrec_writer_t* lrec_writer_csvlite_alloc(char rs, char fs);
lrec_writer_t* lrec_writer_dkvp_alloc(char rs, char fs, char ps);
lrec_writer_t* lrec_writer_nidx_alloc(char rs, char fs);
lrec_writer_t* lrec_writer_pprint_alloc(int left_align);
lrec_writer_t* lrec_writer_xtab_alloc();
lrec_writer_t* lrec_writer_csv_alloc(char* ors, char* ofs, int oquoting);
lrec_writer_t* lrec_writer_csvlite_alloc(char* ors, char* ofs);
lrec_writer_t* lrec_writer_dkvp_alloc(char* ors, char* ofs, char* ops);
lrec_writer_t* lrec_writer_nidx_alloc(char* ors, char* ofs);
lrec_writer_t* lrec_writer_pprint_alloc(char* ors, char*ofs, int left_align);
lrec_writer_t* lrec_writer_xtab_alloc(char* ors, char* ofs);
#endif // LREC_WRITERS_H

5
c/test/README.md Normal file
View file

@ -0,0 +1,5 @@
There are two classes of testing for Miller:
* C source-file names starting with `test_` use MinUnit to **unit-test** various subsystems of interest. These are separate executables built and run by the build framework.
* `test/run` runs the main `mlr` executable with canned inputs, comparing actual to canned outputs, to **regression-test** Miller's end-to-end operation.

View file

@ -2208,3 +2208,11 @@ a,b,c
4,5,6
x,y"yy,z
mlr --csv --ifs semicolon --ofs pipe --irs lf --ors lflf cut -x -f b ./test/input/rfc-csv/modify-defaults.csv
a|c
1|3
4|6

View file

@ -0,0 +1,3 @@
a;b;c
1;2;3
4;;6
1 a b c
2 1 2 3
3 4 6

View file

@ -462,6 +462,7 @@ run_mlr --csv cat $indir/rfc-csv/quoted-comma-truncated.csv
run_mlr --csv cat $indir/rfc-csv/quoted-crlf.csv
run_mlr --csv cat $indir/rfc-csv/quoted-crlf-truncated.csv
run_mlr --csv cat $indir/rfc-csv/simple-truncated.csv $indir/rfc-csv/simple.csv
run_mlr --csv --ifs semicolon --ofs pipe --irs lf --ors lflf cut -x -f b $indir/rfc-csv/modify-defaults.csv
# ================================================================
# A key feature of this regression script is that it can be invoked from any

View file

@ -1,45 +1,80 @@
================================================================
BUGFIXES
! memory leak in csv reader! careful about slls data, and do not use lrec_put_no_free
-> heap-fragging?
-> redo inline-pasting but this time correctly weight the fragging effect
-> denormalize :( pointer-copying is fine for string/mmap-backed cases in the absence of dquotes;
no struping needed *at all*.
:D
================================================================
TOP OF LIST
* v2.1.0:
o perf
o rs/fs -> csv
o multichar rs/fs/ps for all formats
o optimize csv read perf
o double-quote feature -> dkvp
----------------------------------------------------------------
MAJOR: autoconfig
o make a profiler proggy-pair for getline vs. psb/pkr for simple cat
o rs/fs/ps from char to char* throughout
o parameterize csv rs/fs
o implement mmap-backed psb/pkr via vptr intf and profile that
o RFC "there may be a header" -- ?!? use nidx-style integer-numbered columns?? --no-header?
o DKVP double-quote support
i still need separate csvlite/csv on output since the former tolerates heterogeneity
----------------------------------------------------------------
MAJOR: multi-char separators for file formats other than CSV
k oxs is functionally done
* need backslash-handling/parsing ... at least, \r \n \t. and, into online help.
! ixs:
o ips & ifs: needs *p==ixs with strneq(p, ixs); also double-null poke (sos&eos)
o irs for mmap: same
o irs for stdio: it all comes down to getdelim.
! so focus on getline perf.
! maybe best idea is to re-impl getdelim with multichar irs.
- rework csv reader to look more like csvlite (which is performant)?
! temporary option is getdelim with final char of the multichar irs; strcmp backwards;
usually get it right; occasionally have to strcat/memcpy multiple such. this is
gross so don't do it unless multichar-getdelim doesn't pan out.
----------------------------------------------------------------
MAJOR: csv mem-leak/read-perf
* current option runs faster w/o free, apparently due to heap-fragging
o memory leak in csv reader! careful about slls data, and do not use lrec_put_no_free
o redo inline-pasting but this time correctly weight the fragging effect
* for stdio, needs some thought ...
* ... but for mmap, it's almost always not necessary to strdup at all:
only on escaped-double-quote case.
* denormalize the pbr & make stdio pbr & ptr-backed (mmap,UT-string) pbr.
* code-dup (yes, sadly!) the CSV reader into two & do strdups in stdio
but lrec_put w/ !LREC_FREE_VALUE for ptr-backed.
* or *maybe* pbr retent/free-flags for string/mmap w/o denorm, but only
if it's both elegant and fast
! experimental/getlines.c shows that even without the heap-fragging
issue, pfr+psb is 3.5x slower than getdelim. again suggesting
multi-char-terminated getdelim might be the best line of approach.
----------------------------------------------------------------
MINOR
* define dkvp, nidx, etc @ cover x 2
? dkvp quoting ... wait until after the mmap/perf split. else, very undesirable
performance regression.
* go back and re-apply ctype/isprint portability things to new spots
* more dead-code mains ... lrec-eval; what else?
* dsls/ build outside of pwd? or just lemon $(absdir)/filenamegoeshere.y?
* configure w/o autotools? likewise manpage. etc. multiple build levels.
b mlr faq page
* --mmap @ mlr -h
* ctype ff @ bld.out
* platform os/ccomp list to mlrdoc
* -h vs. usage : stdout vs. stderr
* pprint join?
* header-length data mismatch et al.: file/line
* make an updated dependency list, esp. in light of a2x et al.
* probably its own mlrdoc page ... at least, highlighted in build page
* trie-parse to-do:
o make a power-of-two ring buffer for pfr & trie
* autoconf
* .deb
* homebrew
----------------------------------------------------------------
little:
* RFC "there may be a header" -- ?!? use nidx-style integer-numbered columns?? --no-header?
* -Wall -Wextra -Wpedantic-?? Werror=unused-but-set-variable?
* --mmap into online help ...

View file

@ -2,7 +2,7 @@ POKI_PUT_TOC_HERE
<h1>File-format awareness</h1>
Miller respects CSV headers. If you do <tt>mlr --csv-input cat *.csv</tt> then the header line is written once:
Miller respects CSV headers. If you do <tt>mlr --csv cat *.csv</tt> then the header line is written once:
<table><tr>
<td>

View file

@ -9,14 +9,12 @@ changes of field names within a single data stream.
<p/> Miller has record separator <tt>RS</tt> and field separator <tt>FS</tt>,
just as <tt>awk</tt> does. For TSV, use <tt>--fs tab</tt>; to convert TSV to
CSV, use <tt>--ifs tab --ofs ,</tt> etc. (See also
CSV, use <tt>--ifs tab --ofs comma</tt>, etc. (See also
POKI_PUT_LINK_FOR_PAGE(reference.html)HERE.)
<p/>The <tt>--csvlite</tt> option supports programmable single-byte field and record separators,
e.g. you can do TSV. Meanwhile <tt>--csv</tt> supports RFC-4180 CSV (<a href="https://tools.ietf.org/html/rfc4180">
https://tools.ietf.org/html/rfc4180</a>).
For more information about the current status of CSV support in Miller, please see
<a href="https://github.com/johnkerl/miller/releases/tag/v2.0.0">https://github.com/johnkerl/miller/releases/tag/v2.0.0</a>.
<p/>Miller&rsquo;s <tt>--csv</tt> flag supports RFC-4180 CSV (<a href="https://tools.ietf.org/html/rfc4180">
https://tools.ietf.org/html/rfc4180</a>). This includes CRLF line-terminators by default, regardless
of platform. You can use <tt>mlr --csv --rs lf</tt> for native Un*x (LF-terminated) CSV files.
<h1>Pretty-printed</h1>
Miller&rsquo;s pretty-print format is like CSV, but column-aligned. For example, compare

View file

@ -8,7 +8,7 @@ We think of CSV tables as rectangular: if there are 17 columns in the header the
<h2>CSV and pretty-print</h2>
Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get standard CSV as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. For example:
Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get CSV per se as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. The difference between CSV and CSV-lite is that the former is RFC4180-compliant, while the latter readily handles heterogeneous data (which is non-compliant). For example:
<table><tr><td>
POKI_RUN_COMMAND{{cat data/het.dkvp}}HERE

View file

@ -6,11 +6,11 @@
announcement</a>, by far the biggest asks were RFC-4180-compliant CSV, and
packaging (Homebrew, <tt>.deb</tt>).
<li/> Miller&rsquo;s record, field, and pair separators can only be single
<li/> Miller&rsquo;s record, field, and pair separators can be single
characters (e.g. newline, comma, equals sign), optionally allowing repeats on
input (e.g. multiple spaces treated as one). It would be nice if strings were
supported, e.g. <tt>"\n\n"</tt> paragraph-oriented record separation, or mix of
space and tab for field separation.
input (e.g. multiple spaces treated as one). Multi-character separator strings
(e.g. double-linefeed) are supported on input and output for CSV, and on output
for other formats. This is a work in progress.
<li/> String-oriented functions such as <tt>sub</tt>, and Miller&rsquo;s
<tt>filter</tt>, could be made far more powerful if a regular-expression

View file

@ -103,7 +103,7 @@ Miller commands were run with pretty-print-tabular output format.
<a id="File-format_awareness"/><h1>File-format awareness</h1>
Miller respects CSV headers. If you do <tt>mlr --csv-input cat *.csv</tt> then the header line is written once:
Miller respects CSV headers. If you do <tt>mlr --csv cat *.csv</tt> then the header line is written once:
<table><tr>
<td>

View file

@ -112,14 +112,12 @@ changes of field names within a single data stream.
<p/> Miller has record separator <tt>RS</tt> and field separator <tt>FS</tt>,
just as <tt>awk</tt> does. For TSV, use <tt>--fs tab</tt>; to convert TSV to
CSV, use <tt>--ifs tab --ofs ,</tt> etc. (See also
CSV, use <tt>--ifs tab --ofs comma</tt>, etc. (See also
<a href="reference.html">Reference</a>.)
<p/>The <tt>--csvlite</tt> option supports programmable single-byte field and record separators,
e.g. you can do TSV. Meanwhile <tt>--csv</tt> supports RFC-4180 CSV (<a href="https://tools.ietf.org/html/rfc4180">
https://tools.ietf.org/html/rfc4180</a>).
For more information about the current status of CSV support in Miller, please see
<a href="https://github.com/johnkerl/miller/releases/tag/v2.0.0">https://github.com/johnkerl/miller/releases/tag/v2.0.0</a>.
<p/>Miller&rsquo;s <tt>--csv</tt> flag supports RFC-4180 CSV (<a href="https://tools.ietf.org/html/rfc4180">
https://tools.ietf.org/html/rfc4180</a>). This includes CRLF line-terminators by default, regardless
of platform. You can use <tt>mlr --csv --rs lf</tt> for native Un*x (LF-terminated) CSV files.
<a id="Pretty-printed"/><h1>Pretty-printed</h1>
Miller&rsquo;s pretty-print format is like CSV, but column-aligned. For example, compare

View file

@ -110,7 +110,7 @@ We think of CSV tables as rectangular: if there are 17 columns in the header the
<a id="CSV_and_pretty-print"/><h2>CSV and pretty-print</h2>
Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get standard CSV as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. For example:
Miller simply prints a newline and a new header when there is a schema change. When there is no schema change, you get CSV per se as a special case. Likewise, Miller reads heterogeneous CSV or pretty-print input the same way. The difference between CSV and CSV-lite is that the former is RFC4180-compliant, while the latter readily handles heterogeneous data (which is non-compliant). For example:
<table><tr><td>
<p/>

View file

@ -249,19 +249,22 @@ Data-format options, for input, output, or both:
--xtab --ixtab --oxtab Pretty-printed vertical-tabular
-p is a keystroke-saver for --nidx --fs space --repifs
Separator options, for input, output, or both:
--rs --irs --ors Record separators, defaulting to newline
--fs --ifs --ofs --repifs Field separators, defaulting to ","
--ps --ips --ops Pair separators, defaulting to "="
Notes (as of Miller v2.0.0):
* RS/FS/PS are used for DKVP, NIDX, and CSVLITE formats where they must be single-character.
* For CSV, PPRINT, and XTAB formats, RS/FS/PS command-line options are ignored.
--rs --irs --ors Record separators, e.g. newline
--fs --ifs --ofs --repifs Field separators, e.g. comma
--ps --ips --ops Pair separators, e.g. equals sign
Notes (as of Miller v2.1.4):
* IRS,IFS,IPS,ORS,OFS,OPS are specifiable for all file formats.
* IRS,IFS,IPS may be multi-character for CSV; they must be single-character for other formats.
The latter restriction will be lifted in a near-future release.
* ORS,OFS,OPS may be multi-character for all formats.
* DKVP, NIDX, CSVLITE, PPRINT, and XTAB formats are intended to handle platform-native text data.
In particular, this means LF line-terminators on Linux/OSX.
In particular, this means LF line-terminators by default on Linux/OSX.
* CSV is intended to handle RFC-4180-compliant data.
In particular, this means it *only* handles CRLF line-terminators.
* This will change in v2.1.0, at which point there will be a (default-off) LF-termination option
for CSV, multi-char RS/FS/PS, and double-quote support for DKVP.
Double-quoting for CSV:
In particular, this means it uses CRLF line-terminators by default.
So, you can use "--csv --rs lf" for Linux-native CSV files.
* You can use "--fs '|'", "--ips :", etc., or any of the following names for separators:
cr crcr lf lflf crlf crlfcrlf tab space comma newline pipe slash colon semicolon equals
Double-quoting for CSV output:
--quote-all Wrap all fields in double quotes
--quote-none Do not wrap any fields in double quotes, even if they have OFS or ORS in them
--quote-minimal Wrap fields in double quotes only if they have OFS or ORS in them
@ -275,7 +278,7 @@ Other options:
Output of one verb may be chained as input to another using "then", e.g.
mlr stats1 -a min,mean,max -f flag,u,v -g color then sort -f color
Please see http://johnkerl.org/miller/doc and/or http://github.com/johnkerl/miller for more information.
This is Miller version &gt;= v2.1.1.
This is Miller version &gt;= v2.1.4.
</pre>
</div>
<p/>

View file

@ -101,11 +101,11 @@ Miller commands were run with pretty-print-tabular output format.
announcement</a>, by far the biggest asks were RFC-4180-compliant CSV, and
packaging (Homebrew, <tt>.deb</tt>).
<li/> Miller&rsquo;s record, field, and pair separators can only be single
<li/> Miller&rsquo;s record, field, and pair separators can be single
characters (e.g. newline, comma, equals sign), optionally allowing repeats on
input (e.g. multiple spaces treated as one). It would be nice if strings were
supported, e.g. <tt>"\n\n"</tt> paragraph-oriented record separation, or mix of
space and tab for field separation.
input (e.g. multiple spaces treated as one). Multi-character separator strings
(e.g. double-linefeed) are supported on input and output for CSV, and on output
for other formats. This is a work in progress.
<li/> String-oriented functions such as <tt>sub</tt>, and Miller&rsquo;s
<tt>filter</tt>, could be made far more powerful if a regular-expression