From 75360dbd00e79d0d8916a30e49251672376708c8 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 1 Jun 2015 09:23:42 -0400 Subject: [PATCH] data-dir reorgs --- c/tools/gen0.c | 8 -- c/tools/mcountlines | 14 +++ c/tools/gen.c => data/generators/abixy.c | 2 - data/generators/compare-ols-pca.sh | 31 +++++++ data/generators/endless.c | 6 ++ data/generators/fuzz.rb | 14 +++ data/generators/plot-ols-pca.sh | 12 +++ .../generators/randuv-chomped.c | 3 +- language-comparisons/catm2.c | 91 +++++++++++++++++++ 9 files changed, 170 insertions(+), 11 deletions(-) delete mode 100755 c/tools/gen0.c rename c/tools/gen.c => data/generators/abixy.c (95%) create mode 100644 data/generators/compare-ols-pca.sh create mode 100755 data/generators/endless.c create mode 100755 data/generators/fuzz.rb create mode 100644 data/generators/plot-ols-pca.sh rename c/tools/gen-squares.c => data/generators/randuv-chomped.c (95%) create mode 100644 language-comparisons/catm2.c diff --git a/c/tools/gen0.c b/c/tools/gen0.c deleted file mode 100755 index 33b905a85..000000000 --- a/c/tools/gen0.c +++ /dev/null @@ -1,8 +0,0 @@ -#ifdef __GEN_MAIN__ -#include -int main(void) { - for (;;) - printf("a=1,b=2,c=3\n"); - return 0; -} -#endif // __GEN_MAIN__ diff --git a/c/tools/mcountlines b/c/tools/mcountlines index c9d1ef7c8..f208ce373 100755 --- a/c/tools/mcountlines +++ b/c/tools/mcountlines @@ -11,3 +11,17 @@ wc -l \ dsls/*.[ly] \ dsls/*wrapper*.[ch] \ | sort -n + +echo +wc -c \ + cli/*.[ch] \ + containers/*.[ch] \ + input/*.[ch] \ + lib/*.[ch] \ + mapping/*.[ch] \ + output/*.[ch] \ + stream/*.[ch] \ + dsls/*.[ly] \ + dsls/*wrapper*.[ch] \ + | sort -n | tail -n 5 + diff --git a/c/tools/gen.c b/data/generators/abixy.c similarity index 95% rename from c/tools/gen.c rename to data/generators/abixy.c index b86a5af16..435988d38 100755 --- a/c/tools/gen.c +++ b/data/generators/abixy.c @@ -1,4 +1,3 @@ -#ifdef __GEN_MAIN__ #include #include #include @@ -18,7 +17,6 @@ int main(void) { } return 0; } -#endif // 
__GEN_MAIN__ // a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533 // a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797 diff --git a/data/generators/compare-ols-pca.sh b/data/generators/compare-ols-pca.sh new file mode 100644 index 000000000..22219ffc0 --- /dev/null +++ b/data/generators/compare-ols-pca.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +for p in \ + -0.975 -0.950 -0.925 -0.900 \ + -0.875 -0.850 -0.825 -0.800 \ + -0.775 -0.750 -0.725 -0.700 \ + -0.675 -0.650 -0.625 -0.600 \ + -0.575 -0.550 -0.525 -0.500 \ + -0.475 -0.450 -0.425 -0.400 \ + -0.375 -0.350 -0.325 -0.300 \ + -0.275 -0.250 -0.225 -0.200 \ + -0.175 -0.150 -0.125 -0.100 \ + -0.075 -0.050 -0.025 -0.000 \ + 0.000 0.025 0.050 0.075 \ + 0.100 0.125 0.150 0.175 \ + 0.200 0.225 0.250 0.275 \ + 0.300 0.325 0.350 0.375 \ + 0.400 0.425 0.450 0.475 \ + 0.500 0.525 0.550 0.575 \ + 0.600 0.625 0.650 0.675 \ + 0.700 0.725 0.750 0.775 \ + 0.800 0.825 0.850 0.875 \ + 0.900 0.925 0.950 0.975 +do + for try in 1 2 3 4 5; do + #echo + #echo p $p + #a.out $p|mlr --oxtab stats2 -a linreg-ols,linreg-pca -f x,y + echo "p=$p,$(a.out $p | mlr stats2 -a linreg-ols,r2,linreg-pca -f x,y)" + done +done diff --git a/data/generators/endless.c b/data/generators/endless.c new file mode 100755 index 000000000..830f435c1 --- /dev/null +++ b/data/generators/endless.c @@ -0,0 +1,6 @@ +#include <stdio.h> +int main(void) { + for (int c = 0; ; c++) // c is the record counter printed in field c + printf("a=1,b=2,c=%d\n", c); + return 0; +} diff --git a/data/generators/fuzz.rb b/data/generators/fuzz.rb new file mode 100755 index 000000000..4a61840ad --- /dev/null +++ b/data/generators/fuzz.rb @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby + +alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789~!#$%^&*()-_=+{}[];:"<>,./?' 
+nlines=50 + +alphabet = alphabet.split('') +nlines.times do + length = rand(40) + if rand < 0.1 + length = 0 + end + output = (1..length).to_a.collect{alphabet.sample}.join('') + puts output +end diff --git a/data/generators/plot-ols-pca.sh b/data/generators/plot-ols-pca.sh new file mode 100644 index 000000000..f2dd7df15 --- /dev/null +++ b/data/generators/plot-ols-pca.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +file="$1" + +# Print the fit parameters, then set x_y_ols_m, x_y_ols_b, x_y_pca_m and x_y_pca_b as shell variables +mlr --ofs newline stats2 -a linreg-ols,linreg-pca -f x,y "$file" +eval $(mlr --ofs newline stats2 -a linreg-ols,linreg-pca -f x,y "$file") + +# In addition to x and y, make new olsfit and pcafit fields which are the two line fits. Plot using your favorite tool. +mlr --onidx put "\$olsfit=($x_y_ols_m*\$x)+$x_y_ols_b;\$pcafit=($x_y_pca_m*\$x)+$x_y_pca_b" \ + then cut -x -f a,b,i "$file" \ + | pgr -p -ms 2 -title 'linreg example' -xmin -0.1 -xmax 1.1 -ymin -0.1 -ymax 1.1 -legend 'y yfit' diff --git a/c/tools/gen-squares.c b/data/generators/randuv-chomped.c similarity index 95% rename from c/tools/gen-squares.c rename to data/generators/randuv-chomped.c index b1c440f37..cd6d9f229 100755 --- a/c/tools/gen-squares.c +++ b/data/generators/randuv-chomped.c @@ -4,7 +4,8 @@ #include // Samples unit-uniform x,y pairs, conditioned on being in two subboxes the size -// of which is described by the "chomp" parameter. +// of which is described by the "chomp" parameter. Intended for compare/contrast +// of OLS and PCA linear-regression algorithms. 
// // 0 < chomp < 1: // diff --git a/language-comparisons/catm2.c b/language-comparisons/catm2.c new file mode 100644 index 000000000..eb8a85fd8 --- /dev/null +++ b/language-comparisons/catm2.c @@ -0,0 +1,91 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> + +typedef struct _file_reader_mmap_state_t { + char* sol; + char* eof; + int fd; +} file_reader_mmap_state_t; + +file_reader_mmap_state_t file_reader_mmap_open(char* file_name) { + file_reader_mmap_state_t state; + state.fd = open(file_name, O_RDONLY); + if (state.fd < 0) { + perror("open"); + exit(1); + } + struct stat stat; + if (fstat(state.fd, &stat) < 0) { + perror("fstat"); + exit(1); + } + state.sol = mmap(NULL, (size_t)stat.st_size, PROT_READ|PROT_WRITE, MAP_FILE|MAP_PRIVATE, state.fd, (off_t)0); // private writable mapping; do_stream overwrites each '\n' in place + if (state.sol == MAP_FAILED) { // NOTE(review): empty files likely land here, since POSIX mmap rejects length 0; confirm + perror("mmap"); + exit(1); + } + state.eof = state.sol + stat.st_size; + return state; +} + +void file_reader_mmap_close(file_reader_mmap_state_t* pstate) { // closes the descriptor only; the mapping is not unmapped + if (close(pstate->fd) < 0) { + perror("close"); + exit(1); + } +} + +// ---------------------------------------------------------------- +static void emit(char* sol, char* eol, FILE* output_stream) { + size_t ntowrite = eol - sol; + size_t nwritten = fwrite(sol, 1, ntowrite, output_stream); + if (nwritten != ntowrite) { + perror("fwrite"); + exit(1); + } + fputc('\n', output_stream); +} + +// ---------------------------------------------------------------- +// xxx params/state: +// * ctor: char*file_name +// * reads: currptr, eofptr +// * dtor: int fd +static int do_stream(char* file_name) { + FILE* output_stream = stdout; + + file_reader_mmap_state_t state = file_reader_mmap_open(file_name); + + char* eol; + char* p = state.sol; + + while (p < state.eof) { // emits only newline-terminated lines; an unterminated tail is skipped + if (*p == '\n') { + *p = 0; + eol = p; + emit(state.sol, eol, output_stream); + p++; + state.sol = p; + } else { + p++; + } + } + + file_reader_mmap_close(&state); + + return 1; // always success; every failure path above calls exit(1) +} + +// 
================================================================ +int main(int argc, char** argv) { + int ok = 1; + for (int argi = 1; argi < argc; argi++) { + ok = do_stream(argv[argi]) && ok; // every file is processed; any failure is remembered + } + return ok ? 0 : 1; +}