data-dir reorgs

This commit is contained in:
John Kerl 2015-06-01 09:23:42 -04:00
parent 80228e7222
commit 75360dbd00
9 changed files with 170 additions and 11 deletions

24
data/generators/abixy.c Executable file
View file

@ -0,0 +1,24 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
// Emits an endless stream of records on stdout, one per line, of the form
//   a=<name>,b=<name>,i=<counter>,x=<value>,y=<value>
// where a and b are drawn at random from a fixed five-name list, i counts
// records starting from 0, and x and y are rand()/RAND_MAX values in [0, 1].
// The program runs until it is killed or until writing to stdout fails.
int main(void) {
    // String literals must not be written through, so hold them as const.
    static const char *const names[] = { "hat", "pan", "eks", "wye", "zee" };
    const int num_names = (int)(sizeof(names) / sizeof(names[0]));

    // Mix the process ID into the seed so that two runs started within the
    // same second do not produce identical streams.
    srand((unsigned)(time(0) ^ getpid()));

    // The counter is long long rather than int: this loop has no upper bound
    // and incrementing an int past INT_MAX is undefined behavior.
    for (long long i = 0; ; i++) {
        const char *a = names[rand() % num_names];
        const char *b = names[rand() % num_names];
        double x = (double)rand() / (double)RAND_MAX;
        double y = (double)rand() / (double)RAND_MAX;

        // Stop instead of spinning forever if stdout can no longer be written.
        if (printf("a=%s,b=%s,i=%lld,x=%.18lf,y=%.18lf\n", a, b, i, x, y) < 0)
            break;
    }

    // Only reached when a write to stdout failed.
    return EXIT_FAILURE;
}
// a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
// a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
// a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
// a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463

View file

@ -0,0 +1,31 @@
#!/bin/bash
# Parameter sweep: for every value p in the table below, run the generator
# five times and print one line per run of the form
#   p=<p>,<output of mlr stats2 linreg-ols,r2,linreg-pca on fields x,y>
# NOTE(review): a.out is presumably the compiled randuv-chomped generator and
# must be reachable via PATH -- confirm.
# For a vertical listing of a single run, one can instead use:
#   a.out $p | mlr --oxtab stats2 -a linreg-ols,linreg-pca -f x,y

p_values=(
    -0.975 -0.950 -0.925 -0.900
    -0.875 -0.850 -0.825 -0.800
    -0.775 -0.750 -0.725 -0.700
    -0.675 -0.650 -0.625 -0.600
    -0.575 -0.550 -0.525 -0.500
    -0.475 -0.450 -0.425 -0.400
    -0.375 -0.350 -0.325 -0.300
    -0.275 -0.250 -0.225 -0.200
    -0.175 -0.150 -0.125 -0.100
    -0.075 -0.050 -0.025 -0.000
    0.000 0.025 0.050 0.075
    0.100 0.125 0.150 0.175
    0.200 0.225 0.250 0.275
    0.300 0.325 0.350 0.375
    0.400 0.425 0.450 0.475
    0.500 0.525 0.550 0.575
    0.600 0.625 0.650 0.675
    0.700 0.725 0.750 0.775
    0.800 0.825 0.850 0.875
    0.900 0.925 0.950 0.975
)

for p in "${p_values[@]}"; do
    # Five independent trials per parameter value.
    for trial in {1..5}; do
        echo "p=$p,$(a.out $p | mlr stats2 -a linreg-ols,r2,linreg-pca -f x,y)"
    done
done

6
data/generators/endless.c Executable file
View file

@ -0,0 +1,6 @@
#include <stdio.h>
// Emits an endless stream of records "a=1,b=2,c=<counter>" on stdout, one per
// line, with the counter starting at 0. Runs until killed or until writing to
// stdout fails.
int main(void) {
    // The original loop incremented an undeclared variable `i`; the counter
    // being printed is `c`. It is long long rather than int because the loop
    // has no upper bound and signed int overflow is undefined behavior.
    for (long long c = 0; ; c++) {
        // Stop instead of spinning forever if stdout can no longer be written.
        if (printf("a=1,b=2,c=%lld\n", c) < 0)
            return 1;
    }
    return 0;
}

14
data/generators/fuzz.rb Executable file
View file

@ -0,0 +1,14 @@
#!/usr/bin/env ruby
# Prints 50 lines of random characters to stdout. Each line's length is drawn
# from 0..39, and with probability 0.1 the line is forced to be empty.

charset = 'abcdefghijklmnopqrstuvwxyz0123456789~!#$%^&*()-_=+{}[];:"<>,./?'.chars
line_count = 50

line_count.times do
  width = rand(40)
  # Roughly one line in ten is emitted blank regardless of the drawn width.
  width = 0 if rand < 0.1
  puts Array.new(width) { charset.sample }.join
end

View file

@ -0,0 +1,12 @@
#!/bin/bash
# Usage: <this script> {file}
# Fits OLS and PCA regression lines to the x,y fields of the given file with
# mlr, then plots y together with both fitted lines using pgr.

if [ $# -lt 1 ]; then
    echo "Usage: $0 {file}" 1>&2
    exit 1
fi
file="$1"

# Show the fit parameters, one name=value pair per line.
mlr --ofs newline stats2 -a linreg-ols,linreg-pca -f x,y "$file"

# Import the same name=value pairs as shell variables; the ones used below are
# x_y_ols_m, x_y_ols_b, x_y_pca_m and x_y_pca_b. The command substitution is
# quoted so that each assignment stays on its own line and nothing is
# word-split or glob-expanded before eval sees it.
eval "$(mlr --ofs newline stats2 -a linreg-ols,linreg-pca -f x,y "$file")"

# In addition to x and y, add olsfit and pcafit, the two fitted lines evaluated
# at x, and drop the a, b, i fields. Plot using your favorite tool.
# NOTE(review): the legend names only 'y yfit' although two fit columns are
# produced -- confirm this is what pgr should show.
mlr --onidx put "\$olsfit=($x_y_ols_m*\$x)+$x_y_ols_b;\$pcafit=($x_y_pca_m*\$x)+$x_y_pca_b" \
    then cut -x -f a,b,i "$file" \
    | pgr -p -ms 2 -title 'linreg example' -xmin -0.1 -xmax 1.1 -ymin -0.1 -ymax 1.1 -legend 'y yfit'

123
data/generators/randuv-chomped.c Executable file
View file

@ -0,0 +1,123 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
// Samples unit-uniform x,y pairs, conditioned on being in two subboxes the size
// of which is described by the "chomp" parameter. Intended for compare/contrast
// of OLS and PCA linear-regression algorithms.
//
// 0 < chomp < 1:
//
// +----------------oooooooo
// | oooooooo
// | oooooooo
// | oooooooo <-- chomp
// | |
// | |
// oooooooo | <-- 1-chomp
// oooooooo |
// oooooooo |
// oooooooo----------------+
// ^ ^
// | |
// 1-chomp chomp
//
// chomp = 0 means output all x,y pairs
//
// -1 < chomp < 0:
// 1+chomp -chomp
// | |
// v v
// oooooooo----------------+
// oooooooo |
// oooooooo |
// oooooooo | <-- -chomp
// | |
// | |
// | oooooooo <-- 1+chomp
// | oooooooo
// | oooooooo
// +----------------oooooooo
// Prints the command-line synopsis to stderr and terminates the process with
// exit status 1. argv0 is the program name to show in the synopsis.
// The synopsis lists the four-argument form (chomp n mash_x mash_y) as well,
// since main() accepts it.
static void usage(char* argv0) {
    fprintf(stderr, "Usage: %s [chomp [n [mash_x mash_y]]]\n", argv0);
    exit(1);
}
// Prints n records "x=<value>,y=<value>" to stdout, where (x, y) is sampled
// uniformly from the unit square by rejection and kept only when it falls in
// one of two corner boxes selected by chomp:
//   chomp > 0:  lower-left box (x, y < 1-chomp) or upper-right box (x, y > chomp)
//   chomp <= 0: upper-left box (x < 1+chomp, y > -chomp) or
//               lower-right box (x > -chomp, y < 1+chomp)
//
// Command line: [chomp [n [mash_x mash_y]]]
//   chomp   box parameter, must satisfy -1 < chomp < 1 (default 0.5)
//   n       number of records to print (default 100000)
//   mash_x  if nonzero, x is replaced by 0.0 when x < lo and by 1.0 otherwise
//   mash_y  if nonzero, y is replaced by 0.0 when y < lo and by 1.0 otherwise
// Any other argument count, an unparseable argument, or an out-of-range chomp
// prints the usage message and exits.
int main(int argc, char** argv) {
    double chomp = 0.5;
    int n = 100000;
    int mash_x = 0;
    int mash_y = 0;

    // Accepted forms: no arguments (all defaults), chomp, chomp n,
    // chomp n mash_x mash_y.
    if (argc != 1 && argc != 2 && argc != 3 && argc != 5)
        usage(argv[0]);
    if (argc >= 2 && sscanf(argv[1], "%lf", &chomp) != 1)
        usage(argv[0]);
    if (argc >= 3 && sscanf(argv[2], "%d", &n) != 1)
        usage(argv[0]);
    if (argc == 5) {
        if (sscanf(argv[3], "%d", &mash_x) != 1)
            usage(argv[0]);
        if (sscanf(argv[4], "%d", &mash_y) != 1)
            usage(argv[0]);
    }

    // For |chomp| >= 1 both boxes are empty, so no sample would ever be
    // accepted and the loop below would never terminate. Written as a negated
    // conjunction so that NaN is rejected as well.
    if (!(chomp > -1.0 && chomp < 1.0))
        usage(argv[0]);

    // Mix the process ID into the seed so that two runs started within the
    // same second do not produce identical samples.
    srand((unsigned)(time(0) ^ getpid()));

    const int positive_chomp = chomp > 0.0;
    const double lo = positive_chomp ? 1.0 - chomp : 1.0 + chomp;
    const double hi = positive_chomp ? chomp : -chomp;

    for (int i = 0; i < n; /* advanced only when a sample is kept */) {
        double x = (double)rand() / (double)RAND_MAX;
        double y = (double)rand() / (double)RAND_MAX;

        int keep = positive_chomp
            ? ((x < lo && y < lo) || (x > hi && y > hi))
            : ((x < lo && y > hi) || (x > hi && y < lo));
        if (!keep)
            continue;

        if (mash_x)
            x = (x < lo) ? 0.0 : 1.0;
        if (mash_y)
            y = (y < lo) ? 0.0 : 1.0;

        i++;
        printf("x=%.18lf,y=%.18lf\n", x, y);
    }
    return 0;
}