mirror of
https://github.com/johnkerl/miller.git
synced 2026-01-23 02:14:13 +00:00
data-dir reorgs
This commit is contained in:
parent
80228e7222
commit
75360dbd00
9 changed files with 170 additions and 11 deletions
24
data/generators/abixy.c
Executable file
24
data/generators/abixy.c
Executable file
|
|
@ -0,0 +1,24 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
// Writes an unbounded stream of records of the form
//   a=<name>,b=<name>,i=<counter>,x=<fraction>,y=<fraction>
// to stdout. The a and b fields are drawn independently from a fixed list of
// five names, i counts records starting at 0, and x and y are rand() scaled by
// RAND_MAX. The loop has no exit condition, so the trailing return is never
// reached.
int main(void) {
	const char* const labels[] = { "hat", "pan", "eks", "wye", "zee" };
	const int label_count = (int)(sizeof labels / sizeof labels[0]);

	// Mix the clock with the process ID to form the seed.
	srand(time(0) ^ getpid());

	int seq = 0;
	while (1) {
		// Each draw is its own statement so rand() is consumed in a fixed
		// order: a, b, x, y.
		const char* first = labels[rand() % label_count];
		const char* second = labels[rand() % label_count];
		double x = rand() / (double)RAND_MAX;
		double y = rand() / (double)RAND_MAX;

		printf("a=%s,b=%s,i=%d,x=%.18lf,y=%.18lf\n", first, second, seq, x, y);
		seq++;
	}

	return 0;
}
|
||||
|
||||
// a=pan,b=pan,i=1,x=0.3467901443380824,y=0.7268028627434533
|
||||
// a=eks,b=pan,i=2,x=0.7586799647899636,y=0.5221511083334797
|
||||
// a=wye,b=wye,i=3,x=0.20460330576630303,y=0.33831852551664776
|
||||
// a=eks,b=wye,i=4,x=0.38139939387114097,y=0.13418874328430463
|
||||
31
data/generators/compare-ols-pca.sh
Normal file
31
data/generators/compare-ols-pca.sh
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
#!/bin/bash
|
||||
|
||||
# For each parameter value p in the sweep below, run the generator five times
# and print one line per run: "p=<value>," followed by Miller's OLS, r2, and
# PCA linear-regression statistics for the generated x,y pairs.
# NOTE(review): a.out is presumably the compiled randuv-chomped generator and
# must be reachable via PATH -- confirm.
sweep=(
  -0.975 -0.950 -0.925 -0.900
  -0.875 -0.850 -0.825 -0.800
  -0.775 -0.750 -0.725 -0.700
  -0.675 -0.650 -0.625 -0.600
  -0.575 -0.550 -0.525 -0.500
  -0.475 -0.450 -0.425 -0.400
  -0.375 -0.350 -0.325 -0.300
  -0.275 -0.250 -0.225 -0.200
  -0.175 -0.150 -0.125 -0.100
  -0.075 -0.050 -0.025 -0.000
   0.000  0.025  0.050  0.075
   0.100  0.125  0.150  0.175
   0.200  0.225  0.250  0.275
   0.300  0.325  0.350  0.375
   0.400  0.425  0.450  0.475
   0.500  0.525  0.550  0.575
   0.600  0.625  0.650  0.675
   0.700  0.725  0.750  0.775
   0.800  0.825  0.850  0.875
   0.900  0.925  0.950  0.975
)

for p in "${sweep[@]}"; do
  # The loop variable only counts repetitions; every run uses the same p.
  for try in {1..5}; do
    stats=$(a.out $p | mlr stats2 -a linreg-ols,r2,linreg-pca -f x,y)
    echo "p=$p,$stats"
  done
done
|
||||
6
data/generators/endless.c
Executable file
6
data/generators/endless.c
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
#include <stdio.h>
|
||||
// Writes the record a=1,b=2,c=<counter> to stdout without end, with the
// counter starting at 0 and increasing by one per record. The loop has no
// exit condition, so the trailing return is never reached.
int main(void) {
	// The counter declared by the loop is c, so c is what must be advanced.
	for (int c = 0; ; c++) {
		printf("a=1,b=2,c=%d\n", c);
	}
	return 0;
}
|
||||
14
data/generators/fuzz.rb
Executable file
14
data/generators/fuzz.rb
Executable file
|
|
@ -0,0 +1,14 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
# Prints 50 lines of random characters drawn from the symbol set below.
# Each line's length is picked from 0..39, and roughly one line in ten is
# then forced to be empty regardless of the length picked.
symbols = 'abcdefghijklmnopqrstuvwxyz0123456789~!#$%^&*()-_=+{}[];:"<>,./?'.chars
line_count = 50

line_count.times do
  width = rand(40)
  width = 0 if rand < 0.1
  puts Array.new(width) { symbols.sample }.join
end
|
||||
12
data/generators/plot-ols-pca.sh
Normal file
12
data/generators/plot-ols-pca.sh
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Fits OLS and PCA regression lines to the x,y fields of the given file, then
# plots the data together with both fitted lines.
file="$1"
if [ -z "$file" ]; then
  echo "Usage: $0 {file}" 1>&2
  exit 1
fi

# Compute the regression statistics once. With newline as the output field
# separator each key=value pair lands on its own line, so the same text can be
# shown to the user and then evaluated to set shell variables such as
# x_y_ols_m, x_y_ols_b, x_y_pca_m, and x_y_pca_b.
fits=$(mlr --ofs newline stats2 -a linreg-ols,linreg-pca -f x,y "$file") || exit 1
printf '%s\n' "$fits"
eval "$fits"

# In addition to x and y, add olsfit and pcafit, the two fitted lines evaluated
# at each x, and drop the a, b, and i fields before plotting.
# NOTE(review): the legend names 'y yfit' while the fit columns are olsfit and
# pcafit -- confirm what pgr expects here.
mlr --onidx put "\$olsfit=($x_y_ols_m*\$x)+$x_y_ols_b;\$pcafit=($x_y_pca_m*\$x)+$x_y_pca_b" \
  then cut -x -f a,b,i "$file" \
  | pgr -p -ms 2 -title 'linreg example' -xmin -0.1 -xmax 1.1 -ymin -0.1 -ymax 1.1 -legend 'y yfit'
|
||||
123
data/generators/randuv-chomped.c
Executable file
123
data/generators/randuv-chomped.c
Executable file
|
|
@ -0,0 +1,123 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
// Samples unit-uniform x,y pairs, conditioned on being in two subboxes the size
|
||||
// of which is described by the "chomp" parameter. Intended for compare/contrast
|
||||
// of OLS and PCA linear-regression algorithms.
|
||||
//
|
||||
// 0 < chomp < 1:
|
||||
//
|
||||
// +----------------oooooooo
|
||||
// | oooooooo
|
||||
// | oooooooo
|
||||
// | oooooooo <-- chomp
|
||||
// | |
|
||||
// | |
|
||||
// oooooooo | <-- 1-chomp
|
||||
// oooooooo |
|
||||
// oooooooo |
|
||||
// oooooooo----------------+
|
||||
// ^ ^
|
||||
// | |
|
||||
// 1-chomp chomp
|
||||
//
|
||||
// chomp = 0 means output all x,y pairs
|
||||
//
|
||||
// -1 < chomp < 0:
|
||||
// 1+chomp -chomp
|
||||
// | |
|
||||
// v v
|
||||
// oooooooo----------------+
|
||||
// oooooooo |
|
||||
// oooooooo |
|
||||
// oooooooo | <-- -chomp
|
||||
// | |
|
||||
// | |
|
||||
// | oooooooo <-- 1+chomp
|
||||
// | oooooooo
|
||||
// | oooooooo
|
||||
// +----------------oooooooo
|
||||
|
||||
// Prints the command-line synopsis to stderr and terminates the process with
// exit status 1. The synopsis lists the two trailing mash flags as well, since
// main also accepts the four-argument form.
//
// argv0: program name to show in the synopsis.
static void usage(char* argv0) {
	fprintf(stderr, "Usage: %s [chomp [n [mash_x mash_y]]]\n", argv0);
	exit(1);
}
|
||||
|
||||
// Snaps a coordinate to an edge of the unit interval: 0.0 when v is below the
// threshold lo, otherwise 1.0.
static double mash_coordinate(double v, double lo) {
	return (v < lo) ? 0.0 : 1.0;
}

// Prints n lines of the form "x=...,y=..." to stdout, where each pair is drawn
// uniformly from the unit square and kept only if it falls inside one of the
// two boxes selected by chomp.
//
// Command line (all arguments optional, but mash_x and mash_y come as a pair):
//   chomp   box parameter, strictly between -1 and 1; defaults to 0.5.
//           Positive values keep the lower-left and upper-right boxes;
//           zero and negative values keep the upper-left and lower-right ones.
//   n       number of pairs to print; defaults to 100000.
//   mash_x  if nonzero, replace each printed x by 0.0 or 1.0.
//   mash_y  if nonzero, replace each printed y by 0.0 or 1.0.
//
// Any other argument count, an unparseable argument, or a chomp outside
// (-1, 1) goes to usage(), which does not return.
int main(int argc, char** argv) {
	double chomp = 0.5;
	int n = 100000;
	int mash_x = 0;
	int mash_y = 0;

	// Accepted shapes: no arguments, chomp, chomp n, or chomp n mash_x mash_y.
	if (argc != 1 && argc != 2 && argc != 3 && argc != 5)
		usage(argv[0]);
	if (argc >= 2 && sscanf(argv[1], "%lf", &chomp) != 1)
		usage(argv[0]);
	if (argc >= 3 && sscanf(argv[2], "%d", &n) != 1)
		usage(argv[0]);
	if (argc == 5) {
		if (sscanf(argv[3], "%d", &mash_x) != 1)
			usage(argv[0]);
		if (sscanf(argv[4], "%d", &mash_y) != 1)
			usage(argv[0]);
	}

	// Outside (-1, 1) both boxes are empty, so the rejection loop below would
	// never accept a pair. Written as a negated conjunction so that NaN is
	// rejected too.
	if (!(chomp > -1.0 && chomp < 1.0))
		usage(argv[0]);

	srand(time(0) ^ getpid());

	// The two chomp signs differ only in their thresholds and in which pair of
	// corners is kept.
	int positive = chomp > 0.0;
	double lo = positive ? 1.0 - chomp : 1 + chomp;
	double hi = positive ? chomp : -chomp;

	for (int i = 0; i < n; /* increment on acceptance only */) {
		double x = (double)rand() / (double)RAND_MAX;
		double y = (double)rand() / (double)RAND_MAX;

		int keep;
		if (positive)
			keep = (x < lo && y < lo) || (x > hi && y > hi);
		else
			keep = (x < lo && y > hi) || (x > hi && y < lo);
		if (!keep)
			continue;

		if (mash_x)
			x = mash_coordinate(x, lo);
		if (mash_y)
			y = mash_coordinate(y, lo);

		i++;
		printf("x=%.18lf,y=%.18lf\n", x, y);
	}

	return 0;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue