// ================================================================
// Non-mlrval math routines
// ================================================================

package lib

import (
	"fmt"
	"math"
	"os"
)

// ----------------------------------------------------------------
// Some wrappers around things which aren't one-liners from math.*.

// Sgn returns the sign of its argument: +1 for positive, -1 for negative,
// 0 for zero, and NaN for NaN.
func Sgn(a float64) float64 {
	if a > 0 {
		return 1.0
	} else if a < 0 {
		return -1.0
	} else if a == 0 {
		return 0.0
	} else {
		return math.NaN()
	}
}

// Qnorm is the normal cumulative distribution function, expressed in terms of
// the erfc library function (which is awkward, but exists).
func Qnorm(x float64) float64 {
	return 0.5 * math.Erfc(-x/math.Sqrt2)
}
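
// Illustrative sketch, not part of the original source: a quick sanity check
// of Qnorm at a few familiar points on the standard normal CDF. The function
// name exampleQnorm is ours; the expected values are approximate.
func exampleQnorm() {
	fmt.Println(Qnorm(0.0))       // 0.5: half the mass lies below the mean
	fmt.Println(Qnorm(1.959964))  // ≈ 0.975: the usual two-sided 95% cutoff
	fmt.Println(Qnorm(-1.959964)) // ≈ 0.025
}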

// This is a tangent-following method not unlike Newton-Raphson:
// * We can compute qnorm(y) = integral from -infinity to y of (1/sqrt(2pi)) exp(-t^2/2) dt.
// * We can compute the derivative qnorm'(y) = (1/sqrt(2pi)) exp(-y^2/2).
// * We cannot explicitly compute invqnorm(y).
// * If dx/dy = (1/sqrt(2pi)) exp(-y^2/2) then dy/dx = sqrt(2pi) exp(y^2/2).
//
// This means we *can* compute the derivative of invqnorm even though we
// can't compute the function itself. So the essence of the method is to
// follow the tangent line to form successive approximations: we have known
// function input x, unknown function output y, and initial guess y0. At each
// step we find the intersection of the tangent line at y_n with the vertical
// line at x, to find y_{n+1}. Specifically:
//
// * Even though we can't compute y = q^-1(x), we can compute x = q(y).
// * Start with an initial guess for y (y0 = 0.0 or y0 = x both are OK).
// * Compute q(y_n). Since q (and therefore q^-1) is 1-1, we're done when
//   |x - q(y_n)| is small.
// * Else iterate: using point-slope form,
//   (y_{n+1} - y_n) / (x_{n+1} - x_n) = m = sqrt(2pi) exp(y_n^2/2),
//   where x_{n+1} = x (the input) and x_n = q(y_n).
// * Solve for y_{n+1} and repeat.

const INVQNORM_TOL float64 = 1e-9
const INVQNORM_MAXITER int = 30

func Invqnorm(x float64) float64 {
	// Initial approximation is linear. Starting with y0 = 0.0 works just as well.
	y0 := x - 0.5

	// Domain guard: the inverse normal CDF is defined only for 0 < x < 1.
	if x <= 0.0 {
		return 0.0
	}
	if x >= 1.0 {
		return 0.0
	}

	y := y0
	niter := 0

	for {

		backx := Qnorm(y)
		err := math.Abs(x - backx)
		if err < INVQNORM_TOL {
			break
		}
		if niter > INVQNORM_MAXITER {
			fmt.Fprintf(os.Stderr,
				"mlr: internal coding error: max iterations %d exceeded in invqnorm.\n",
				INVQNORM_MAXITER,
			)
			os.Exit(1)
		}
		m := math.Sqrt2 * math.SqrtPi * math.Exp(y*y/2.0)
		delta_y := m * (x - backx)
		y += delta_y
		niter++
	}

	return y
}
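
// Illustrative sketch, not part of the original source: round-tripping through
// Qnorm and Invqnorm should recover the starting point to within roughly
// INVQNORM_TOL. The function name exampleInvqnorm is ours.
func exampleInvqnorm() {
	y := 1.25
	x := Qnorm(y)              // ≈ 0.894
	fmt.Println(Invqnorm(x))   // ≈ 1.25, recovered by the tangent-following iteration
	fmt.Println(Invqnorm(0.5)) // 0.0: the median of the standard normal
}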

const JACOBI_TOLERANCE = 1e-12
const JACOBI_MAXITER = 20

// ----------------------------------------------------------------
// Jacobi real-symmetric eigensolver. Loosely adapted from Numerical Recipes.
//
// Note: this is coded for n=2 (to implement PCA linear regression on 2
// variables) but the algorithm is quite general. Changing from 2 to n is a
// matter of updating the top and bottom of the function: generalize the
// signature to take an n x n matrix and n eigenvectors, add copy-matrix and
// make-identity-matrix helpers, and so on.

func GetRealSymmetricEigensystem(
	matrix [2][2]float64,
) (
	eigenvalue1 float64, // Output: dominant eigenvalue
	eigenvalue2 float64, // Output: less-dominant eigenvalue
	eigenvector1 [2]float64, // Output: corresponding to dominant eigenvalue
	eigenvector2 [2]float64, // Output: corresponding to less-dominant eigenvalue
) {
	L := [2][2]float64{
		{matrix[0][0], matrix[0][1]},
		{matrix[1][0], matrix[1][1]},
	}
	V := [2][2]float64{
		{1.0, 0.0},
		{0.0, 1.0},
	}
	var P, PT_A [2][2]float64
	n := 2

	found := false
	for iter := 0; iter < JACOBI_MAXITER; iter++ {
		sum := 0.0
		for i := 1; i < n; i++ {
			for j := 0; j < i; j++ {
				sum += math.Abs(L[i][j])
			}
		}
		if math.Abs(sum*sum) < JACOBI_TOLERANCE {
			found = true
			break
		}

		for p := 0; p < n; p++ {
			for q := p + 1; q < n; q++ {
				numer := L[p][p] - L[q][q]
				denom := L[p][q] + L[q][p]
				if math.Abs(denom) < JACOBI_TOLERANCE {
					continue
				}
				theta := numer / denom
				signTheta := 1.0
				if theta < 0 {
					signTheta = -1.0
				}
				t := signTheta / (math.Abs(theta) + math.Sqrt(theta*theta+1))
				c := 1.0 / math.Sqrt(t*t+1)
				s := t * c

				for pi := 0; pi < n; pi++ {
					for pj := 0; pj < n; pj++ {
						if pi == pj {
							P[pi][pj] = 1.0
						} else {
							P[pi][pj] = 0.0
						}
					}
				}
				P[p][p] = c
				P[p][q] = -s
				P[q][p] = s
				P[q][q] = c

				// L = P.transpose() * L * P
				// V = V * P
				matmul2t(&PT_A, &P, &L)
				matmul2(&L, &PT_A, &P)
				matmul2(&V, &V, &P)
			}
		}
	}

	if !found {
		fmt.Fprintf(os.Stderr,
			"%s: Jacobi eigensolver: max iterations (%d) exceeded. Non-symmetric input?\n",
			"mlr",
			JACOBI_MAXITER,
		)
		os.Exit(1)
	}

	eigenvalue1 = L[0][0]
	eigenvalue2 = L[1][1]
	abs1 := math.Abs(eigenvalue1)
	abs2 := math.Abs(eigenvalue2)
	if abs1 > abs2 {
		eigenvector1[0] = V[0][0] // Column 0 of V
		eigenvector1[1] = V[1][0]
		eigenvector2[0] = V[0][1] // Column 1 of V
		eigenvector2[1] = V[1][1]
	} else {
		eigenvalue1, eigenvalue2 = eigenvalue2, eigenvalue1
		eigenvector1[0] = V[0][1]
		eigenvector1[1] = V[1][1]
		eigenvector2[0] = V[0][0]
		eigenvector2[1] = V[1][0]
	}

	return eigenvalue1, eigenvalue2, eigenvector1, eigenvector2
}
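
// Illustrative sketch, not part of the original source: the symmetric matrix
// [[2, 1], [1, 2]] has eigenvalues 3 and 1, with eigenvectors along (1, 1)
// and (1, -1) respectively (up to sign and normalization). The function name
// exampleEigensystem is ours.
func exampleEigensystem() {
	m := [2][2]float64{
		{2.0, 1.0},
		{1.0, 2.0},
	}
	l1, l2, v1, v2 := GetRealSymmetricEigensystem(m)
	fmt.Println(l1, v1) // 3 and roughly ( 0.707, 0.707)
	fmt.Println(l2, v2) // 1 and roughly (-0.707, 0.707), up to sign
}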

// C = A * B
func matmul2(
	C *[2][2]float64, // Output
	A *[2][2]float64, // Input
	B *[2][2]float64, // Input
) {
	var T [2][2]float64
	n := 2
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			sum := 0.0
			for k := 0; k < n; k++ {
				sum += A[i][k] * B[k][j]
			}
			T[i][j] = sum
		}
	}
	// The temporary copy is needed in case C's memory is the same as A's and/or B's.
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			C[i][j] = T[i][j]
		}
	}
}

// C = A^t * B
func matmul2t(
	C *[2][2]float64, // Output
	A *[2][2]float64, // Input
	B *[2][2]float64, // Input
) {
	var T [2][2]float64
	n := 2
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			sum := 0.0
			for k := 0; k < n; k++ {
				sum += A[k][i] * B[k][j]
			}
			T[i][j] = sum
		}
	}
	// The temporary copy is needed in case C's memory is the same as A's and/or B's.
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			C[i][j] = T[i][j]
		}
	}
}
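
// Illustrative sketch, not part of the original source: because matmul2 and
// matmul2t accumulate into a temporary before writing the output, the output
// argument may alias an input, as in the in-place update below. The function
// name exampleMatmulAliasing is ours.
func exampleMatmulAliasing() {
	A := [2][2]float64{
		{1.0, 2.0},
		{3.0, 4.0},
	}
	B := [2][2]float64{
		{0.0, 1.0},
		{1.0, 0.0},
	}
	matmul2(&A, &A, &B) // A = A * B, safe even though the output aliases an input
	fmt.Println(A)      // [[2 1] [4 3]]
}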

// ================================================================
// Logistic regression
//
// Real-valued x_0 .. x_{N-1}
// 0/1-valued  y_0 .. y_{N-1}
// Model p(y_i == 1 | x_i) as
//   p(x, m, b) = 1 / (1 + exp(-m*x-b))
// which is the same as
//   log(p/(1-p)) = m*x + b
// then
//   p(x, m, b) = 1 / (1 + exp(-m*x-b))
//              = exp(m*x+b) / (1 + exp(m*x+b))
// and
//   1-p = exp(-m*x-b) / (1 + exp(-m*x-b))
//       = 1 / (1 + exp(m*x+b))
// Note for reference just below that
//   dp/dm = -1 / [1 + exp(-m*x-b)]**2 * (-x) * exp(-m*x-b)
//         = [ x * exp(-m*x-b) ] / [1 + exp(-m*x-b)]**2
//         = x * p * (1-p)
// and
//   dp/db = -1 / [1 + exp(-m*x-b)]**2 * (-1) * exp(-m*x-b)
//         = [ exp(-m*x-b) ] / [1 + exp(-m*x-b)]**2
//         = p * (1-p)
// Write p_i for p(x_i, m, b).
//
// Likelihood function:
//   L(m, b) = prod_{i=0}^{N-1} [ p_i**y_i * (1-p_i)**(1-y_i) ]
//
// Log-likelihood function:
//   ell(m, b) = sum_{i=0}^{N-1} [ y_i log(p_i) + (1-y_i) log(1-p_i) ]
//             = sum_{i=0}^{N-1} [ log(1-p_i) + y_i log(p_i/(1-p_i)) ]
//             = sum_{i=0}^{N-1} [ log(1-p_i) + y_i*(m*x_i+b) ]
//
// Differentiate with respect to the parameters:
//
//   d ell/dm = sum_{i=0}^{N-1} [ -1/(1-p_i) dp_i/dm + x_i*y_i ]
//            = sum_{i=0}^{N-1} [ -1/(1-p_i) x_i*p_i*(1-p_i) + x_i*y_i ]
//            = sum_{i=0}^{N-1} [ x_i*(y_i-p_i) ]
//
//   d ell/db = sum_{i=0}^{N-1} [ -1/(1-p_i) dp_i/db + y_i ]
//            = sum_{i=0}^{N-1} [ -1/(1-p_i) p_i*(1-p_i) + y_i ]
//            = sum_{i=0}^{N-1} [ y_i - p_i ]
//
//   d2ell/dm2  = sum_{i=0}^{N-1} [ -x_i dp_i/dm ]
//              = sum_{i=0}^{N-1} [ -x_i**2 * p_i * (1-p_i) ]
//
//   d2ell/dmdb = sum_{i=0}^{N-1} [ -x_i dp_i/db ]
//              = sum_{i=0}^{N-1} [ -x_i * p_i * (1-p_i) ]
//
//   d2ell/dbdm = sum_{i=0}^{N-1} [ -dp_i/dm ]
//              = sum_{i=0}^{N-1} [ -x_i * p_i * (1-p_i) ]
//
//   d2ell/db2  = sum_{i=0}^{N-1} [ -dp_i/db ]
//              = sum_{i=0}^{N-1} [ -p_i * (1-p_i) ]
//
// Newton-Raphson to maximize ell(m, b), i.e. to solve grad ell = 0:
// * Pick m0, b0
// * [m_{j+1}, b_{j+1}] = [m_j, b_j] - H^{-1} grad ell(m_j, b_j)
// * grad ell =
//     [ d ell/dm ]
//     [ d ell/db ]
// * H = Hessian of ell = Jacobian of grad ell =
//     [ d2ell/dm2   d2ell/dmdb ]
//     [ d2ell/dmdb  d2ell/db2  ]

// p(x,m,b) for logistic regression:
func lrp(x, m, b float64) float64 {
	return 1.0 / (1.0 + math.Exp(-m*x-b))
}

// 1 - p(x,m,b) for logistic regression:
func lrq(x, m, b float64) float64 {
	return 1.0 / (1.0 + math.Exp(m*x+b))
}

// LogisticRegression fits p(y=1|x) = 1 / (1 + exp(-m*x-b)) to the given data
// by Newton-Raphson on the log-likelihood, returning the slope m and the
// intercept b.
func LogisticRegression(xs, ys []float64) (m, b float64) {
	m0 := -0.001
	b0 := 0.002
	tol := 1e-9
	maxits := 100
	return logisticRegressionAux(xs, ys, m0, b0, tol, maxits)
}

// Supporting routine for LogisticRegression():
func logisticRegressionAux(
	xs, ys []float64,
	m0, b0, tol float64,
	maxits int,
) (m, b float64) {

	InternalCodingErrorIf(len(xs) != len(ys))
	n := len(xs)

	its := 0
	done := false
	m = m0
	b = b0

	for !done {
		// Compute the gradient, the Hessian terms, and the log-likelihood at (m0, b0)
		dldm := 0.0
		dldb := 0.0
		d2ldm2 := 0.0
		d2ldmdb := 0.0
		d2ldb2 := 0.0
		ell0 := 0.0

		for i := 0; i < n; i++ {
			xi := xs[i]
			yi := ys[i]
			pi := lrp(xi, m0, b0)
			qi := lrq(xi, m0, b0)
			dldm += xi * (yi - pi)
			dldb += yi - pi
			piqi := pi * qi
			xipiqi := xi * piqi
			xi2piqi := xi * xipiqi
			d2ldm2 -= xi2piqi
			d2ldmdb -= xipiqi
			d2ldb2 -= piqi
			ell0 += math.Log(qi) + yi*(m0*xi+b0)
		}

		// Form the Hessian
		ha := d2ldm2
		hb := d2ldmdb
		hc := d2ldmdb
		hd := d2ldb2

		// Invert the Hessian
		D := ha*hd - hb*hc
		Hinva := hd / D
		Hinvb := -hb / D
		Hinvc := -hc / D
		Hinvd := ha / D

		// Compute H^-1 times grad ell
		Hinvgradm := Hinva*dldm + Hinvb*dldb
		Hinvgradb := Hinvc*dldm + Hinvd*dldb

		// Newton update of [m, b]
		m = m0 - Hinvgradm
		b = b0 - Hinvgradb

		// Recompute the log-likelihood at the updated (m, b)
		ell := 0.0
		for i := 0; i < n; i++ {
			xi := xs[i]
			yi := ys[i]
			qi := lrq(xi, m, b)
			ell += math.Log(qi) + yi*(m*xi+b)
		}

		// Check for convergence: relative change in the log-likelihood.
		// (Absolute values: the log-likelihood itself is negative.)
		dell := math.Max(math.Abs(ell), math.Abs(ell0))
		err := 0.0
		if dell != 0.0 {
			err = math.Abs(ell-ell0) / dell
		}

		if err < tol {
			done = true
		}
		its++
		if its > maxits {
			fmt.Fprintf(os.Stderr,
				"mlr_logistic_regression: Newton-Raphson convergence failed after %d iterations. m=%e, b=%e.\n",
				its, m, b)
			os.Exit(1)
		}

		m0 = m
		b0 = b
	}

	return m, b
}
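
// Illustrative sketch, not part of the original source: fit a logistic curve
// to a small dataset in which x below 0 is mostly labeled 0 and x above 0 is
// mostly labeled 1 (with two labels flipped so the optimum stays finite). The
// fitted slope m should come out positive and the intercept b near 0. The
// function name exampleLogisticRegression and the data are ours.
func exampleLogisticRegression() {
	xs := []float64{-3.0, -2.0, -1.0, -0.5, 0.5, 1.0, 2.0, 3.0}
	ys := []float64{0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0}
	m, b := LogisticRegression(xs, ys)
	fmt.Printf("m=%v b=%v\n", m, b) // m > 0; fitted model is p(y=1|x) = 1/(1+exp(-m*x-b))
}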