miller/internal/pkg/lib/mlrmath.go
// ================================================================
// Non-mlrval math routines
// ================================================================
package lib
import (
"fmt"
"math"
"os"
)
// ----------------------------------------------------------------
// Some wrappers around things which aren't one-liners from math.*.
func Sgn(a float64) float64 {
if a > 0 {
return 1.0
} else if a < 0 {
return -1.0
} else if a == 0 {
return 0.0
} else {
return math.NaN()
}
}
// Normal cumulative distribution function, expressed in terms of erfc library
// function (which is awkward, but exists).
func Qnorm(x float64) float64 {
return 0.5 * math.Erfc(-x/math.Sqrt2)
}
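// The following is an illustrative sketch, not part of the original file: a few
// spot checks of Qnorm against well-known values of the standard normal CDF.
// The function name qnormSpotCheckSketch is hypothetical.
func qnormSpotCheckSketch() {
	fmt.Println(Qnorm(0.0))   // expect 0.5 exactly
	fmt.Println(Qnorm(1.96))  // expect approximately 0.975
	fmt.Println(Qnorm(-1.96)) // expect approximately 0.025
}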
// This is a tangent-following method not unlike Newton-Raphson:
// * We can compute qnorm(y) = integral from -infinity to y of (1/sqrt(2pi)) exp(-t^2/2) dt.
// * We can compute derivative of qnorm(y) = (1/sqrt(2pi)) exp(-y^2/2).
// * We cannot explicitly compute invqnorm(y).
// * If dx/dy = (1/sqrt(2pi)) exp(-y^2/2) then dy/dx = sqrt(2pi) exp(y^2/2).
//
// This means we *can* compute the derivative of invqnorm even though we
// can't compute the function itself. So the essence of the method is to
// follow the tangent line to form successive approximations: we have known function input x
// and unknown function output y and initial guess y0. At each step we find the intersection
// of the tangent line at y_n with the vertical line at x, to find y_{n+1}. Specifically:
//
// * Even though we can't compute y = q^-1(x) we can compute x = q(y).
// * Start with initial guess for y (y0 = 0.0 or y0 = x both are OK).
// * Compute q(y). Since q (and therefore q^-1) is 1-1, we're done when |x - q(y)| is small.
// * Else iterate: using point-slope form, (y_{n+1} - y_n) / (x_{n+1} - x_n) = m = sqrt(2pi) exp(y_n^2/2).
// Here x_{n+1} = x (the input) and x_n = q(y_n).
// * Solve for y_{n+1} and repeat.
const INVQNORM_TOL float64 = 1e-9
const INVQNORM_MAXITER int = 30
func Invqnorm(x float64) float64 {
// Initial approximation is linear. Starting with y0 = 0.0 works just as well.
y0 := x - 0.5
if x <= 0.0 {
return 0.0
}
if x >= 1.0 {
return 0.0
}
y := y0
niter := 0
for {
backx := Qnorm(y)
err := math.Abs(x - backx)
if err < INVQNORM_TOL {
break
}
if niter > INVQNORM_MAXITER {
fmt.Fprintf(os.Stderr,
"mlr: internal coding error: max iterations %d exceeded in invqnorm.\n",
INVQNORM_MAXITER,
)
os.Exit(1)
}
m := math.Sqrt2 * math.SqrtPi * math.Exp(y*y/2.0)
delta_y := m * (x - backx)
y += delta_y
niter++
}
return y
}
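// The following is an illustrative sketch, not part of the original file: a
// round-trip check that Qnorm(Invqnorm(x)) recovers x to within roughly
// INVQNORM_TOL for probabilities strictly inside (0, 1). The function name
// invqnormRoundTripSketch is hypothetical.
func invqnormRoundTripSketch() {
	for _, x := range []float64{0.025, 0.25, 0.5, 0.75, 0.975} {
		y := Invqnorm(x)
		fmt.Printf("x=%v  invqnorm(x)=%v  qnorm(invqnorm(x))=%v\n", x, y, Qnorm(y))
	}
}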
const JACOBI_TOLERANCE = 1e-12
const JACOBI_MAXITER = 20
// ----------------------------------------------------------------
// Jacobi real-symmetric eigensolver. Loosely adapted from Numerical Recipes.
//
// Note: this is coded for n=2 (to implement PCA linear regression on 2
// variables) but the algorithm is quite general. Changing from 2 to n is a
// matter of updating the top and bottom of the function: change the signature
// to take an n x n [][]float64 matrix and []float64 eigenvectors, and add
// copy-matrix and make-identity-matrix helpers.
func GetRealSymmetricEigensystem(
matrix [2][2]float64,
) (
eigenvalue1 float64, // Output: dominant eigenvalue
eigenvalue2 float64, // Output: less-dominant eigenvalue
eigenvector1 [2]float64, // Output: corresponding to dominant eigenvalue
eigenvector2 [2]float64, // Output: corresponding to less-dominant eigenvalue
) {
L := [2][2]float64{
{matrix[0][0], matrix[0][1]},
{matrix[1][0], matrix[1][1]},
}
V := [2][2]float64{
{1.0, 0.0},
{0.0, 1.0},
}
var P, PT_A [2][2]float64
n := 2
found := false
for iter := 0; iter < JACOBI_MAXITER; iter++ {
sum := 0.0
for i := 1; i < n; i++ {
for j := 0; j < i; j++ {
sum += math.Abs(L[i][j])
}
}
if math.Abs(sum*sum) < JACOBI_TOLERANCE {
found = true
break
}
for p := 0; p < n; p++ {
for q := p + 1; q < n; q++ {
numer := L[p][p] - L[q][q]
denom := L[p][q] + L[q][p]
if math.Abs(denom) < JACOBI_TOLERANCE {
continue
}
theta := numer / denom
signTheta := 1.0
if theta < 0 {
signTheta = -1.0
}
t := signTheta / (math.Abs(theta) + math.Sqrt(theta*theta+1))
c := 1.0 / math.Sqrt(t*t+1)
s := t * c
for pi := 0; pi < n; pi++ {
for pj := 0; pj < n; pj++ {
if pi == pj {
P[pi][pj] = 1.0
} else {
P[pi][pj] = 0.0
}
}
}
P[p][p] = c
P[p][q] = -s
P[q][p] = s
P[q][q] = c
// L = P.transpose() * L * P
// V = V * P
matmul2t(&PT_A, &P, &L)
matmul2(&L, &PT_A, &P)
matmul2(&V, &V, &P)
}
}
}
if !found {
fmt.Fprintf(os.Stderr,
"%s: Jacobi eigensolver: max iterations (%d) exceeded. Non-symmetric input?\n",
"mlr",
JACOBI_MAXITER,
)
os.Exit(1)
}
eigenvalue1 = L[0][0]
eigenvalue2 = L[1][1]
abs1 := math.Abs(eigenvalue1)
abs2 := math.Abs(eigenvalue2)
if abs1 > abs2 {
eigenvector1[0] = V[0][0] // Column 0 of V
eigenvector1[1] = V[1][0]
eigenvector2[0] = V[0][1] // Column 1 of V
eigenvector2[1] = V[1][1]
} else {
eigenvalue1, eigenvalue2 = eigenvalue2, eigenvalue1
eigenvector1[0] = V[0][1]
eigenvector1[1] = V[1][1]
eigenvector2[0] = V[0][0]
eigenvector2[1] = V[1][0]
}
return eigenvalue1, eigenvalue2, eigenvector1, eigenvector2
}
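// The following is an illustrative sketch, not part of the original file:
// eigensystem of the symmetric matrix {{2,1},{1,2}}, whose eigenvalues are 3
// and 1 with eigenvectors proportional to (1,1) and (1,-1). The function name
// jacobiEigensystemSketch is hypothetical.
func jacobiEigensystemSketch() {
	A := [2][2]float64{
		{2.0, 1.0},
		{1.0, 2.0},
	}
	l1, l2, v1, v2 := GetRealSymmetricEigensystem(A)
	fmt.Println(l1, v1) // dominant eigenvalue ~3, eigenvector ~ +-(1,1)/sqrt(2)
	fmt.Println(l2, v2) // eigenvalue ~1, eigenvector ~ +-(1,-1)/sqrt(2)
}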
// C = A * B
func matmul2(
C *[2][2]float64, // Output
A *[2][2]float64, // Input
B *[2][2]float64, // Input
) {
var T [2][2]float64
n := 2
for i := 0; i < n; i++ {
for j := 0; j < n; j++ {
sum := 0.0
for k := 0; k < n; k++ {
sum += A[i][k] * B[k][j]
}
T[i][j] = sum
}
}
// The temporary is needed in case C aliases A and/or B
for i := 0; i < n; i++ {
for j := 0; j < n; j++ {
C[i][j] = T[i][j]
}
}
}
// C = A^t * B
func matmul2t(
C *[2][2]float64, // Output
A *[2][2]float64, // Input
B *[2][2]float64, // Input
) {
var T [2][2]float64
n := 2
for i := 0; i < n; i++ {
for j := 0; j < n; j++ {
sum := 0.0
for k := 0; k < n; k++ {
sum += A[k][i] * B[k][j]
}
T[i][j] = sum
}
}
// The temporary is needed in case C aliases A and/or B
for i := 0; i < n; i++ {
for j := 0; j < n; j++ {
C[i][j] = T[i][j]
}
}
}
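// The following is an illustrative sketch, not part of the original file: it
// shows that the output of matmul2 may alias an input, since the product is
// accumulated into a temporary before being copied out. With B swapping
// columns, A*B is A with its columns exchanged. The function name
// matmul2AliasSketch is hypothetical.
func matmul2AliasSketch() {
	A := [2][2]float64{{1, 2}, {3, 4}}
	B := [2][2]float64{{0, 1}, {1, 0}}
	matmul2(&A, &A, &B) // A = A * B, with the output aliasing the first input
	fmt.Println(A)      // expect [[2 1] [4 3]]
}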
// ================================================================
// Logistic regression
//
// Real-valued x_0 .. x_{N-1}
// 0/1-valued y_0 .. y_{N-1}
// Model p(y_i == 1) as
// p(x, m, b) = 1 / (1 + exp(-m*x-b))
// which is the same as
// log(p/(1-p)) = m*x + b
// then
// p(x, m, b) = 1 / (1 + exp(-m*x-b))
// = exp(m*x+b) / (1 + exp(m*x+b))
// and
// 1-p = exp(-m*x-b) / (1 + exp(-m*x-b))
// = 1 / (1 + exp(m*x+b))
// Note for reference just below that
// dp/dm = -1 / [1 + exp(-m*x-b)]**2 * (-x) * exp(-m*x-b)
// = [ x exp(-m*x-b) ] / [1 + exp(-m*x-b)]**2
// = x * p * (1-p)
// and
// dp/db = -1 / [1 + exp(-m*x-b)]**2 * (-1) * exp(-m*x-b)
// = [ exp(-m*x-b) ] / [1 + exp(-m*x-b)]**2
// = p * (1-p)
// Write p_i for p(x_i, m, b)
//
// Maximum-likelihood equation:
// L(m, b) = prod_{i=0}^{N-1} [ p_i**y_i * (1-p_i)**(1-y_i) ]
//
// Log-likelihood equation:
// ell(m, b) = sum{i=0}^{N-1} [ y_i log(p_i) + (1-y_i) log(1-p_i) ]
// = sum{i=0}^{N-1} [ log(1-p_i) + y_i log(p_i/(1-p_i)) ]
// = sum{i=0}^{N-1} [ log(1-p_i) + y_i*(m*x_i+b) ]
// Differentiate with respect to parameters:
//
// d ell/dm = sum{i=0}^{N-1} [ -1/(1-p_i) dp_i/dm + x_i*y_i ]
// = sum{i=0}^{N-1} [ -1/(1-p_i) x_i*p_i*(1-p_i) + x_i*y_i ]
// = sum{i=0}^{N-1} [ x_i(y_i-p_i) ]
//
// d ell/db = sum{i=0}^{N-1} [ -1/(1-p_i) dp_i/db + y_i ]
// = sum{i=0}^{N-1} [ -1/(1-p_i) p_i*(1-p_i) + y_i ]
// = sum{i=0}^{N-1} [ y_i - p_i ]
//
//
// d2ell/dm2 = sum{i=0}^{N-1} [ -x_i dp_i/dm ]
// = sum{i=0}^{N-1} [ -x_i**2 * p_i * (1-p_i) ]
//
// d2ell/dmdb = sum{i=0}^{N-1} [ -x_i dp_i/db ]
// = sum{i=0}^{N-1} [ -x_i * p_i * (1-p_i) ]
//
// d2ell/dbdm = sum{i=0}^{N-1} [ -dp_i/dm ]
// = sum{i=0}^{N-1} [ -x_i * p_i * (1-p_i) ]
//
// d2ell/db2 = sum{i=0}^{N-1} [ -dp_i/db ]
// = sum{i=0}^{N-1} [ -p_i * (1-p_i) ]
//
// Newton-Raphson to maximize ell(m, b):
// * Pick m0, b0
// * [m_{j+1}, b_{j+1}] = [m_j, b_j] - H^{-1} grad ell(m_j, b_j)
// * grad ell =
// [ d ell/dm ]
// [ d ell/db ]
// * H = Hessian of ell = Jacobian of grad ell =
// [ d2ell/dm2 d2ell/dmdb ]
// [ d2ell/dmdb d2ell/db2 ]
// p(x,m,b) for logistic regression:
func lrp(x, m, b float64) float64 {
return 1.0 / (1.0 + math.Exp(-m*x-b))
}
// 1 - p(x,m,b) for logistic regression:
func lrq(x, m, b float64) float64 {
return 1.0 / (1.0 + math.Exp(m*x+b))
}
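// The following is an illustrative sketch, not part of the original file: lrp
// and lrq are complements, so their sum should be 1 up to floating-point
// rounding. The function name lrComplementSketch is hypothetical.
func lrComplementSketch() {
	x, m, b := 1.5, 2.0, -0.25
	fmt.Println(lrp(x, m, b) + lrq(x, m, b)) // expect approximately 1
}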
func LogisticRegression(xs, ys []float64) (m, b float64) {
m0 := -0.001
b0 := 0.002
tol := 1e-9
maxits := 100
return logisticRegressionAux(xs, ys, m0, b0, tol, maxits)
}
// Supporting routine for LogisticRegression():
func logisticRegressionAux(
xs, ys []float64,
m0, b0, tol float64,
maxits int,
) (m, b float64) {
InternalCodingErrorIf(len(xs) != len(ys))
n := len(xs)
its := 0
done := false
m = m0
b = b0
for !done {
// Compute derivatives
dldm := 0.0
dldb := 0.0
d2ldm2 := 0.0
d2ldmdb := 0.0
d2ldb2 := 0.0
ell0 := 0.0
for i := 0; i < n; i++ {
xi := xs[i]
yi := ys[i]
pi := lrp(xi, m0, b0)
qi := lrq(xi, m0, b0)
dldm += xi * (yi - pi)
dldb += yi - pi
piqi := pi * qi
xipiqi := xi * piqi
xi2piqi := xi * xipiqi
d2ldm2 -= xi2piqi
d2ldmdb -= xipiqi
d2ldb2 -= piqi
ell0 += math.Log(qi) + yi*(m0*xi+b0)
}
// Form the Hessian
ha := d2ldm2
hb := d2ldmdb
hc := d2ldmdb
hd := d2ldb2
// Invert the Hessian
D := ha*hd - hb*hc
Hinva := hd / D
Hinvb := -hb / D
Hinvc := -hc / D
Hinvd := ha / D
// Compute H^-1 times grad ell
Hinvgradm := Hinva*dldm + Hinvb*dldb
Hinvgradb := Hinvc*dldm + Hinvd*dldb
// Update [m,b]
m = m0 - Hinvgradm
b = b0 - Hinvgradb
ell := 0.0
for i := 0; i < n; i++ {
xi := xs[i]
yi := ys[i]
qi := lrq(xi, m, b)
ell += math.Log(qi) + yi*(m*xi+b)
}
// Check for convergence: relative change in the log-likelihood. Since the
// log-likelihood is non-positive, normalize by the larger magnitude so the
// relative error comes out non-negative.
dell := math.Max(math.Abs(ell), math.Abs(ell0))
err := 0.0
if dell != 0.0 {
err = math.Abs(ell-ell0) / dell
}
if err < tol {
done = true
}
its++
if its > maxits {
fmt.Fprintf(os.Stderr,
"mlr_logistic_regression: Newton-Raphson convergence failed after %d iterations. m=%e, b=%e.\n",
its, m, b)
os.Exit(1)
}
m0 = m
b0 = b
}
return m, b
}
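// The following is an illustrative sketch, not part of the original file:
// fitting the logistic model to small synthetic data in which larger x values
// are more often labeled 1, so the fitted slope m should come out positive.
// The function name and the data are hypothetical.
func logisticRegressionSketch() {
	xs := []float64{-2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0}
	ys := []float64{0, 0, 0, 1, 0, 1, 1, 1, 1}
	m, b := LogisticRegression(xs, ys)
	fmt.Printf("m=%v b=%v\n", m, b)
}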