// ================================================================
// Non-mlrval math routines
// ================================================================

package lib

import (
	"fmt"
	"math"
	"os"
)

// ----------------------------------------------------------------
// Some wrappers around things which aren't one-liners from math.*.

// Sgn returns the sign of its argument: +1 for positive, -1 for negative,
// 0 for zero, and NaN for NaN.
func Sgn(a float64) float64 {
	if a > 0 {
		return 1.0
	} else if a < 0 {
		return -1.0
	} else if a == 0 {
		return 0.0
	} else {
		return math.NaN()
	}
}

// Qnorm is the normal cumulative distribution function, expressed in terms of
// the erfc library function (which is awkward, but exists).
func Qnorm(x float64) float64 {
	return 0.5 * math.Erfc(-x/math.Sqrt2)
}
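
// Illustrative sketch, not part of the original source: a quick sanity check
// of Qnorm at a few familiar points on the standard normal CDF. The function
// name exampleQnorm is ours; the expected values are approximate.
func exampleQnorm() {
	fmt.Println(Qnorm(0.0))       // 0.5: half the mass lies below the mean
	fmt.Println(Qnorm(1.959964))  // ≈ 0.975: the usual two-sided 95% cutoff
	fmt.Println(Qnorm(-1.959964)) // ≈ 0.025
}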

// This is a tangent-following method not unlike Newton-Raphson:
// * We can compute qnorm(y) = integral from -infinity to y of (1/sqrt(2pi)) exp(-t^2/2) dt.
// * We can compute the derivative qnorm'(y) = (1/sqrt(2pi)) exp(-y^2/2).
// * We cannot explicitly compute invqnorm(y).
// * If dx/dy = (1/sqrt(2pi)) exp(-y^2/2) then dy/dx = sqrt(2pi) exp(y^2/2).
//
// This means we *can* compute the derivative of invqnorm even though we
// can't compute the function itself. So the essence of the method is to
// follow the tangent line to form successive approximations: we have known
// function input x, unknown function output y, and initial guess y0. At each
// step we find the intersection of the tangent line at y_n with the vertical
// line at x, to find y_{n+1}. Specifically:
//
// * Even though we can't compute y = q^-1(x), we can compute x = q(y).
// * Start with an initial guess for y (y0 = 0.0 or y0 = x both are OK).
// * Compute q(y_n). Since q (and therefore q^-1) is 1-1, we're done when
//   |x - q(y_n)| is small.
// * Else iterate: using point-slope form,
//   (y_{n+1} - y_n) / (x_{n+1} - x_n) = m = sqrt(2pi) exp(y_n^2/2),
//   where x_{n+1} = x (the input) and x_n = q(y_n).
// * Solve for y_{n+1} and repeat.

const INVQNORM_TOL float64 = 1e-9
const INVQNORM_MAXITER int = 30

func Invqnorm(x float64) float64 {
	// Initial approximation is linear. Starting with y0 = 0.0 works just as well.
	y0 := x - 0.5

	// Domain guard: the inverse normal CDF is defined only for 0 < x < 1.
	if x <= 0.0 {
		return 0.0
	}
	if x >= 1.0 {
		return 0.0
	}

	y := y0
	niter := 0

	for {

		backx := Qnorm(y)
		err := math.Abs(x - backx)
		if err < INVQNORM_TOL {
			break
		}
		if niter > INVQNORM_MAXITER {
			fmt.Fprintf(os.Stderr,
				"mlr: internal coding error: max iterations %d exceeded in invqnorm.\n",
				INVQNORM_MAXITER,
			)
			os.Exit(1)
		}
		m := math.Sqrt2 * math.SqrtPi * math.Exp(y*y/2.0)
		delta_y := m * (x - backx)
		y += delta_y
		niter++
	}

	return y
}
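
// Illustrative sketch, not part of the original source: round-tripping through
// Qnorm and Invqnorm should recover the starting point to within roughly
// INVQNORM_TOL. The function name exampleInvqnorm is ours.
func exampleInvqnorm() {
	y := 1.25
	x := Qnorm(y)              // ≈ 0.894
	fmt.Println(Invqnorm(x))   // ≈ 1.25, recovered by the tangent-following iteration
	fmt.Println(Invqnorm(0.5)) // 0.0: the median of the standard normal
}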

const JACOBI_TOLERANCE = 1e-12
const JACOBI_MAXITER = 20

// ----------------------------------------------------------------
// Jacobi real-symmetric eigensolver. Loosely adapted from Numerical Recipes.
//
// Note: this is coded for n=2 (to implement PCA linear regression on 2
// variables) but the algorithm is quite general. Changing from 2 to n is a
// matter of updating the top and bottom of the function: generalize the
// signature to take an n x n matrix and n eigenvectors, add copy-matrix and
// make-identity-matrix helpers, and so on.

func GetRealSymmetricEigensystem(
	matrix [2][2]float64,
) (
	eigenvalue1 float64, // Output: dominant eigenvalue
	eigenvalue2 float64, // Output: less-dominant eigenvalue
	eigenvector1 [2]float64, // Output: corresponding to dominant eigenvalue
	eigenvector2 [2]float64, // Output: corresponding to less-dominant eigenvalue
) {
	L := [2][2]float64{
		{matrix[0][0], matrix[0][1]},
		{matrix[1][0], matrix[1][1]},
	}
	V := [2][2]float64{
		{1.0, 0.0},
		{0.0, 1.0},
	}
	var P, PT_A [2][2]float64
	n := 2

	found := false
	for iter := 0; iter < JACOBI_MAXITER; iter++ {
		sum := 0.0
		for i := 1; i < n; i++ {
			for j := 0; j < i; j++ {
				sum += math.Abs(L[i][j])
			}
		}
		if math.Abs(sum*sum) < JACOBI_TOLERANCE {
			found = true
			break
		}

		for p := 0; p < n; p++ {
			for q := p + 1; q < n; q++ {
				numer := L[p][p] - L[q][q]
				denom := L[p][q] + L[q][p]
				if math.Abs(denom) < JACOBI_TOLERANCE {
					continue
				}
				theta := numer / denom
				signTheta := 1.0
				if theta < 0 {
					signTheta = -1.0
				}
				t := signTheta / (math.Abs(theta) + math.Sqrt(theta*theta+1))
				c := 1.0 / math.Sqrt(t*t+1)
				s := t * c

				for pi := 0; pi < n; pi++ {
					for pj := 0; pj < n; pj++ {
						if pi == pj {
							P[pi][pj] = 1.0
						} else {
							P[pi][pj] = 0.0
						}
					}
				}
				P[p][p] = c
				P[p][q] = -s
				P[q][p] = s
				P[q][q] = c

				// L = P.transpose() * L * P
				// V = V * P
				matmul2t(&PT_A, &P, &L)
				matmul2(&L, &PT_A, &P)
				matmul2(&V, &V, &P)
			}
		}
	}

	if !found {
		fmt.Fprintf(os.Stderr,
			"%s: Jacobi eigensolver: max iterations (%d) exceeded. Non-symmetric input?\n",
			"mlr",
			JACOBI_MAXITER,
		)
		os.Exit(1)
	}

	eigenvalue1 = L[0][0]
	eigenvalue2 = L[1][1]
	abs1 := math.Abs(eigenvalue1)
	abs2 := math.Abs(eigenvalue2)
	if abs1 > abs2 {
		eigenvector1[0] = V[0][0] // Column 0 of V
		eigenvector1[1] = V[1][0]
		eigenvector2[0] = V[0][1] // Column 1 of V
		eigenvector2[1] = V[1][1]
	} else {
		eigenvalue1, eigenvalue2 = eigenvalue2, eigenvalue1
		eigenvector1[0] = V[0][1]
		eigenvector1[1] = V[1][1]
		eigenvector2[0] = V[0][0]
		eigenvector2[1] = V[1][0]
	}

	return eigenvalue1, eigenvalue2, eigenvector1, eigenvector2
}
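
// Illustrative sketch, not part of the original source: the symmetric matrix
// [[2, 1], [1, 2]] has eigenvalues 3 and 1, with eigenvectors along (1, 1)
// and (1, -1) respectively (up to sign and normalization). The function name
// exampleEigensystem is ours.
func exampleEigensystem() {
	m := [2][2]float64{
		{2.0, 1.0},
		{1.0, 2.0},
	}
	l1, l2, v1, v2 := GetRealSymmetricEigensystem(m)
	fmt.Println(l1, v1) // 3 and roughly ( 0.707, 0.707)
	fmt.Println(l2, v2) // 1 and roughly (-0.707, 0.707), up to sign
}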

// C = A * B
func matmul2(
	C *[2][2]float64, // Output
	A *[2][2]float64, // Input
	B *[2][2]float64, // Input
) {
	var T [2][2]float64
	n := 2
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			sum := 0.0
			for k := 0; k < n; k++ {
				sum += A[i][k] * B[k][j]
			}
			T[i][j] = sum
		}
	}
	// The temporary copy is needed in case C's memory is the same as A's and/or B's.
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			C[i][j] = T[i][j]
		}
	}
}

// C = A^t * B
func matmul2t(
	C *[2][2]float64, // Output
	A *[2][2]float64, // Input
	B *[2][2]float64, // Input
) {
	var T [2][2]float64
	n := 2
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			sum := 0.0
			for k := 0; k < n; k++ {
				sum += A[k][i] * B[k][j]
			}
			T[i][j] = sum
		}
	}
	// The temporary copy is needed in case C's memory is the same as A's and/or B's.
	for i := 0; i < n; i++ {
		for j := 0; j < n; j++ {
			C[i][j] = T[i][j]
		}
	}
}
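
// Illustrative sketch, not part of the original source: because matmul2 and
// matmul2t accumulate into a temporary before writing the output, the output
// argument may alias an input, as in the in-place update below. The function
// name exampleMatmulAliasing is ours.
func exampleMatmulAliasing() {
	A := [2][2]float64{
		{1.0, 2.0},
		{3.0, 4.0},
	}
	B := [2][2]float64{
		{0.0, 1.0},
		{1.0, 0.0},
	}
	matmul2(&A, &A, &B) // A = A * B, safe even though the output aliases an input
	fmt.Println(A)      // [[2 1] [4 3]]
}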

// ================================================================
// Logistic regression
//
// Real-valued x_0 .. x_{N-1}
// 0/1-valued  y_0 .. y_{N-1}
// Model p(y_i == 1 | x_i) as
//   p(x, m, b) = 1 / (1 + exp(-m*x-b))
// which is the same as
//   log(p/(1-p)) = m*x + b
// then
//   p(x, m, b) = 1 / (1 + exp(-m*x-b))
//              = exp(m*x+b) / (1 + exp(m*x+b))
// and
//   1-p = exp(-m*x-b) / (1 + exp(-m*x-b))
//       = 1 / (1 + exp(m*x+b))
// Note for reference just below that
//   dp/dm = -1 / [1 + exp(-m*x-b)]**2 * (-x) * exp(-m*x-b)
//         = [ x * exp(-m*x-b) ] / [1 + exp(-m*x-b)]**2
//         = x * p * (1-p)
// and
//   dp/db = -1 / [1 + exp(-m*x-b)]**2 * (-1) * exp(-m*x-b)
//         = [ exp(-m*x-b) ] / [1 + exp(-m*x-b)]**2
//         = p * (1-p)
// Write p_i for p(x_i, m, b).
//
// Likelihood function:
//   L(m, b) = prod_{i=0}^{N-1} [ p_i**y_i * (1-p_i)**(1-y_i) ]
//
// Log-likelihood function:
//   ell(m, b) = sum_{i=0}^{N-1} [ y_i log(p_i) + (1-y_i) log(1-p_i) ]
//             = sum_{i=0}^{N-1} [ log(1-p_i) + y_i log(p_i/(1-p_i)) ]
//             = sum_{i=0}^{N-1} [ log(1-p_i) + y_i*(m*x_i+b) ]
//
// Differentiate with respect to the parameters:
//
//   d ell/dm = sum_{i=0}^{N-1} [ -1/(1-p_i) dp_i/dm + x_i*y_i ]
//            = sum_{i=0}^{N-1} [ -1/(1-p_i) x_i*p_i*(1-p_i) + x_i*y_i ]
//            = sum_{i=0}^{N-1} [ x_i*(y_i-p_i) ]
//
//   d ell/db = sum_{i=0}^{N-1} [ -1/(1-p_i) dp_i/db + y_i ]
//            = sum_{i=0}^{N-1} [ -1/(1-p_i) p_i*(1-p_i) + y_i ]
//            = sum_{i=0}^{N-1} [ y_i - p_i ]
//
//   d2ell/dm2  = sum_{i=0}^{N-1} [ -x_i dp_i/dm ]
//              = sum_{i=0}^{N-1} [ -x_i**2 * p_i * (1-p_i) ]
//
//   d2ell/dmdb = sum_{i=0}^{N-1} [ -x_i dp_i/db ]
//              = sum_{i=0}^{N-1} [ -x_i * p_i * (1-p_i) ]
//
//   d2ell/dbdm = sum_{i=0}^{N-1} [ -dp_i/dm ]
//              = sum_{i=0}^{N-1} [ -x_i * p_i * (1-p_i) ]
//
//   d2ell/db2  = sum_{i=0}^{N-1} [ -dp_i/db ]
//              = sum_{i=0}^{N-1} [ -p_i * (1-p_i) ]
//
// Newton-Raphson to maximize ell(m, b), i.e. to solve grad ell = 0:
// * Pick m0, b0
// * [m_{j+1}, b_{j+1}] = [m_j, b_j] - H^{-1} grad ell(m_j, b_j)
// * grad ell =
//     [ d ell/dm ]
//     [ d ell/db ]
// * H = Hessian of ell = Jacobian of grad ell =
//     [ d2ell/dm2   d2ell/dmdb ]
//     [ d2ell/dmdb  d2ell/db2  ]

// p(x,m,b) for logistic regression:
func lrp(x, m, b float64) float64 {
	return 1.0 / (1.0 + math.Exp(-m*x-b))
}

// 1 - p(x,m,b) for logistic regression:
func lrq(x, m, b float64) float64 {
	return 1.0 / (1.0 + math.Exp(m*x+b))
}

// LogisticRegression fits p(y=1|x) = 1 / (1 + exp(-m*x-b)) to the given data
// by Newton-Raphson on the log-likelihood, returning the slope m and the
// intercept b.
func LogisticRegression(xs, ys []float64) (m, b float64) {
	m0 := -0.001
	b0 := 0.002
	tol := 1e-9
	maxits := 100
	return logisticRegressionAux(xs, ys, m0, b0, tol, maxits)
}

// Supporting routine for LogisticRegression():
func logisticRegressionAux(
	xs, ys []float64,
	m0, b0, tol float64,
	maxits int,
) (m, b float64) {

	InternalCodingErrorIf(len(xs) != len(ys))
	n := len(xs)

	its := 0
	done := false
	m = m0
	b = b0

	for !done {
		// Compute the gradient, the Hessian terms, and the log-likelihood at (m0, b0)
		dldm := 0.0
		dldb := 0.0
		d2ldm2 := 0.0
		d2ldmdb := 0.0
		d2ldb2 := 0.0
		ell0 := 0.0

		for i := 0; i < n; i++ {
			xi := xs[i]
			yi := ys[i]
			pi := lrp(xi, m0, b0)
			qi := lrq(xi, m0, b0)
			dldm += xi * (yi - pi)
			dldb += yi - pi
			piqi := pi * qi
			xipiqi := xi * piqi
			xi2piqi := xi * xipiqi
			d2ldm2 -= xi2piqi
			d2ldmdb -= xipiqi
			d2ldb2 -= piqi
			ell0 += math.Log(qi) + yi*(m0*xi+b0)
		}

		// Form the Hessian
		ha := d2ldm2
		hb := d2ldmdb
		hc := d2ldmdb
		hd := d2ldb2

		// Invert the Hessian
		D := ha*hd - hb*hc
		Hinva := hd / D
		Hinvb := -hb / D
		Hinvc := -hc / D
		Hinvd := ha / D

		// Compute H^-1 times grad ell
		Hinvgradm := Hinva*dldm + Hinvb*dldb
		Hinvgradb := Hinvc*dldm + Hinvd*dldb

		// Newton update of [m, b]
		m = m0 - Hinvgradm
		b = b0 - Hinvgradb

		// Recompute the log-likelihood at the updated (m, b)
		ell := 0.0
		for i := 0; i < n; i++ {
			xi := xs[i]
			yi := ys[i]
			qi := lrq(xi, m, b)
			ell += math.Log(qi) + yi*(m*xi+b)
		}

		// Check for convergence: relative change in the log-likelihood.
		// (Absolute values: the log-likelihood itself is negative.)
		dell := math.Max(math.Abs(ell), math.Abs(ell0))
		err := 0.0
		if dell != 0.0 {
			err = math.Abs(ell-ell0) / dell
		}

		if err < tol {
			done = true
		}
		its++
		if its > maxits {
			fmt.Fprintf(os.Stderr,
				"mlr_logistic_regression: Newton-Raphson convergence failed after %d iterations. m=%e, b=%e.\n",
				its, m, b)
			os.Exit(1)
		}

		m0 = m
		b0 = b
	}

	return m, b
}
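
// Illustrative sketch, not part of the original source: fit a logistic curve
// to a small dataset in which x below 0 is mostly labeled 0 and x above 0 is
// mostly labeled 1 (with two labels flipped so the optimum stays finite). The
// fitted slope m should come out positive and the intercept b near 0. The
// function name exampleLogisticRegression and the data are ours.
func exampleLogisticRegression() {
	xs := []float64{-3.0, -2.0, -1.0, -0.5, 0.5, 1.0, 2.0, 3.0}
	ys := []float64{0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0}
	m, b := LogisticRegression(xs, ys)
	fmt.Printf("m=%v b=%v\n", m, b) // m > 0; fitted model is p(y=1|x) = 1/(1+exp(-m*x-b))
}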