AI: Rename SampleRadius to ClusterRadius and increase cap to 0.42 #5167

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer 2025-10-26 10:08:59 +01:00
parent c53ac3353b
commit 72a9a53426
4 changed files with 8 additions and 8 deletions

View file

@ -56,7 +56,7 @@ All embeddings, regardless of origin, are normalized to unit length (‖x‖₂
- Static datasets (children/background samples) and random generators now normalize their entries after perturbation.
- `photoprism faces audit --fix` re-normalizes persisted embeddings, rekeys face IDs, and re-links markers (ID + `FaceDist`) so historical data adopts the canonical unit-length vectors.
- `Faces.Match` pre-filters matchable clusters, keeps an in-memory veto list for freshly cleared markers, and caches embeddings to avoid redundant distance checks; `BenchmarkSelectBestFace` (1024 faces) now reports a bucket size of ~16 candidates out of 1024 (≈98% fewer distance evaluations) at ≈0.55ms/op with zero allocations.
- Face clusters update their sample statistics (`Samples`, `SampleRadius`) from the latest matches via `Face.UpdateMatchStats`, avoiding stale radii during optimize loops.
- Face clusters update their sample statistics (`Samples`, `SampleRadius`) from the latest matches via `Face.UpdateMatchStats`, avoiding stale radii during optimize loops. The stored radius is capped at `face.ClusterRadius` (**0.42** by default), so automatic matches accept new embeddings up to `ClusterRadius + MatchDist` (0.42 + 0.46 = 0.88) away from the centroid.
- Cluster materialisation now pre-sizes buffers; `BenchmarkClusterMaterialize` reports ~14.8µs/op with 64 allocations (≈56KB) versus the legacy ~29.8µs/op with 384 allocations (≈105KB).
This guarantees that Euclidean distance comparisons are equivalent to cosine comparisons, aligning our thresholds with [FaceNet](https://maucher.pages.mi.hdm-stuttgart.de/orbook/face/faceRecognition.html) literature.

View file

@ -26,6 +26,8 @@ var (
ClusterSizeThreshold = 50
// ClusterDist is the similarity distance threshold that defines the cluster core.
ClusterDist = 0.64
// ClusterRadius is the upper limit for a cluster's sample radius, i.e. the maximum normalized distance of cluster samples from the centroid.
ClusterRadius = 0.42
// MatchDist is the distance offset threshold used to match new faces with existing clusters.
MatchDist = 0.46
// CollisionDist is the minimum distance under which embeddings cannot be distinguished.
@ -34,8 +36,6 @@ var (
ClusterCore = 4
// SampleThreshold is the number of faces required before automatic clustering begins.
SampleThreshold = 2 * ClusterCore
// SampleRadius is the maximum normalized distance for cluster samples.
SampleRadius = 0.35
// Epsilon is the numeric tolerance used during cluster comparisons.
Epsilon = 0.01
// SkipChildren controls whether the clustering step omits faces from child samples by default.

View file

@ -90,8 +90,8 @@ func (m *Face) SetEmbeddings(embeddings face.Embeddings) (err error) {
}
// Limit sample radius to reduce false positives.
if m.SampleRadius > face.SampleRadius {
m.SampleRadius = face.SampleRadius
if m.SampleRadius > face.ClusterRadius {
m.SampleRadius = face.ClusterRadius
}
m.EmbeddingJSON, err = json.Marshal(m.embedding)
@ -286,8 +286,8 @@ func (m *Face) UpdateMatchStats(samples int, maxDistance float64) error {
radius := maxDistance + face.Epsilon
if radius > face.SampleRadius {
radius = face.SampleRadius
if radius > face.ClusterRadius {
radius = face.ClusterRadius
}
if radius < 0 {

View file

@ -193,7 +193,7 @@ func TestFace_SetEmbeddings(t *testing.T) {
require.NoError(t, m.SetEmbeddings(embeddings))
require.Equal(t, 2, m.Samples)
assert.InDelta(t, face.SampleRadius, m.SampleRadius, 1e-9)
assert.InDelta(t, face.ClusterRadius, m.SampleRadius, 1e-9)
})
}