AI: Rename SampleRadius to ClusterRadius and increase cap to 0.42 #5167

Signed-off-by: Michael Mayer <michael@photoprism.app>
2026-01-23 02:24:24 +00:00 · 2025-10-26 10:08:59 +01:00 · 2025-10-26 10:08:59 +01:00 · 72a9a53426
commit 72a9a53426
parent c53ac3353b
4 changed files with 8 additions and 8 deletions
--- a/internal/ai/face/README.md
+++ b/internal/ai/face/README.md
@@ -56,7 +56,7 @@ All embeddings, regardless of origin, are normalized to unit length (‖x‖₂
 - Static datasets (children/background samples) and random generators now normalize their entries after perturbation.
 - `photoprism faces audit --fix` re-normalizes persisted embeddings, rekeys face IDs, and re-links markers (ID + `FaceDist`) so historical data adopts the canonical unit-length vectors.
 - `Faces.Match` pre-filters matchable clusters, keeps an in-memory veto list for freshly cleared markers, and caches embeddings to avoid redundant distance checks; `BenchmarkSelectBestFace` (1024 faces) now reports a bucket size of ~16 candidates out of 1024 (≈98 % fewer distance evaluations) at ≈0.55 ms/op with zero allocations.
- Face clusters update their sample statistics (`Samples`, `SampleRadius`) from the latest matches via `Face.UpdateMatchStats`, avoiding stale radii during optimize loops.
+- Face clusters update their sample statistics (`Samples`, `ClusterRadius`) from the latest matches via `Face.UpdateMatchStats`, avoiding stale radii during optimize loops. The radius is capped at **0.42** so automatic matches accept new embeddings up to `ClusterRadius + MatchDist` (≈0.88) away from the centroid.
 - Cluster materialisation now pre-sizes buffers; `BenchmarkClusterMaterialize` reports ~14.8 µs/op with 64 allocations (≈56 KB) versus the legacy ~29.8 µs/op with 384 allocations (≈105 KB).

 This guarantees that Euclidean distance comparisons are equivalent to cosine comparisons, aligning our thresholds with [FaceNet](https://maucher.pages.mi.hdm-stuttgart.de/orbook/face/faceRecognition.html) literature.
--- a/internal/ai/face/config.go
+++ b/internal/ai/face/config.go
@@ -26,6 +26,8 @@ var (
 	ClusterSizeThreshold = 50
 	// ClusterDist is the similarity distance threshold that defines the cluster core.
 	ClusterDist = 0.64
+	// ClusterRadius is the maximum normalized distance for cluster samples.
+	ClusterRadius = 0.42
 	// MatchDist is the distance offset threshold used to match new faces with existing clusters.
 	MatchDist = 0.46
 	// CollisionDist is the minimum distance under which embeddings cannot be distinguished.
@@ -34,8 +36,6 @@ var (
 	ClusterCore = 4
 	// SampleThreshold is the number of faces required before automatic clustering begins.
 	SampleThreshold = 2 * ClusterCore
-	// SampleRadius is the maximum normalized distance for cluster samples.
-	SampleRadius = 0.35
 	// Epsilon is the numeric tolerance used during cluster comparisons.
 	Epsilon = 0.01
 	// SkipChildren controls whether the clustering step omits faces from child samples by default.
--- a/internal/entity/face.go
+++ b/internal/entity/face.go
@@ -90,8 +90,8 @@ func (m *Face) SetEmbeddings(embeddings face.Embeddings) (err error) {
 	}

 	// Limit sample radius to reduce false positives.
-	if m.SampleRadius > face.SampleRadius {
-		m.SampleRadius = face.SampleRadius
+	if m.SampleRadius > face.ClusterRadius {
+		m.SampleRadius = face.ClusterRadius
 	}

 	m.EmbeddingJSON, err = json.Marshal(m.embedding)
@@ -286,8 +286,8 @@ func (m *Face) UpdateMatchStats(samples int, maxDistance float64) error {

 	radius := maxDistance + face.Epsilon

-	if radius > face.SampleRadius {
-		radius = face.SampleRadius
+	if radius > face.ClusterRadius {
+		radius = face.ClusterRadius
 	}

 	if radius < 0 {
--- a/internal/entity/face_test.go
+++ b/internal/entity/face_test.go
@@ -193,7 +193,7 @@ func TestFace_SetEmbeddings(t *testing.T) {

 		require.NoError(t, m.SetEmbeddings(embeddings))
 		require.Equal(t, 2, m.Samples)
-		assert.InDelta(t, face.SampleRadius, m.SampleRadius, 1e-9)
+		assert.InDelta(t, face.ClusterRadius, m.SampleRadius, 1e-9)
 	})
 }