Faces: Refactor facial recognition to use new vector package #4691

This is work in progress and not finished yet.

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer 2025-02-03 16:51:41 +01:00
parent 5738d838e5
commit 08ca2a9d4b
20 changed files with 313 additions and 225 deletions

View file

@ -5,26 +5,41 @@ import (
"fmt"
"strings"
"github.com/photoprism/photoprism/pkg/clusters"
"github.com/photoprism/photoprism/pkg/vector"
)
// Embedding represents a face embedding.
type Embedding []float64
type Embedding struct {
Vector vector.Vector
}
var NullEmbedding = make(Embedding, 512)
// Dim defines the number of face embedding vector dimensions.
const Dim = 512
var (
NullVector = make(vector.Vector, Dim)
NullEmbedding = Embedding{Vector: NullVector}
)
// NewEmbedding creates a new embedding from an inference result.
func NewEmbedding(inference []float32) Embedding {
result := make(Embedding, len(inference))
var v float32
var i int
for i, v = range inference {
result[i] = float64(v)
// NewEmbedding creates a new embedding from the given values, which are
// converted with vector.NewVector. If values is nil or the conversion fails,
// a zero-filled embedding with Dim dimensions is returned instead.
//
// Every fallback result gets its own backing vector rather than sharing the
// package-level NullVector, so callers that write to result.Vector (as the
// random test embedding helpers do) cannot corrupt NullEmbedding.
func NewEmbedding(values interface{}) Embedding {
	if values == nil {
		return Embedding{Vector: make(vector.Vector, Dim)}
	}

	v, err := vector.NewVector(values)

	if err != nil {
		// Conversion failed: fall back to a fresh null embedding.
		return Embedding{Vector: make(vector.Vector, Dim)}
	}

	return Embedding{Vector: v}
}
return result
// Null reports whether the embedding has no vector values at all.
//
// NOTE(review): NullEmbedding is declared with a Dim-length vector, so this
// presumably returns false for it — confirm that this is intended.
func (m Embedding) Null() bool {
	return len(m.Vector) < 1
}
// Dim returns the number of dimensions of the embedding, i.e. the length
// of its vector. An embedding without vector values has zero dimensions.
func (m Embedding) Dim() int {
return len(m.Vector)
}
// Kind returns the type of face e.g. regular, kids, or ignored.
@ -50,31 +65,46 @@ func (m Embedding) CanMatch() bool {
// Dist calculates the distance to another face embedding.
func (m Embedding) Dist(other Embedding) float64 {
if len(other) == 0 || len(m) != len(other) {
if len(other.Vector) == 0 || len(m.Vector) != len(other.Vector) {
return -1
}
return clusters.EuclideanDist(m, other)
// TODO: Use CosineDist()
return m.Vector.EuclideanDist(other.Vector)
}
// Magnitude returns the face embedding vector length (magnitude).
func (m Embedding) Magnitude() float64 {
return m.Dist(NullEmbedding)
// Norm returns the size (magnitude) of the face embedding vector as
// computed by the vector's EuclideanNorm method,
// see https://builtin.com/data-science/vector-norms.
func (m Embedding) Norm() float64 {
return m.Vector.EuclideanNorm()
}
// MarshalJSON encodes the embedding vector as JSON. An embedding without
// vector values yields an empty byte slice and a nil error; if encoding
// fails, an empty byte slice is returned together with the error.
//
// NOTE(review): an empty byte slice is not a valid JSON value, so
// encoding/json may reject it when an empty embedding is marshaled as part
// of a larger value — confirm whether "null" would be more appropriate.
func (m Embedding) MarshalJSON() ([]byte, error) {
	if len(m.Vector) == 0 {
		return []byte(""), nil
	}

	encoded, err := json.Marshal(m.Vector)

	if err != nil {
		return []byte(""), err
	}

	return encoded, nil
}
// UnmarshalJSON sets the embedding vector from JSON. Empty input is
// ignored and leaves the embedding unchanged.
//
// The method uses a pointer receiver so that the decoded vector is stored
// in the embedding being unmarshaled. With a value receiver, the result
// would be written to a copy and lost when the method returns.
func (m *Embedding) UnmarshalJSON(b []byte) error {
	if len(b) < 1 {
		return nil
	}

	return json.Unmarshal(b, &m.Vector)
}
// JSON returns the face embedding as JSON bytes.
func (m Embedding) JSON() []byte {
var noResult = []byte("")
if len(m) < 1 {
return noResult
}
if result, err := json.Marshal(m); err != nil {
return noResult
} else {
return result
}
result, _ := m.MarshalJSON()
return result
}
// UnmarshalEmbedding parses a single face embedding JSON.
@ -85,7 +115,13 @@ func UnmarshalEmbedding(s string) (result Embedding, err error) {
return result, fmt.Errorf("cannot unmarshal embedding, invalid json provided")
}
err = json.Unmarshal([]byte(s), &result)
var v = make([]float64, Dim)
return result, err
err = json.Unmarshal([]byte(s), &v)
if err != nil {
return NewEmbedding(v), err
}
return NewEmbedding(v), nil
}

File diff suppressed because one or more lines are too long

View file

@ -5,22 +5,33 @@ import (
"fmt"
"strings"
"github.com/montanaflynn/stats"
"github.com/photoprism/photoprism/pkg/clusters"
"github.com/photoprism/photoprism/pkg/vector"
)
// Embeddings represents a face embedding cluster.
type Embeddings []Embedding
// NewEmbeddings creates a new embeddings from inference results.
func NewEmbeddings(inference [][]float32) Embeddings {
result := make(Embeddings, len(inference))
// NewEmbeddings creates embeddings from float64 slices, converting each
// slice with NewEmbedding and keeping the original order.
func NewEmbeddings(values [][]float64) Embeddings {
	result := make(Embeddings, len(values))

	for i, v := range values {
		result[i] = NewEmbedding(v)
	}

	return result
}
// NewEmbeddingsFromInference creates new embeddings from float32 inference slices.
func NewEmbeddingsFromInference(values [][]float32) Embeddings {
result := make(Embeddings, len(values))
var v []float32
var i int
for i, v = range inference {
for i, v = range values {
e := NewEmbedding(v)
if e.CanMatch() {
@ -37,7 +48,7 @@ func (embeddings Embeddings) Empty() bool {
return true
}
return len(embeddings[0]) < 1
return embeddings[0].Dim() < 1
}
// Count returns the number of embeddings.
@ -79,7 +90,7 @@ func (embeddings Embeddings) Float64() [][]float64 {
result := make([][]float64, len(embeddings))
for i, e := range embeddings {
result[i] = e
result[i] = e.Vector
}
return result
@ -124,6 +135,17 @@ func (embeddings Embeddings) JSON() []byte {
}
}
// MarshalJSON encodes the face embeddings as JSON by collecting the vector
// of every embedding, in order, and marshaling the resulting vectors.
func (embeddings Embeddings) MarshalJSON() ([]byte, error) {
	vectors := make(vector.Vectors, len(embeddings))

	for i, e := range embeddings {
		vectors[i] = e.Vector
	}

	return json.Marshal(vectors)
}
// EmbeddingsMidpoint returns the embeddings vector midpoint.
func EmbeddingsMidpoint(embeddings Embeddings) (result Embedding, radius float64, count int) {
// Return if there are no embeddings.
@ -140,33 +162,31 @@ func EmbeddingsMidpoint(embeddings Embeddings) (result Embedding, radius float64
return embeddings[0], 0.0, 1
}
dim := len(embeddings[0])
dim := embeddings[0].Dim()
// No embedding values?
if dim == 0 {
return Embedding{}, 0.0, count
}
result = make(Embedding, dim)
// Create a new embedding with the given vector dimension.
result = NewEmbedding(vector.NullVector(dim))
// The mean of a set of vectors is calculated component-wise.
// Calculate mean values.
// TODO: Improve to get better matching results.
for i := 0; i < dim; i++ {
values := make(stats.Float64Data, count)
values := make(vector.Vector, count)
for j := 0; j < count; j++ {
values[j] = embeddings[j][i]
values[j] = embeddings[j].Vector[i]
}
if m, err := stats.Mean(values); err != nil {
log.Warnf("embeddings: %s", err)
} else {
result[i] = m
}
result.Vector[i] = values.Mean()
}
// Radius is the max embedding distance + 0.01 from result.
for _, emb := range embeddings {
if d := clusters.EuclideanDist(result, emb); d > radius {
if d := result.Dist(emb); d > radius {
radius = d + 0.01
}
}
@ -182,7 +202,11 @@ func UnmarshalEmbeddings(s string) (result Embeddings, err error) {
return result, fmt.Errorf("cannot unmarshal empeddings, invalid json provided")
}
err = json.Unmarshal([]byte(s), &result)
var values [][]float64
return result, err
if err = json.Unmarshal([]byte(s), &values); err != nil {
return result, err
}
return NewEmbeddings(values), nil
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -48,14 +48,14 @@ func RandomEmbeddings(n int, k Kind) (result Embeddings) {
// RandomEmbedding returns a random embedding for testing.
func RandomEmbedding() (result Embedding) {
result = make(Embedding, 512)
result = NewEmbedding(nil)
d := 64 / 512.0
for {
i := 0
for i = range result {
result[i] = RandomFloat64(0, d)
for i = range result.Vector {
result.Vector[i] = RandomFloat64(0, d)
}
if !result.SkipMatching() {
break
@ -67,14 +67,14 @@ func RandomEmbedding() (result Embedding) {
// RandomKidsEmbedding returns a random kids embedding for testing.
func RandomKidsEmbedding() (result Embedding) {
result = make(Embedding, 512)
result = NewEmbedding(nil)
d := 0.1 / 512.0
n := 1 + rand.IntN(len(KidsEmbeddings)-1)
e := KidsEmbeddings[n]
for i := range result {
result[i] = RandomFloat64(e[i], d)
for i := range result.Vector {
result.Vector[i] = RandomFloat64(e.Vector[i], d)
}
return result
@ -82,14 +82,14 @@ func RandomKidsEmbedding() (result Embedding) {
// RandomIgnoredEmbedding returns a random ignored embedding for testing.
func RandomIgnoredEmbedding() (result Embedding) {
result = make(Embedding, 512)
result = NewEmbedding(nil)
d := 0.1 / 512.0
n := 1 + rand.IntN(len(IgnoredEmbeddings)-1)
e := IgnoredEmbeddings[n]
for i := range result {
result[i] = RandomFloat64(e[i], d)
for i := range result.Vector {
result.Vector[i] = RandomFloat64(e.Vector[i], d)
}
return result

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -125,7 +125,7 @@ func (t *Net) getEmbeddings(img image.Image) Embeddings {
if len(output) < 1 {
log.Errorf("faces: inference failed, no output")
} else {
return NewEmbeddings(output[0].Value().([][]float32))
return NewEmbeddingsFromInference(output[0].Value().([][]float32))
}
return nil

View file

@ -80,7 +80,7 @@ func TestNet(t *testing.T) {
// t.Logf("FACE %d IN %s: %#v", i, fileName, f.Embeddings)
embeddings[faceIndices[baseName][i]] = f.Embeddings[0]
} else {
embeddings[faceIndices[baseName][i]] = nil
embeddings[faceIndices[baseName][i]] = NullEmbedding
}
}
}

View file

@ -83,7 +83,7 @@ func (m *Face) SetEmbeddings(embeddings face.Embeddings) (err error) {
m.embedding, m.SampleRadius, m.Samples = face.EmbeddingsMidpoint(embeddings)
if len(m.embedding) != len(face.NullEmbedding) {
if m.embedding.Dim() != face.Dim {
return fmt.Errorf("embedding has invalid number of values")
}
@ -122,7 +122,7 @@ func (m *Face) Matched() error {
func (m *Face) Embedding() face.Embedding {
if len(m.EmbeddingJSON) == 0 {
return face.Embedding{}
} else if len(m.embedding) > 0 {
} else if m.embedding.Dim() > 0 {
return m.embedding
} else if err := json.Unmarshal(m.EmbeddingJSON, &m.embedding); err != nil {
log.Errorf("failed parsing face embedding json: %s", err)
@ -142,7 +142,7 @@ func (m *Face) Match(embeddings face.Embeddings) (match bool, dist float64) {
faceEmbedding := m.Embedding()
if len(faceEmbedding) == 0 {
if faceEmbedding.Dim() == 0 {
// Should never happen.
return false, dist
}

File diff suppressed because one or more lines are too long

View file

@ -10,9 +10,9 @@ func TestFaces_Embeddings(t *testing.T) {
m := FaceFixtures.Get("joe-biden")
m1 := FaceFixtures.Get("jane-doe")
r := Faces{m, m1}.Embeddings()
len1 := len(m.Embedding())
len2 := len(m1.Embedding())
assert.Equal(t, len1+len2, len(r[0])+len(r[1]))
len1 := m.Embedding().Dim()
len2 := m1.Embedding().Dim()
assert.Equal(t, len1+len2, len(r[0].Vector)+len(r[1].Vector))
}
func TestFaces_IDs(t *testing.T) {

File diff suppressed because one or more lines are too long

View file

@ -272,7 +272,7 @@ func (m *Marker) SetFace(f *Face, dist float64) (updated bool, err error) {
// Calculate the smallest distance to embeddings.
for _, e := range m.Embeddings() {
if len(e) != len(faceEmbedding) {
if e.Dim() != faceEmbedding.Dim() {
continue
}
@ -403,11 +403,12 @@ func (m *Marker) Create() error {
// Embeddings returns parsed marker embeddings.
func (m *Marker) Embeddings() face.Embeddings {
var err error
if len(m.EmbeddingsJSON) == 0 {
return face.Embeddings{}
} else if len(m.embeddings) > 0 {
return m.embeddings
} else if err := json.Unmarshal(m.EmbeddingsJSON, &m.embeddings); err != nil {
} else if m.embeddings, err = face.UnmarshalEmbeddings(string(m.EmbeddingsJSON)); err != nil {
log.Errorf("markers: %s while parsing embeddings json", err)
}

View file

@ -431,7 +431,7 @@ func TestMarker_Embeddings(t *testing.T) {
t.Run("Success", func(t *testing.T) {
m := MarkerFixtures.Get("1000003-4")
assert.Equal(t, 0.013083286379677253, m.Embeddings()[0][0])
assert.Equal(t, 0.013083286379677253, m.Embeddings()[0].Vector[0])
})
t.Run("empty embedding", func(t *testing.T) {
m := Marker{}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long