AI: Default to the 720x720 fit thumb for generating captions #3438 #5011

These changes also auto assign labels based on the generated captions.

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer 2025-07-16 14:58:44 +02:00
parent b608eae3e5
commit c05fe19ba9
17 changed files with 308 additions and 47 deletions

View file

@ -10,28 +10,29 @@ var (
Type: ModelTypeLabels,
Name: "NASNet",
Version: ModelVersionMobile,
Resolution: 224,
Resolution: 224, // Cropped image tile with 224x224 pixels.
Tags: []string{"photoprism"},
}
NsfwModel = &Model{
Type: ModelTypeNsfw,
Name: "Nsfw",
Version: ModelVersionNone,
Resolution: 224,
Resolution: 224, // Cropped image tile with 224x224 pixels.
Tags: []string{"serve"},
}
FacenetModel = &Model{
Type: ModelTypeFace,
Name: "FaceNet",
Version: ModelVersionNone,
Resolution: 160,
Resolution: 160, // Cropped image tile with 160x160 pixels.
Tags: []string{"serve"},
}
CaptionModel = &Model{
Type: ModelTypeCaption,
Name: CaptionModelDefault,
Version: ModelVersionLatest,
Prompt: CaptionPromptDefault,
Type: ModelTypeCaption,
Name: CaptionModelDefault,
Version: ModelVersionLatest,
Resolution: 720, // Original aspect ratio, with a max size of 720 x 720 pixels.
Prompt: CaptionPromptDefault,
Service: Service{
// Uri: "http://photoprism-vision:5000/api/v1/vision/caption",
FileScheme: scheme.Data,

View file

@ -38,6 +38,6 @@ func TestThumb(t *testing.T) {
})
t.Run("Caption", func(t *testing.T) {
size := Thumb(ModelTypeCaption)
assert.Equal(t, thumb.SizeTile224, size)
assert.Equal(t, thumb.SizeFit720, size)
})
}

View file

@ -25,6 +25,7 @@ Models:
Instead, start describing the content by identifying the subjects, location, and
any actions that might be performed. Use explicit language to describe the scene
if necessary for a proper understanding.
Resolution: 720
Service:
FileScheme: data
RequestFormat: vision

View file

@ -1,11 +1,13 @@
package api
import (
"github.com/photoprism/photoprism/internal/config"
"github.com/stretchr/testify/assert"
"github.com/tidwall/gjson"
"net/http"
"testing"
"github.com/stretchr/testify/assert"
"github.com/tidwall/gjson"
"github.com/photoprism/photoprism/internal/config"
)
func TestGetClientConfig(t *testing.T) {

View file

@ -73,7 +73,7 @@ func AddPhotoLabel(router *gin.RouterGroup) {
return
}
if photoLabel.Uncertainty > frm.Uncertainty {
if photoLabel.HasID() && photoLabel.Uncertainty > frm.Uncertainty {
if updateErr := photoLabel.Updates(map[string]interface{}{
"Uncertainty": frm.Uncertainty,
"LabelSrc": entity.SrcManual,

View file

@ -218,6 +218,8 @@ func resetCache(c *config.Config) {
} else {
log.Infof("found no cache files")
}
entity.FlushCaches()
}
// resetSidecarJson removes generated *.json sidecar files.

View file

@ -60,6 +60,7 @@ import (
"github.com/photoprism/photoprism/pkg/fs"
"github.com/photoprism/photoprism/pkg/i18n"
"github.com/photoprism/photoprism/pkg/rnd"
"github.com/photoprism/photoprism/pkg/txt"
)
var initThumbsMutex sync.Mutex
@ -88,6 +89,11 @@ func init() {
LowMem = TotalMem < MinMem
}
// Disable entity cache if requested.
if txt.Bool(os.Getenv(EnvVar("disable-photolabelcache"))) {
entity.CachePhotoLabels = false
}
initThumbs()
}

View file

@ -38,3 +38,14 @@ func Log(model, action string, err error) {
log.Errorf("%s: %s (%s)", model, err, action)
}
}
// FlushCaches flushes all in-memory entity caches by calling the flush
// function of each one: albums, cameras, lenses, countries, labels,
// photo labels, and sessions.
func FlushCaches() {
	FlushAlbumCache()
	FlushCameraCache()
	FlushLensCache()
	FlushCountryCache()
	FlushLabelCache()
	FlushPhotoLabelCache()
	FlushSessionCache()
}

View file

@ -1,7 +1,9 @@
package entity
import (
"errors"
"fmt"
"sync"
"time"
gc "github.com/patrickmn/go-cache"
@ -10,45 +12,116 @@ import (
"github.com/photoprism/photoprism/pkg/txt"
)
// labelCache expiration times and cleanup interval.
// Label and PhotoLabel cache expiration times and cleanup interval.
const (
labelDefaultExpiration = 15 * time.Minute
labelErrorExpiration = 5 * time.Minute
labelCleanupInterval = 5 * time.Minute
labelCacheDefaultExpiration = 15 * time.Minute
labelCacheErrorExpiration = 5 * time.Minute
labelCacheCleanupInterval = 10 * time.Minute
photoLabelCacheExpiration = time.Hour
)
// labelCache stores Label entities for faster indexing.
var labelCache = gc.New(labelDefaultExpiration, labelCleanupInterval)
// Cache Label and PhotoLabel entities for faster indexing.
var (
	// CachePhotoLabels enables the PhotoLabel cache. When false, the flush and
	// warm-up functions return early and FindPhotoLabel neither reads from nor
	// writes to photoLabelCache.
	CachePhotoLabels = true
	// labelCache holds *Label values keyed by label slug.
	labelCache = gc.New(labelCacheDefaultExpiration, labelCacheCleanupInterval)
	// photoLabelCache holds PhotoLabel values (not pointers) keyed by photoLabelCacheKey.
	photoLabelCache = gc.New(photoLabelCacheExpiration, labelCacheCleanupInterval)
	// photoLabelCacheMutex is held while the PhotoLabel cache is flushed or warmed up.
	photoLabelCacheMutex = sync.Mutex{}
)
// photoLabelCacheKey builds the photoLabelCache key for a photo/label pair by
// joining both IDs in decimal notation with a hyphen, e.g. "12-34".
func photoLabelCacheKey(photoId, labelId uint) string {
	return fmt.Sprint(photoId) + "-" + fmt.Sprint(labelId)
}
// FlushLabelCache removes every entry from labelCache, including the empty
// placeholders that FindLabel stores for lookups that did not succeed.
func FlushLabelCache() {
	labelCache.Flush()
}
// FlushPhotoLabelCache empties the PhotoLabel cache while holding
// photoLabelCacheMutex. It does nothing when CachePhotoLabels is false.
func FlushPhotoLabelCache() {
	if CachePhotoLabels {
		photoLabelCacheMutex.Lock()
		defer photoLabelCacheMutex.Unlock()

		photoLabelCache.Flush()
	}
}
// FlushCachedPhotoLabel evicts the cache entry that belongs to the given
// PhotoLabel. Nothing happens if m is nil, if CachePhotoLabels is false,
// or if m does not have both a photo and a label ID.
func FlushCachedPhotoLabel(m *PhotoLabel) {
	if m == nil || !CachePhotoLabels || !m.HasID() {
		return
	}

	key := photoLabelCacheKey(m.PhotoID, m.LabelID)
	photoLabelCache.Delete(key)
}
// WarmPhotoLabelCache preloads the PhotoLabel cache with all rows of the
// photos_labels table, using the default expiration. It holds
// photoLabelCacheMutex for the duration of the call and returns the query
// error, if any. When CachePhotoLabels is false, it returns nil immediately.
func WarmPhotoLabelCache() (err error) {
	if !CachePhotoLabels {
		return nil
	}

	photoLabelCacheMutex.Lock()
	defer photoLabelCacheMutex.Unlock()

	// Load the existing photo label assignments.
	var rows []PhotoLabel

	query := UnscopedDb().Raw("SELECT * FROM photos_labels").Scan(&rows)

	if err = query.Error; err != nil {
		return err
	}

	// Store a copy of each assignment under its cache key.
	for i := range rows {
		photoLabelCache.SetDefault(rows[i].CacheKey(), rows[i])
	}

	return nil
}
// FindLabel finds the matching label based on the name provided, or returns an error if not found.
func FindLabel(name string, cached bool) (*Label, error) {
labelSlug := txt.Slug(name)
if name == "" {
return &Label{}, errors.New("missing label name")
}
if labelSlug == "" {
return &Label{}, fmt.Errorf("invalid label slug %s", clean.LogQuote(labelSlug))
// Use the label slug as the natural cache key.
cacheKey := txt.Slug(name)
if cacheKey == "" {
return &Label{}, fmt.Errorf("invalid label slug %s", clean.LogQuote(cacheKey))
}
// Return cached label, if found.
if cached {
if cacheData, ok := labelCache.Get(labelSlug); ok {
log.Tracef("label: cache hit for %s", labelSlug)
if cacheData, ok := labelCache.Get(cacheKey); ok {
log.Tracef("label: cache hit for %s", cacheKey)
if result := cacheData.(*Label); !result.HasID() {
return &Label{}, fmt.Errorf("label not found")
} else {
// Get cached data.
if result := cacheData.(*Label); result.HasID() {
// Return cached entity.
return result, nil
} else {
// Return cached "not found" error.
return &Label{}, fmt.Errorf("label not found")
}
}
}
// Fetch and cache label from database.
// Fetch and cache label.
result := &Label{}
if find := Db().First(result, "(label_slug <> '' AND label_slug = ? OR custom_slug <> '' AND custom_slug = ?)", labelSlug, labelSlug); find.RecordNotFound() {
labelCache.Set(labelSlug, result, labelErrorExpiration)
if find := Db().First(result, "(label_slug <> '' AND label_slug = ? OR custom_slug <> '' AND custom_slug = ?)", cacheKey, cacheKey); find.RecordNotFound() {
labelCache.Set(cacheKey, result, labelCacheErrorExpiration)
return result, fmt.Errorf("label not found")
} else if find.Error != nil {
labelCache.Set(labelSlug, result, labelErrorExpiration)
labelCache.Set(cacheKey, result, labelCacheErrorExpiration)
return result, find.Error
} else {
labelCache.SetDefault(result.LabelSlug, result)
@ -57,7 +130,52 @@ func FindLabel(name string, cached bool) (*Label, error) {
return result, nil
}
// FlushLabelCache removes all cached Label entities from the cache.
func FlushLabelCache() {
labelCache.Flush()
// FindPhotoLabel finds the photo label assignment for the specified photo and label IDs.
//
// If cached is true and CachePhotoLabels is enabled, photoLabelCache is checked
// first; a cached entry without IDs represents an earlier unsuccessful lookup
// and yields a "not found" error without querying the database. On a cache miss,
// or if the cache is not used, the assignment is fetched from the database and
// the outcome is cached: hits with the default expiration, failed lookups with
// labelCacheErrorExpiration.
//
// An error is returned if either ID is zero, if no matching row exists, or if
// the query fails. The returned pointer is never nil, even when an error is returned.
func FindPhotoLabel(photoId, labelId uint, cached bool) (*PhotoLabel, error) {
	if photoId == 0 {
		return &PhotoLabel{}, errors.New("invalid photo id")
	} else if labelId == 0 {
		return &PhotoLabel{}, errors.New("invalid label id")
	}

	// The key always contains both numbers and a separator, so it cannot be empty.
	cacheKey := photoLabelCacheKey(photoId, labelId)

	// Return cached photo-label, if found.
	if cached && CachePhotoLabels {
		if cacheData, ok := photoLabelCache.Get(cacheKey); ok {
			log.Tracef("photo-label: cache hit for %s", cacheKey)

			// Entries are stored by value, so the caller receives its own copy.
			if result := cacheData.(PhotoLabel); result.HasID() {
				return &result, nil
			}

			// Return cached "not found" error.
			return &PhotoLabel{}, fmt.Errorf("photo-label not found")
		}
	}

	// Fetch and cache photo-label.
	result := &PhotoLabel{}
	find := Db().First(result, "photo_id = ? AND label_id = ?", photoId, labelId)

	switch {
	case find.RecordNotFound():
		if CachePhotoLabels {
			photoLabelCache.Set(cacheKey, *result, labelCacheErrorExpiration)
		}

		return result, fmt.Errorf("photo-label not found")
	case find.Error != nil:
		// Cache the failed lookup for a shorter time so that it is retried soon.
		if CachePhotoLabels {
			photoLabelCache.Set(cacheKey, *result, labelCacheErrorExpiration)
		}

		return result, find.Error
	}

	if CachePhotoLabels {
		photoLabelCache.SetDefault(cacheKey, *result)
	}

	return result, nil
}

View file

@ -38,3 +38,54 @@ func TestFindLabel(t *testing.T) {
assert.NotNil(t, result)
})
}
// TestFindPhotoLabel checks PhotoLabel lookups with and without the cache
// for existing, missing, and invalid photo/label ID combinations.
func TestFindPhotoLabel(t *testing.T) {
	t.Run("Success", func(t *testing.T) {
		if err := WarmPhotoLabelCache(); err != nil {
			t.Fatal(err)
		}

		// See PhotoFixtures and LabelFixtures for test data.
		var photoID, labelID uint = 1000000, 1000001

		// verify performs a cached lookup and compares it with the fixture values.
		verify := func() {
			found, err := FindPhotoLabel(photoID, labelID, true)
			assert.NoError(t, err)
			assert.Equal(t, labelID, found.LabelID)
			assert.Equal(t, photoID, found.PhotoID)
			assert.Equal(t, SrcImage, found.LabelSrc)
			assert.Equal(t, 38, found.Uncertainty)
		}

		// Look up the assignment after the cache warm-up.
		verify()

		// Look it up again once the cache has been flushed.
		FlushPhotoLabelCache()
		verify()
	})
	t.Run("NotFound", func(t *testing.T) {
		// Cached, uncached, and cached again.
		for _, useCache := range []bool{true, false, true} {
			result, err := FindPhotoLabel(1, 99999999, useCache)
			assert.Error(t, err)
			assert.NotNil(t, result)
		}
	})
	t.Run("InvalidID", func(t *testing.T) {
		// Cached, uncached, and cached again.
		for _, useCache := range []bool{true, false, true} {
			result, err := FindPhotoLabel(0, 0, useCache)
			assert.Error(t, err)
			assert.NotNil(t, result)
		}
	})
}

View file

@ -778,7 +778,7 @@ func (m *Photo) AddLabels(labels classify.Labels) {
continue
}
if photoLabel.Uncertainty > classifyLabel.Uncertainty && photoLabel.Uncertainty < 100 {
if photoLabel.HasID() && photoLabel.Uncertainty > classifyLabel.Uncertainty && photoLabel.Uncertainty < 100 {
if err := photoLabel.Updates(map[string]interface{}{
"Uncertainty": classifyLabel.Uncertainty,
"LabelSrc": classifyLabel.Source,

View file

@ -1,7 +1,6 @@
package entity
import (
"github.com/photoprism/photoprism/internal/ai/classify"
"github.com/photoprism/photoprism/pkg/txt"
)
@ -61,10 +60,22 @@ func (m *Photo) UpdateCaptionLabels() error {
return nil
} else if !m.HasCaption() {
return nil
} else if SrcPriority[m.GetCaptionSrc()] < SrcPriority[SrcMeta] {
}
captionSrcPriority := SrcPriority[m.GetCaptionSrc()]
if captionSrcPriority < SrcPriority[SrcImage] {
return nil
}
var uncertainty int
if captionSrcPriority < SrcPriority[SrcMeta] {
uncertainty = 20
} else {
uncertainty = 15
}
keywords := txt.UniqueKeywords(m.GetCaption())
var labelIds []uint
@ -76,9 +87,9 @@ func (m *Photo) UpdateCaptionLabels() error {
}
labelIds = append(labelIds, label.ID)
FirstOrCreatePhotoLabel(NewPhotoLabel(m.ID, label.ID, 15, classify.SrcCaption))
FirstOrCreatePhotoLabel(NewPhotoLabel(m.ID, label.ID, uncertainty, SrcCaption))
}
}
return Db().Where("label_src = ? AND photo_id = ? AND label_id NOT IN (?)", classify.SrcCaption, m.ID, labelIds).Delete(&PhotoLabel{}).Error
return Db().Where("label_src = ? AND photo_id = ? AND label_id NOT IN (?)", SrcCaption, m.ID, labelIds).Delete(&PhotoLabel{}).Error
}

View file

@ -1,6 +1,8 @@
package entity
import (
"github.com/jinzhu/gorm"
"github.com/photoprism/photoprism/internal/ai/classify"
)
@ -36,12 +38,26 @@ func NewPhotoLabel(photoID, labelID uint, uncertainty int, source string) *Photo
// Updates multiple columns in the database.
func (m *PhotoLabel) Updates(values interface{}) error {
return UnscopedDb().Model(m).UpdateColumns(values).Error
if err := UnscopedDb().Model(m).UpdateColumns(values).Error; err != nil {
return err
}
FlushCachedPhotoLabel(m)
return nil
}
// Update a column in the database.
func (m *PhotoLabel) Update(attr string, value interface{}) error {
return UnscopedDb().Model(m).UpdateColumn(attr, value).Error
if err := UnscopedDb().Model(m).UpdateColumn(attr, value).Error; err != nil {
return err
}
FlushCachedPhotoLabel(m)
return nil
}
// AfterUpdate removes the cached entry for this photo label assignment,
// so that a stale copy is not served after the record has been updated.
// It always returns nil.
func (m *PhotoLabel) AfterUpdate(tx *gorm.DB) (err error) {
	FlushCachedPhotoLabel(m)
	return
}
// Save updates the record in the database or inserts a new record if it does not already exist.
@ -64,26 +80,52 @@ func (m *PhotoLabel) Create() error {
return Db().Create(m).Error
}
// AfterCreate removes any cached entry for this photo label assignment,
// e.g. a cached "not found" result from a lookup made before the record
// was created. It always returns nil.
func (m *PhotoLabel) AfterCreate(scope *gorm.Scope) error {
	FlushCachedPhotoLabel(m)
	return nil
}
// Delete deletes the label reference. The cached entry for this assignment
// is removed first, then the record is deleted and the resulting database
// error, if any, is returned.
func (m *PhotoLabel) Delete() error {
	FlushCachedPhotoLabel(m)
	return Db().Delete(m).Error
}
// AfterDelete removes the cached entry for this photo label assignment,
// so that it is no longer found in the cache after the record has been
// deleted. It always returns nil.
func (m *PhotoLabel) AfterDelete(tx *gorm.DB) (err error) {
	FlushCachedPhotoLabel(m)
	return
}
// HasID reports whether both the photo ID and the label ID are set,
// i.e. greater than zero. It is safe to call on a nil receiver,
// for which it returns false.
func (m *PhotoLabel) HasID() bool {
	return m != nil && m.PhotoID > 0 && m.LabelID > 0
}
// CacheKey returns a string key for caching the entity, derived from its
// photo and label IDs via photoLabelCacheKey. Unlike HasID, it does not
// check for a nil receiver, so m must not be nil.
func (m *PhotoLabel) CacheKey() string {
	return photoLabelCacheKey(m.PhotoID, m.LabelID)
}
// FirstOrCreatePhotoLabel returns the existing row, inserts a new row or nil in case of errors.
func FirstOrCreatePhotoLabel(m *PhotoLabel) *PhotoLabel {
if m == nil {
return nil
} else if m.PhotoID < 1 || m.LabelID < 1 {
} else if !m.HasID() {
return nil
}
result := &PhotoLabel{}
if err := Db().Where("photo_id = ? AND label_id = ?", m.PhotoID, m.LabelID).First(result).Error; err == nil {
// Try to find and return an existing label. Otherwise, create a new one and return it.
if result, err := FindPhotoLabel(m.PhotoID, m.LabelID, true); err == nil {
return result
} else if createErr := m.Create(); createErr == nil {
return m
} else if err = Db().Where("photo_id = ? AND label_id = ?", m.PhotoID, m.LabelID).First(result).Error; err == nil {
} else if result, err = FindPhotoLabel(m.PhotoID, m.LabelID, false); err == nil {
return result
} else {
log.Errorf("photo-label: %s (find or create)", createErr)

View file

@ -188,7 +188,7 @@ func IndexedFiles() (result FileMap, err error) {
// Query known duplicates.
var duplicates []File
if err := UnscopedDb().Raw("SELECT file_root, file_name, mod_time FROM duplicates").Scan(&duplicates).Error; err != nil {
if err = UnscopedDb().Raw("SELECT file_root, file_name, mod_time FROM duplicates").Scan(&duplicates).Error; err != nil {
return result, err
}
@ -199,7 +199,7 @@ func IndexedFiles() (result FileMap, err error) {
// Query indexed files.
var files []File
if err := UnscopedDb().Raw("SELECT file_root, file_name, mod_time FROM files WHERE file_missing = 0 AND deleted_at IS NULL").Scan(&files).Error; err != nil {
if err = UnscopedDb().Raw("SELECT file_root, file_name, mod_time FROM files WHERE file_missing = 0 AND deleted_at IS NULL").Scan(&files).Error; err != nil {
return result, err
}

View file

@ -40,6 +40,7 @@ func NewIndex(conf *config.Config, convert *Convert, files *Files, photos *Photo
return nil
}
// Create new indexer instance.
i := &Index{
conf: conf,
convert: convert,
@ -49,6 +50,11 @@ func NewIndex(conf *config.Config, convert *Convert, files *Files, photos *Photo
findLabels: !conf.DisableClassification(),
}
// Warm up the cache.
if err := entity.WarmPhotoLabelCache(); err != nil {
log.Warnf("index: %s (cache warm-up)", err)
}
return i
}

View file

@ -23,6 +23,7 @@ func (ind *Index) Caption(file *MediaFile) (caption *vision.CaptionResult, err e
// Get matching labels from computer vision model.
if caption, _, err = vision.Caption(vision.Files{fileName}, media.SrcLocal); err != nil {
// Failed.
} else if caption.Text != "" {
log.Infof("vision: generated caption for %s [%s]", clean.Log(file.BaseName()), time.Since(start))
}

View file

@ -80,6 +80,13 @@ func Find(pixels int) (name Name, size Size) {
// Vision returns a suitable tile size for computer vision applications.
func Vision(resolution int) (size Size) {
// If specifically requested, return the 720x720 fit size,
// which should always exist.
if resolution == SizeFit720.Width {
return SizeFit720
}
// Check existing tile sizes.
for _, size = range All {
if size.Height != size.Width {
continue
@ -92,5 +99,7 @@ func Vision(resolution int) (size Size) {
}
}
// If no other size matches,
// return the default size.
return SizeTile224
}