mirror of
https://github.com/photoprism/photoprism.git
synced 2026-01-23 02:24:24 +00:00
Merge branch 'develop' into feature/batch-edit
This commit is contained in:
commit
d2541e674a
59 changed files with 2571 additions and 216 deletions
|
|
@ -1,6 +1,6 @@
|
|||
# PhotoPrism® Repository Guidelines
|
||||
|
||||
**Last Updated:** November 12, 2025
|
||||
**Last Updated:** November 14, 2025
|
||||
|
||||
## Purpose
|
||||
|
||||
|
|
@ -17,6 +17,7 @@ Learn more: https://agents.md/
|
|||
- REST API: https://docs.photoprism.dev/ (Swagger), https://docs.photoprism.app/developer-guide/api/ (Docs)
|
||||
- Code Maps: [`CODEMAP.md`](CODEMAP.md) (Backend/Go), [`frontend/CODEMAP.md`](frontend/CODEMAP.md) (Frontend/JS)
|
||||
- Face Detection & Embeddings Notes: [`internal/ai/face/README.md`](internal/ai/face/README.md)
|
||||
- Vision Engine Guides: [`internal/ai/vision/openai/README.md`](internal/ai/vision/openai/README.md), [`internal/ai/vision/ollama/README.md`](internal/ai/vision/ollama/README.md)
|
||||
|
||||
> Quick Tip: to inspect GitHub issue details without leaving the terminal, run `curl -s https://api.github.com/repos/photoprism/photoprism/issues/<id>`.
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
PhotoPrism — Backend CODEMAP
|
||||
|
||||
**Last Updated:** November 2, 2025
|
||||
**Last Updated:** November 14, 2025
|
||||
|
||||
Purpose
|
||||
- Give agents and contributors a fast, reliable map of where things live and how they fit together, so you can add features, fix bugs, and write tests without spelunking.
|
||||
|
|
@ -35,6 +35,7 @@ High-Level Package Map (Go)
|
|||
- `internal/config` — configuration, flags/env/options, client config, DB init/migrate
|
||||
- `internal/entity` — GORM v1 models, queries, search helpers, migrations
|
||||
- `internal/photoprism` — core domain logic (indexing, import, faces, thumbnails, cleanup)
|
||||
- `internal/ai/vision` — multi-engine computer vision pipeline (models, adapters, schema). Adapter docs: [`internal/ai/vision/openai/README.md`](internal/ai/vision/openai/README.md) and [`internal/ai/vision/ollama/README.md`](internal/ai/vision/ollama/README.md).
|
||||
- `internal/workers` — background schedulers (index, vision, sync, meta, backup)
|
||||
- `internal/auth` — ACL, sessions, OIDC
|
||||
- `internal/service` — cluster/portal, maps, hub, webdav
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# Ubuntu 25.10 (Questing Quokka)
|
||||
FROM photoprism/develop:251018-questing
|
||||
FROM photoprism/develop:251113-questing
|
||||
|
||||
# Harden npm usage by default (applies to npm ci / install in dev container)
|
||||
ENV NPM_CONFIG_IGNORE_SCRIPTS=true
|
||||
|
|
|
|||
|
|
@ -388,7 +388,8 @@ services:
|
|||
## Login with "user / photoprism" and "admin / photoprism".
|
||||
keycloak:
|
||||
image: quay.io/keycloak/keycloak:25.0
|
||||
stop_grace_period: 30s
|
||||
stop_grace_period: 20s
|
||||
profiles: [ "all", "auth", "keycloak" ]
|
||||
command: "start-dev" # development mode, do not use this in production!
|
||||
links:
|
||||
- "traefik:localssl.dev"
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ msgid ""
|
|||
msgstr ""
|
||||
"Project-Id-Version: \n"
|
||||
"Report-Msgid-Bugs-To: ci@photoprism.app\n"
|
||||
"PO-Revision-Date: 2025-11-11 22:02+0000\n"
|
||||
"PO-Revision-Date: 2025-11-14 22:02+0000\n"
|
||||
"Last-Translator: dtsolakis <dtsola@eranet.gr>\n"
|
||||
"Language: el\n"
|
||||
"Content-Type: text/plain; charset=UTF-8\n"
|
||||
|
|
@ -88,7 +88,7 @@ msgstr "12 ώρες"
|
|||
#: src/component/user/edit/dialog.vue:304 src/page/settings/account.vue:168
|
||||
#: src/component/settings/passcode.vue:25
|
||||
msgid "2-Factor Authentication"
|
||||
msgstr "Αυθεντικοποίηση 2 παραγόντων"
|
||||
msgstr "Αυθεντικοποίηση 2 Παραγόντων"
|
||||
|
||||
#: src/component/user/edit/dialog.vue:303 src/options/auth.js:47
|
||||
msgid "2FA"
|
||||
|
|
@ -246,7 +246,7 @@ msgstr "Προστέθηκε"
|
|||
#: src/component/location/dialog.vue:21 src/component/location/dialog.vue:26
|
||||
#: src/component/photo/edit/details.vue:158
|
||||
msgid "Adjust Location"
|
||||
msgstr "Ρύθμιση τοποθεσίας"
|
||||
msgstr "Ρύθμιση Τοποθεσίας"
|
||||
|
||||
#: src/options/admin.js:6 src/common/util.js:798 src/options/auth.js:6
|
||||
msgid "Admin"
|
||||
|
|
@ -254,7 +254,7 @@ msgstr "Διαχειριστής"
|
|||
|
||||
#: src/page/settings.vue:86
|
||||
msgid "Advanced"
|
||||
msgstr "Σύνθετο"
|
||||
msgstr "Προηγμένες Ρυθμίσεις"
|
||||
|
||||
#: src/options/options.js:378
|
||||
msgid "After 1 day"
|
||||
|
|
@ -298,7 +298,7 @@ msgstr "Άλμπουμ"
|
|||
|
||||
#: src/page/settings/advanced.vue:193
|
||||
msgid "Album Backups"
|
||||
msgstr "Αντίγραφα ασφαλείας άλμπουμ"
|
||||
msgstr "Αντίγραφα Ασφαλείας Άλμπουμ"
|
||||
|
||||
#: src/page/albums.vue:1265
|
||||
msgid "Album created"
|
||||
|
|
@ -374,7 +374,7 @@ msgstr "Όλα τα πρωτότυπα"
|
|||
|
||||
#: src/component/photo/toolbar.vue:381 src/page/albums.vue:455
|
||||
msgid "All Years"
|
||||
msgstr "Όλα τα έτη"
|
||||
msgstr "Όλα τα Χρόνια"
|
||||
|
||||
#: src/component/share/dialog.vue:134
|
||||
msgid "Alternatively, you can upload files directly to WebDAV servers like Nextcloud."
|
||||
|
|
@ -440,7 +440,7 @@ msgstr "Έγκριση και αποθήκευση αλλαγών"
|
|||
|
||||
#: src/page/settings/account.vue:183 src/component/settings/apps.vue:25
|
||||
msgid "Apps and Devices"
|
||||
msgstr "Εφαρμογές και συσκευές"
|
||||
msgstr "Εφαρμογές και Συσκευές"
|
||||
|
||||
#: src/component/lightbox.vue:2315 src/component/photo/edit/info.vue:238
|
||||
#: src/component/photo/edit/info.vue:239
|
||||
|
|
@ -558,7 +558,7 @@ msgstr "Βιογραφικό"
|
|||
|
||||
#: src/page/settings/account.vue:212
|
||||
msgid "Birth Date"
|
||||
msgstr "Ημερομηνία γέννησης"
|
||||
msgstr "Ημερομηνία Γέννησης"
|
||||
|
||||
#: src/options/options.js:403
|
||||
msgid "Black"
|
||||
|
|
@ -697,7 +697,7 @@ msgstr "Αλλαγή Άβαταρ"
|
|||
|
||||
#: src/page/settings/account.vue:155 src/component/settings/password.vue:16
|
||||
msgid "Change Password"
|
||||
msgstr "Νέος κωδικός πρόσβασης"
|
||||
msgstr "Αλλαγή Κωδικού Πρόσβασης"
|
||||
|
||||
#: src/page/settings/general.vue:328
|
||||
msgid "Change personal profile and security settings."
|
||||
|
|
@ -733,7 +733,7 @@ msgstr "Οι αλλαγές αποθηκεύτηκαν επιτυχώς"
|
|||
|
||||
#: src/page/settings/advanced.vue:16
|
||||
msgid "Changes to the advanced settings require a restart to take effect."
|
||||
msgstr "Οι αλλαγές στις ρυθμίσεις για προχωρημένους απαιτούν επανεκκίνηση για να τεθούν σε ισχύ."
|
||||
msgstr "Οι αλλαγές στις προηγμένες ρυθμίσεις απαιτούν επανεκκίνηση για να τεθούν σε ισχύ."
|
||||
|
||||
#: src/component/photo/edit/info.vue:230 src/component/photo/edit/info.vue:231
|
||||
msgid "Checked"
|
||||
|
|
@ -988,7 +988,7 @@ msgstr "Βάση δεδομένων"
|
|||
|
||||
#: src/page/settings/advanced.vue:177
|
||||
msgid "Database Backups"
|
||||
msgstr "Αντίγραφα ασφαλείας βάσης δεδομένων"
|
||||
msgstr "Αντίγραφα Ασφαλείας Βάσης Δεδομένων"
|
||||
|
||||
#: src/locales.js:328
|
||||
msgid "Databases"
|
||||
|
|
@ -1066,7 +1066,7 @@ msgstr "Διαστάσεις"
|
|||
|
||||
#: src/page/settings/advanced.vue:84
|
||||
msgid "Disable Backups"
|
||||
msgstr "Απενεργοποίηση αντιγράφων ασφαλείας"
|
||||
msgstr "Απενεργοποίηση Αντιγράφων Ασφαλείας"
|
||||
|
||||
#: src/page/settings/advanced.vue:366
|
||||
msgid "Disable Darktable"
|
||||
|
|
@ -1094,7 +1094,7 @@ msgstr "Απενεργοποίηση των διαδραστικών παγκό
|
|||
|
||||
#: src/page/settings/advanced.vue:116
|
||||
msgid "Disable Places"
|
||||
msgstr "Απενεργοποίηση Places"
|
||||
msgstr "Απενεργοποίηση Τοποθεσιών"
|
||||
|
||||
#: src/page/settings/advanced.vue:382
|
||||
msgid "Disable RawTherapee"
|
||||
|
|
@ -1106,7 +1106,7 @@ msgstr "Απενεργοποίηση TensorFlow"
|
|||
|
||||
#: src/page/settings/advanced.vue:446
|
||||
msgid "Disable Vectors"
|
||||
msgstr "Απενεργοποίηση διανυσμάτων"
|
||||
msgstr "Απενεργοποίηση Διανυσμάτων"
|
||||
|
||||
#: src/page/settings/advanced.vue:100
|
||||
msgid "Disable WebDAV"
|
||||
|
|
@ -1142,7 +1142,7 @@ msgstr "Απόρριψη"
|
|||
#: src/page/admin/users.vue:267 src/page/settings/account.vue:76
|
||||
#: src/page/settings/account.vue:78 src/locales.js:321
|
||||
msgid "Display Name"
|
||||
msgstr "Εμφανιζόμενο όνομα"
|
||||
msgstr "Εμφανιζόμενο Όνομα"
|
||||
|
||||
#: src/page/settings/content.vue:170
|
||||
msgid "Display picture captions in search results."
|
||||
|
|
@ -1247,11 +1247,11 @@ msgstr "Διάρκεια"
|
|||
|
||||
#: src/page/settings/advanced.vue:285
|
||||
msgid "Dynamic Previews"
|
||||
msgstr "Δυναμικές προεπισκοπήσεις"
|
||||
msgstr "Δυναμικές Προεπισκοπήσεις"
|
||||
|
||||
#: src/page/settings/advanced.vue:261
|
||||
msgid "Dynamic Size Limit: %{n}px"
|
||||
msgstr "Όριο δυναμικού μεγέθους: %{n}px"
|
||||
msgstr "Όριο Δυναμικού Μεγέθους: %{n}px"
|
||||
|
||||
#: src/page/about/feedback.vue:80 src/page/about/feedback.vue:79
|
||||
msgid "E-Mail"
|
||||
|
|
@ -1369,7 +1369,7 @@ msgstr "Η εξαγωγή μεταδεδομένων με το ExifTool απαι
|
|||
|
||||
#: src/page/settings/advanced.vue:52
|
||||
msgid "Experimental Features"
|
||||
msgstr "Πειραματικά Χαρακτηριστικά"
|
||||
msgstr "Πειραματικές Λειτουργίες"
|
||||
|
||||
#: src/page/admin/sessions.vue:203 src/page/admin/sessions.vue:296
|
||||
#: src/component/service/edit.vue:69 src/component/settings/apps.vue:160
|
||||
|
|
@ -1416,7 +1416,7 @@ msgstr "Επίθετο"
|
|||
|
||||
#: src/options/options.js:222
|
||||
msgid "Fast"
|
||||
msgstr "Γρήγορα"
|
||||
msgstr "Γρήγορο"
|
||||
|
||||
#: src/component/album/edit/dialog.vue:91
|
||||
#: src/component/label/edit/dialog.vue:44
|
||||
|
|
@ -1457,7 +1457,7 @@ msgstr "Πρόγραμμα περιήγησης αρχείων"
|
|||
|
||||
#: src/page/settings/advanced.vue:354
|
||||
msgid "File Conversion"
|
||||
msgstr "Μετατροπή αρχείου"
|
||||
msgstr "Μετατροπή Αρχείων"
|
||||
|
||||
#: src/component/album/edit/dialog.vue:147 src/component/photo/toolbar.vue:424
|
||||
#: src/component/photo/toolbar.vue:435 src/component/photo/toolbar.vue:446
|
||||
|
|
@ -1537,7 +1537,7 @@ msgstr "Πλήρης πρόσβαση"
|
|||
|
||||
#: src/component/lightbox.vue:1264 src/component/lightbox.vue:1265
|
||||
msgid "Fullscreen"
|
||||
msgstr "Πλήρης οθόνη"
|
||||
msgstr "Πλήρης Οθόνη"
|
||||
|
||||
#: src/page/settings.vue:60
|
||||
msgid "General"
|
||||
|
|
@ -1655,7 +1655,7 @@ msgstr "Εικόνα"
|
|||
|
||||
#: src/page/settings/advanced.vue:301
|
||||
msgid "Image Quality"
|
||||
msgstr "Ποιότητα εικόνας"
|
||||
msgstr "Ποιότητα Εικόνας"
|
||||
|
||||
#: src/page/library.vue:74 src/page/library/import.vue:44
|
||||
#: src/page/library/import.vue:45 src/page/library/import.vue:73
|
||||
|
|
@ -1809,7 +1809,7 @@ msgstr "Ποιότητα JPEG: %{n}"
|
|||
|
||||
#: src/page/settings/advanced.vue:323
|
||||
msgid "JPEG Size Limit: %{n}px"
|
||||
msgstr "Όριο μεγέθους JPEG: %{n}px"
|
||||
msgstr "Όριο Μεγέθους JPEG: %{n}px"
|
||||
|
||||
#: src/page/library/import.vue:58
|
||||
msgid "JPEGs and thumbnails are automatically rendered as needed."
|
||||
|
|
@ -1856,7 +1856,7 @@ msgstr "Τελευταία φορά ενεργός"
|
|||
|
||||
#: src/page/admin/users.vue:276 src/locales.js:335
|
||||
msgid "Last Login"
|
||||
msgstr "Τελευταία σύνδεση"
|
||||
msgstr "Τελευταία Σύνδεση"
|
||||
|
||||
#: src/locales.js:235 src/locales.js:293
|
||||
msgid "Last page"
|
||||
|
|
@ -1938,7 +1938,7 @@ msgstr "Λίστα"
|
|||
|
||||
#: src/page/settings/content.vue:141
|
||||
msgid "List View"
|
||||
msgstr "Προβολή λίστας"
|
||||
msgstr "Προβολή σε Λίστα"
|
||||
|
||||
#: src/component/photo/view/cards.vue:139
|
||||
#: src/component/photo/view/cards.vue:280 src/component/photo/view/list.vue:94
|
||||
|
|
@ -1951,7 +1951,7 @@ msgstr "Ζωντανό"
|
|||
#: src/component/navigation.vue:222 src/component/navigation.vue:237
|
||||
#: src/component/navigation.vue:331
|
||||
msgid "Live Photos"
|
||||
msgstr "Φωτογραφίες"
|
||||
msgstr "Ζωντανές Εικόνες"
|
||||
|
||||
#: src/locales.js:307
|
||||
msgid "Load more"
|
||||
|
|
@ -2188,7 +2188,7 @@ msgstr "Νέος κωδικός πρόσβασης"
|
|||
#: src/component/photo/toolbar.vue:431 src/component/photo/toolbar.vue:441
|
||||
#: src/page/albums.vue:462
|
||||
msgid "Newest First"
|
||||
msgstr "Το νεότερο πρώτα"
|
||||
msgstr "Πρώτα τα πιο Πρόσφατα"
|
||||
|
||||
#: src/component/lightbox.vue:412 src/locales.js:297
|
||||
msgid "Next"
|
||||
|
|
@ -2285,7 +2285,7 @@ msgstr "Οι μη φωτογραφικές εικόνες και οι εικόν
|
|||
#: src/options/admin.js:51 src/options/auth.js:33 src/options/options.js:218
|
||||
#: src/options/options.js:334
|
||||
msgid "None"
|
||||
msgstr "Κανένα"
|
||||
msgstr "Καθόλου"
|
||||
|
||||
#: src/component/lightbox.vue:786 src/component/service/upload.vue:159
|
||||
#: src/component/service/upload.vue:171
|
||||
|
|
@ -2345,7 +2345,7 @@ msgstr "OK"
|
|||
#: src/component/photo/toolbar.vue:432 src/component/photo/toolbar.vue:442
|
||||
#: src/page/albums.vue:463
|
||||
msgid "Oldest First"
|
||||
msgstr "Ο παλαιότερος πρώτος"
|
||||
msgstr "Πρώτα τα πιο Παλιά"
|
||||
|
||||
#: src/component/settings/webdav.vue:17 src/component/settings/webdav.vue:18
|
||||
#: src/component/settings/webdav.vue:27 src/component/settings/webdav.vue:38
|
||||
|
|
@ -2608,7 +2608,7 @@ msgstr "ΜΜ"
|
|||
|
||||
#: src/page/settings/advanced.vue:338
|
||||
msgid "PNG Size Limit: %{n}px"
|
||||
msgstr "Όριο μεγέθους PNG: %{n}px"
|
||||
msgstr "Όριο Μεγέθους PNG: %{n}px"
|
||||
|
||||
#: src/locales.js:323
|
||||
msgid "Portal"
|
||||
|
|
@ -2642,7 +2642,7 @@ msgstr "Προεπισκόπηση"
|
|||
|
||||
#: src/page/settings/advanced.vue:222
|
||||
msgid "Preview Images"
|
||||
msgstr "Εικόνες προεπισκόπισης"
|
||||
msgstr "Εικόνες Προεπισκόπισης"
|
||||
|
||||
#: src/component/lightbox.vue:411 src/locales.js:298
|
||||
msgid "Previous"
|
||||
|
|
@ -2719,13 +2719,13 @@ msgstr "Δημιουργία ευρετηρίου όλων των πρωτοτύ
|
|||
|
||||
#: src/page/settings/advanced.vue:68
|
||||
msgid "Read-Only Mode"
|
||||
msgstr "Λειτουργία μόνο για ανάγνωση"
|
||||
msgstr "Λειτουργία Μόνο Ανάγνωσης"
|
||||
|
||||
#: src/component/album/edit/dialog.vue:145 src/component/photo/toolbar.vue:421
|
||||
#: src/component/photo/toolbar.vue:433 src/component/photo/toolbar.vue:443
|
||||
#: src/page/albums.vue:464
|
||||
msgid "Recently Added"
|
||||
msgstr "Πρόσφατα προστέθηκε"
|
||||
msgstr "Πρόσφατες Προσθήκες"
|
||||
|
||||
#: src/component/photo/toolbar.vue:422
|
||||
msgid "Recently Archived"
|
||||
|
|
@ -2742,7 +2742,7 @@ msgstr "Η αναγνώριση ξεκινά μετά την ολοκλήρωσ
|
|||
|
||||
#: src/page/settings/general.vue:88
|
||||
msgid "Recognize faces so people can be assigned and found."
|
||||
msgstr "Αναγνωρίζει πρόσωπα ώστε να μπορούν να βρεθούν συγκεκριμένα άτομα."
|
||||
msgstr "Αναγνώριση προσώπων ώστε να μπορούν να ορίζονται και να εντοπίζονται συγκεκριμένα άτομα."
|
||||
|
||||
#: src/page/people.vue:61
|
||||
msgid "Recognized"
|
||||
|
|
@ -3039,7 +3039,7 @@ msgstr "URL υπηρεσίας"
|
|||
#: src/locales.js:359 src/page/settings.vue:99
|
||||
#: src/page/settings/general.vue:267
|
||||
msgid "Services"
|
||||
msgstr "URL υπηρεσίας"
|
||||
msgstr "Υπηρεσίες"
|
||||
|
||||
#: src/locales.js:342 src/model/session.js:83 src/options/auth.js:42
|
||||
#: src/options/auth.js:91
|
||||
|
|
@ -3114,7 +3114,7 @@ msgstr "Εμφάνιση όλων των νέων προσώπων"
|
|||
|
||||
#: src/page/settings/content.vue:169
|
||||
msgid "Show Captions"
|
||||
msgstr "Εμφάνιση λεζάντων"
|
||||
msgstr "Εμφάνιση Λεζάντων"
|
||||
|
||||
#: src/page/people/new.vue:12 src/page/people/recognized.vue:45
|
||||
msgid "Show hidden"
|
||||
|
|
@ -3130,7 +3130,7 @@ msgstr "Εμφάνιση των αρχείων καταγραφής του δι
|
|||
|
||||
#: src/page/settings/content.vue:155
|
||||
msgid "Show Titles"
|
||||
msgstr "Εμφάνιση τίτλων"
|
||||
msgstr "Εμφάνιση Τίτλων"
|
||||
|
||||
#: src/model/file.js:190 src/page/settings/content.vue:221
|
||||
msgid "Sidecar"
|
||||
|
|
@ -3164,7 +3164,7 @@ msgstr "Μέγεθος"
|
|||
|
||||
#: src/component/lightbox.vue:1247 src/component/lightbox.vue:1248
|
||||
msgid "Slideshow"
|
||||
msgstr "Παρουσίαση διαφανειών"
|
||||
msgstr "Παρουσίαση"
|
||||
|
||||
#: src/options/options.js:230
|
||||
msgid "Slow"
|
||||
|
|
@ -3266,7 +3266,7 @@ msgstr "Σελίδα έναρξης"
|
|||
|
||||
#: src/page/settings/advanced.vue:244
|
||||
msgid "Static Size Limit: %{n}px"
|
||||
msgstr "Όριο στατικού μεγέθους: %{n}px"
|
||||
msgstr "Όριο Στατικού Μεγέθους: %{n}px"
|
||||
|
||||
#: src/component/photo/edit/files.vue:463
|
||||
msgid "Status"
|
||||
|
|
@ -3339,7 +3339,7 @@ msgstr "Γαλαζοπράσινο"
|
|||
|
||||
#: src/page/settings/advanced.vue:150
|
||||
msgid "TensorFlow is required for image classification, facial recognition, and detecting unsafe content."
|
||||
msgstr "Το TensorFlow απαιτείται για την ταξινόμηση εικόνων, την αναγνώριση προσώπου και την ανίχνευση μη ασφαλούς περιεχομένου."
|
||||
msgstr "Το TensorFlow απαιτείται για την ταξινόμηση εικόνων, την αναγνώριση προσώπων και την ανίχνευση μη ασφαλούς περιεχομένου."
|
||||
|
||||
#: src/options/options.js:267
|
||||
msgid "Terrain"
|
||||
|
|
@ -3443,7 +3443,7 @@ msgstr "Σήμερα"
|
|||
|
||||
#: src/component/album/toolbar.vue:28 src/component/photo/toolbar.vue:55
|
||||
msgid "Toggle View"
|
||||
msgstr "Εναλλαγή προβολής"
|
||||
msgstr "Εναλλαγή Προβολής"
|
||||
|
||||
#: src/component/share/dialog.vue:89
|
||||
msgid "Token"
|
||||
|
|
@ -3606,7 +3606,7 @@ msgstr "Διαδρομή φόρτωσης"
|
|||
|
||||
#: src/page/settings/general.vue:209
|
||||
msgid "Upload to WebDAV and share links with friends."
|
||||
msgstr "Ανεβάστε σε WebDAV και μοιραστείτε συνδέσμους με φίλους."
|
||||
msgstr "Ανεβάστε στο WebDAV και μοιραστείτε συνδέσμους με φίλους."
|
||||
|
||||
#: src/component/upload/dialog.vue:40
|
||||
msgid "Uploading %{n} of %{t}…"
|
||||
|
|
@ -3658,7 +3658,7 @@ msgstr "Διεπαφή χρήστη"
|
|||
#: src/component/service/add.vue:38 src/component/service/edit.vue:172
|
||||
#: src/component/share/dialog.vue:174 src/locales.js:320
|
||||
msgid "Username"
|
||||
msgstr "Όνομα χρήστη"
|
||||
msgstr "Όνομα Χρήστη"
|
||||
|
||||
#: src/component/navigation.vue:400 src/component/navigation.vue:401
|
||||
#: src/component/navigation.vue:425 src/component/navigation.vue:431
|
||||
|
|
|
|||
102
frontend/tests/vitest/component/lightbox.basic.test.js
Normal file
102
frontend/tests/vitest/component/lightbox.basic.test.js
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
import { mount, config as VTUConfig } from "@vue/test-utils";
|
||||
import { describe, it, expect, beforeEach } from "vitest";
|
||||
import { nextTick } from "vue";
|
||||
import PLightbox from "component/lightbox.vue";
|
||||
|
||||
const mountLightbox = () =>
|
||||
mount(PLightbox, {
|
||||
global: {
|
||||
stubs: {
|
||||
"v-dialog": true,
|
||||
"v-icon": true,
|
||||
"v-slider": true,
|
||||
"p-lightbox-menu": true,
|
||||
"p-sidebar-info": true,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
describe("PLightbox (low-mock, jsdom-friendly)", () => {
|
||||
beforeEach(() => {
|
||||
localStorage.removeItem("lightbox.info");
|
||||
sessionStorage.removeItem("lightbox.muted");
|
||||
});
|
||||
|
||||
it("toggleInfo updates info and localStorage when visible", async () => {
|
||||
const wrapper = mountLightbox();
|
||||
await wrapper.setData({ visible: true });
|
||||
|
||||
// Use exposed onShortCut to trigger info toggle (KeyI)
|
||||
await wrapper.vm.onShortCut({ code: "KeyI" });
|
||||
await nextTick();
|
||||
expect(localStorage.getItem("lightbox.info")).toBe("true");
|
||||
|
||||
await wrapper.vm.onShortCut({ code: "KeyI" });
|
||||
await nextTick();
|
||||
expect(localStorage.getItem("lightbox.info")).toBe("false");
|
||||
});
|
||||
|
||||
it("toggleMute writes sessionStorage without requiring video or exposed state", async () => {
|
||||
const wrapper = mountLightbox();
|
||||
expect(sessionStorage.getItem("lightbox.muted")).toBeNull();
|
||||
await wrapper.vm.onShortCut({ code: "KeyM" });
|
||||
expect(sessionStorage.getItem("lightbox.muted")).toBe("true");
|
||||
await wrapper.vm.onShortCut({ code: "KeyM" });
|
||||
expect(sessionStorage.getItem("lightbox.muted")).toBe("false");
|
||||
});
|
||||
|
||||
it("getPadding returns expected structure for large and small screens", async () => {
|
||||
const wrapper = mountLightbox();
|
||||
// Large viewport
|
||||
const large = wrapper.vm.$options.methods.getPadding.call(
|
||||
wrapper.vm,
|
||||
{ x: 1200, y: 800 },
|
||||
{ width: 4000, height: 3000 }
|
||||
);
|
||||
expect(large).toHaveProperty("top");
|
||||
expect(large).toHaveProperty("bottom");
|
||||
expect(large).toHaveProperty("left");
|
||||
expect(large).toHaveProperty("right");
|
||||
|
||||
// Small viewport (<= mobileBreakpoint) should yield zeros
|
||||
const small = wrapper.vm.$options.methods.getPadding.call(
|
||||
wrapper.vm,
|
||||
{ x: 360, y: 640 },
|
||||
{ width: 1200, height: 800 }
|
||||
);
|
||||
expect(small).toEqual({ top: 0, bottom: 0, left: 0, right: 0 });
|
||||
});
|
||||
|
||||
it("KeyI is ignored when dialog is not visible", async () => {
|
||||
const wrapper = mountLightbox();
|
||||
expect(localStorage.getItem("lightbox.info")).toBeNull();
|
||||
await wrapper.vm.onShortCut({ code: "KeyI" });
|
||||
expect(localStorage.getItem("lightbox.info")).toBeNull();
|
||||
});
|
||||
|
||||
it("getViewport falls back to window size without content ref", () => {
|
||||
const wrapper = mountLightbox();
|
||||
const vp = wrapper.vm.$options.methods.getViewport.call(wrapper.vm);
|
||||
expect(vp.x).toBeGreaterThan(0);
|
||||
expect(vp.y).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("menuActions marks Download action visible when allowed", () => {
|
||||
const wrapper = mountLightbox();
|
||||
const ctx = {
|
||||
$gettext: VTUConfig.global.mocks.$gettext,
|
||||
$pgettext: VTUConfig.global.mocks.$pgettext,
|
||||
// minimal state needed by menuActions visibility checks
|
||||
canManageAlbums: false,
|
||||
canArchive: false,
|
||||
canDownload: true,
|
||||
collection: null,
|
||||
context: "",
|
||||
model: {},
|
||||
};
|
||||
const actions = wrapper.vm.$options.methods.menuActions.call(ctx);
|
||||
const download = actions.find((a) => a?.name === "download");
|
||||
expect(download).toBeTruthy();
|
||||
expect(download.visible).toBe(true);
|
||||
});
|
||||
});
|
||||
|
|
@ -38,10 +38,48 @@ if (typeof global.ResizeObserver === "undefined") {
|
|||
// Configure Vue Test Utils global configuration
|
||||
config.global.mocks = {
|
||||
$gettext: (text) => text,
|
||||
$pgettext: (_ctx, text) => text,
|
||||
$isRtl: false,
|
||||
$config: {
|
||||
feature: (_name) => true,
|
||||
feature: () => true,
|
||||
get: () => false,
|
||||
getSettings: () => ({ features: { edit: true, favorites: true, download: true, archive: true } }),
|
||||
allow: () => true,
|
||||
featExperimental: () => false,
|
||||
featDevelop: () => false,
|
||||
values: {},
|
||||
dir: () => "ltr",
|
||||
},
|
||||
$event: {
|
||||
subscribe: () => "sub-id",
|
||||
subscribeOnce: () => "sub-id-once",
|
||||
unsubscribe: () => {},
|
||||
publish: () => {},
|
||||
},
|
||||
$view: {
|
||||
enter: () => {},
|
||||
leave: () => {},
|
||||
isActive: () => true,
|
||||
},
|
||||
$notify: { success: () => {}, error: () => {}, warn: () => {} },
|
||||
$fullscreen: {
|
||||
isSupported: () => true,
|
||||
isEnabled: () => false,
|
||||
request: () => Promise.resolve(),
|
||||
exit: () => Promise.resolve(),
|
||||
},
|
||||
$clipboard: { selection: [], has: () => false, toggle: () => {} },
|
||||
$util: {
|
||||
hasTouch: () => false,
|
||||
encodeHTML: (s) => s,
|
||||
sanitizeHtml: (s) => s,
|
||||
formatSeconds: (n) => String(n),
|
||||
formatRemainingSeconds: () => "0",
|
||||
videoFormat: () => "avc",
|
||||
videoFormatUrl: () => "/v.mp4",
|
||||
thumb: () => ({ src: "/t.jpg", w: 100, h: 100 }),
|
||||
},
|
||||
$api: { post: vi.fn(), delete: vi.fn(), get: vi.fn() },
|
||||
};
|
||||
|
||||
config.global.plugins = [vuetify];
|
||||
|
|
|
|||
|
|
@ -9,6 +9,9 @@ import (
|
|||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
|
||||
"github.com/photoprism/photoprism/pkg/clean"
|
||||
"github.com/photoprism/photoprism/pkg/http/header"
|
||||
)
|
||||
|
|
@ -69,6 +72,10 @@ func PerformApiRequest(apiRequest *ApiRequest, uri, method, key string) (apiResp
|
|||
return nil, parseErr
|
||||
}
|
||||
|
||||
if log.IsLevelEnabled(logrus.TraceLevel) {
|
||||
log.Tracef("vision: response %s", string(body))
|
||||
}
|
||||
|
||||
return parsed, nil
|
||||
}
|
||||
|
||||
|
|
@ -89,12 +96,12 @@ func PerformApiRequest(apiRequest *ApiRequest, uri, method, key string) (apiResp
|
|||
return apiResponse, nil
|
||||
}
|
||||
|
||||
func decodeOllamaResponse(data []byte) (*ApiResponseOllama, error) {
|
||||
resp := &ApiResponseOllama{}
|
||||
func decodeOllamaResponse(data []byte) (*ollama.Response, error) {
|
||||
resp := &ollama.Response{}
|
||||
dec := json.NewDecoder(bytes.NewReader(data))
|
||||
|
||||
for {
|
||||
var chunk ApiResponseOllama
|
||||
var chunk ollama.Response
|
||||
if err := dec.Decode(&chunk); err != nil {
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import (
|
|||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
|
||||
"github.com/photoprism/photoprism/pkg/http/scheme"
|
||||
)
|
||||
|
||||
|
|
@ -49,7 +50,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
|
|||
var req ApiRequest
|
||||
assert.NoError(t, json.NewDecoder(r.Body).Decode(&req))
|
||||
assert.Equal(t, FormatJSON, req.Format)
|
||||
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
|
||||
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
|
||||
Model: "qwen2.5vl:latest",
|
||||
Response: `{"labels":[{"name":"test","confidence":0.9,"topicality":0.8}]}`,
|
||||
}))
|
||||
|
|
@ -72,7 +73,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
|
|||
})
|
||||
t.Run("LabelsWithCodeFence", func(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
|
||||
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
|
||||
Model: "gemma3:latest",
|
||||
Response: "```json\n{\"labels\":[{\"name\":\"lingerie\",\"confidence\":0.81,\"topicality\":0.73}]}\n```\nThe model provided additional commentary.",
|
||||
}))
|
||||
|
|
@ -95,7 +96,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
|
|||
})
|
||||
t.Run("CaptionFallback", func(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
|
||||
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
|
||||
Model: "qwen2.5vl:latest",
|
||||
Response: "plain text",
|
||||
}))
|
||||
|
|
|
|||
|
|
@ -1,10 +1,8 @@
|
|||
package vision
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/photoprism/photoprism/pkg/clean"
|
||||
"github.com/photoprism/photoprism/pkg/http/scheme"
|
||||
|
|
@ -12,53 +10,6 @@ import (
|
|||
"github.com/photoprism/photoprism/pkg/rnd"
|
||||
)
|
||||
|
||||
// ApiResponseOllama represents a Ollama API service response.
|
||||
type ApiResponseOllama struct {
|
||||
Id string `yaml:"Id,omitempty" json:"id,omitempty"`
|
||||
Code int `yaml:"Code,omitempty" json:"code,omitempty"`
|
||||
Error string `yaml:"Error,omitempty" json:"error,omitempty"`
|
||||
Model string `yaml:"Model,omitempty" json:"model,omitempty"`
|
||||
CreatedAt time.Time `yaml:"CreatedAt,omitempty" json:"created_at,omitempty"`
|
||||
Response string `yaml:"Response,omitempty" json:"response,omitempty"`
|
||||
Done bool `yaml:"Done,omitempty" json:"done,omitempty"`
|
||||
Context []int `yaml:"Context,omitempty" json:"context,omitempty"`
|
||||
TotalDuration int64 `yaml:"TotalDuration,omitempty" json:"total_duration,omitempty"`
|
||||
LoadDuration int `yaml:"LoadDuration,omitempty" json:"load_duration,omitempty"`
|
||||
PromptEvalCount int `yaml:"PromptEvalCount,omitempty" json:"prompt_eval_count,omitempty"`
|
||||
PromptEvalDuration int `yaml:"PromptEvalDuration,omitempty" json:"prompt_eval_duration,omitempty"`
|
||||
EvalCount int `yaml:"EvalCount,omitempty" json:"eval_count,omitempty"`
|
||||
EvalDuration int64 `yaml:"EvalDuration,omitempty" json:"eval_duration,omitempty"`
|
||||
Result ApiResult `yaml:"Result,omitempty" json:"result,omitempty"`
|
||||
}
|
||||
|
||||
// Err returns an error if the request has failed.
|
||||
func (r *ApiResponseOllama) Err() error {
|
||||
if r == nil {
|
||||
return errors.New("response is nil")
|
||||
}
|
||||
|
||||
if r.Code >= 400 {
|
||||
if r.Error != "" {
|
||||
return errors.New(r.Error)
|
||||
}
|
||||
|
||||
return fmt.Errorf("error %d", r.Code)
|
||||
} else if r.Result.IsEmpty() {
|
||||
return errors.New("no result")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// HasResult checks if there is at least one result in the response data.
|
||||
func (r *ApiResponseOllama) HasResult() bool {
|
||||
if r == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return !r.Result.IsEmpty()
|
||||
}
|
||||
|
||||
// NewApiRequestOllama returns a new Ollama API request with the specified images as payload.
|
||||
func NewApiRequestOllama(images Files, fileScheme scheme.Type) (*ApiRequest, error) {
|
||||
imagesData := make(Files, len(images))
|
||||
|
|
|
|||
|
|
@ -11,6 +11,8 @@ import (
|
|||
|
||||
"github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/openai"
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/schema"
|
||||
"github.com/photoprism/photoprism/internal/api/download"
|
||||
"github.com/photoprism/photoprism/pkg/clean"
|
||||
"github.com/photoprism/photoprism/pkg/fs"
|
||||
|
|
@ -58,6 +60,11 @@ type ApiRequestOptions struct {
|
|||
UseMmap bool `yaml:"UseMmap,omitempty" json:"use_mmap,omitempty"`
|
||||
UseMlock bool `yaml:"UseMlock,omitempty" json:"use_mlock,omitempty"`
|
||||
NumThread int `yaml:"NumThread,omitempty" json:"num_thread,omitempty"`
|
||||
MaxOutputTokens int `yaml:"MaxOutputTokens,omitempty" json:"max_output_tokens,omitempty"`
|
||||
Detail string `yaml:"Detail,omitempty" json:"detail,omitempty"`
|
||||
ForceJson bool `yaml:"ForceJson,omitempty" json:"force_json,omitempty"`
|
||||
SchemaVersion string `yaml:"SchemaVersion,omitempty" json:"schema_version,omitempty"`
|
||||
CombineOutputs string `yaml:"CombineOutputs,omitempty" json:"combine_outputs,omitempty"`
|
||||
}
|
||||
|
||||
// ApiRequestContext represents a context parameter returned from a previous request.
|
||||
|
|
@ -77,6 +84,7 @@ type ApiRequest struct {
|
|||
Context *ApiRequestContext `form:"context" yaml:"Context,omitempty" json:"context,omitempty"`
|
||||
Stream bool `form:"stream" yaml:"Stream,omitempty" json:"stream"`
|
||||
Images Files `form:"images" yaml:"Images,omitempty" json:"images,omitempty"`
|
||||
Schema json.RawMessage `form:"schema" yaml:"Schema,omitempty" json:"schema,omitempty"`
|
||||
ResponseFormat ApiFormat `form:"-" yaml:"-" json:"-"`
|
||||
}
|
||||
|
||||
|
|
@ -195,6 +203,14 @@ func (r *ApiRequest) GetResponseFormat() ApiFormat {
|
|||
|
||||
// JSON returns the request data as JSON-encoded bytes.
|
||||
func (r *ApiRequest) JSON() ([]byte, error) {
|
||||
if r == nil {
|
||||
return nil, errors.New("api request is nil")
|
||||
}
|
||||
|
||||
if r.ResponseFormat == ApiFormatOpenAI {
|
||||
return r.openAIJSON()
|
||||
}
|
||||
|
||||
return json.Marshal(*r)
|
||||
}
|
||||
|
||||
|
|
@ -229,6 +245,8 @@ func (r *ApiRequest) sanitizedForLog() ApiRequest {
|
|||
|
||||
sanitized.Url = sanitizeLogPayload(r.Url)
|
||||
|
||||
sanitized.Schema = r.Schema
|
||||
|
||||
return sanitized
|
||||
}
|
||||
|
||||
|
|
@ -287,3 +305,134 @@ func isLikelyBase64(value string) bool {
|
|||
|
||||
return true
|
||||
}
|
||||
|
||||
// openAIJSON converts the request data into an OpenAI Responses API payload.
|
||||
func (r *ApiRequest) openAIJSON() ([]byte, error) {
|
||||
detail := openai.DefaultDetail
|
||||
|
||||
if opts := r.Options; opts != nil && strings.TrimSpace(opts.Detail) != "" {
|
||||
detail = strings.TrimSpace(opts.Detail)
|
||||
}
|
||||
|
||||
messages := make([]openai.InputMessage, 0, 2)
|
||||
|
||||
if system := strings.TrimSpace(r.System); system != "" {
|
||||
messages = append(messages, openai.InputMessage{
|
||||
Role: "system",
|
||||
Type: "message",
|
||||
Content: []openai.ContentItem{
|
||||
{
|
||||
Type: openai.ContentTypeText,
|
||||
Text: system,
|
||||
},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
userContent := make([]openai.ContentItem, 0, len(r.Images)+1)
|
||||
|
||||
if prompt := strings.TrimSpace(r.Prompt); prompt != "" {
|
||||
userContent = append(userContent, openai.ContentItem{
|
||||
Type: openai.ContentTypeText,
|
||||
Text: prompt,
|
||||
})
|
||||
}
|
||||
|
||||
for _, img := range r.Images {
|
||||
if img == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
userContent = append(userContent, openai.ContentItem{
|
||||
Type: openai.ContentTypeImage,
|
||||
ImageURL: img,
|
||||
Detail: detail,
|
||||
})
|
||||
}
|
||||
|
||||
if len(userContent) > 0 {
|
||||
messages = append(messages, openai.InputMessage{
|
||||
Role: "user",
|
||||
Type: "message",
|
||||
Content: userContent,
|
||||
})
|
||||
}
|
||||
|
||||
if len(messages) == 0 {
|
||||
return nil, errors.New("openai request requires at least one message")
|
||||
}
|
||||
|
||||
payload := openai.HTTPRequest{
|
||||
Model: strings.TrimSpace(r.Model),
|
||||
Input: messages,
|
||||
}
|
||||
|
||||
if payload.Model == "" {
|
||||
payload.Model = openai.DefaultModel
|
||||
}
|
||||
|
||||
if strings.HasPrefix(strings.ToLower(payload.Model), "gpt-5") {
|
||||
payload.Reasoning = &openai.Reasoning{Effort: "low"}
|
||||
}
|
||||
|
||||
if opts := r.Options; opts != nil {
|
||||
if opts.MaxOutputTokens > 0 {
|
||||
payload.MaxOutputTokens = opts.MaxOutputTokens
|
||||
}
|
||||
|
||||
if opts.Temperature > 0 {
|
||||
payload.Temperature = opts.Temperature
|
||||
}
|
||||
|
||||
if opts.TopP > 0 {
|
||||
payload.TopP = opts.TopP
|
||||
}
|
||||
|
||||
if opts.PresencePenalty != 0 {
|
||||
payload.PresencePenalty = opts.PresencePenalty
|
||||
}
|
||||
|
||||
if opts.FrequencyPenalty != 0 {
|
||||
payload.FrequencyPenalty = opts.FrequencyPenalty
|
||||
}
|
||||
}
|
||||
|
||||
if format := buildOpenAIResponseFormat(r); format != nil {
|
||||
payload.Text = &openai.TextOptions{
|
||||
Format: format,
|
||||
}
|
||||
}
|
||||
|
||||
return json.Marshal(payload)
|
||||
}
|
||||
|
||||
// buildOpenAIResponseFormat determines which response_format to send to OpenAI.
|
||||
func buildOpenAIResponseFormat(r *ApiRequest) *openai.ResponseFormat {
|
||||
if r == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
opts := r.Options
|
||||
hasSchema := len(r.Schema) > 0
|
||||
|
||||
if !hasSchema && (opts == nil || !opts.ForceJson) {
|
||||
return nil
|
||||
}
|
||||
|
||||
result := &openai.ResponseFormat{}
|
||||
|
||||
if hasSchema {
|
||||
result.Type = openai.ResponseFormatJSONSchema
|
||||
result.Schema = r.Schema
|
||||
|
||||
if opts != nil && strings.TrimSpace(opts.SchemaVersion) != "" {
|
||||
result.Name = strings.TrimSpace(opts.SchemaVersion)
|
||||
} else {
|
||||
result.Name = schema.JsonSchemaName(r.Schema, openai.DefaultSchemaVersion)
|
||||
}
|
||||
} else {
|
||||
result.Type = openai.ResponseFormatJSONObject
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,7 +53,11 @@ func captionInternal(images Files, mediaSrc media.Src) (result *CaptionResult, m
|
|||
|
||||
apiRequest.System = model.GetSystemPrompt()
|
||||
apiRequest.Prompt = model.GetPrompt()
|
||||
apiRequest.Options = model.GetOptions()
|
||||
|
||||
if apiRequest.Options == nil {
|
||||
apiRequest.Options = model.GetOptions()
|
||||
}
|
||||
|
||||
apiRequest.WriteLog()
|
||||
|
||||
if apiResponse, err = PerformApiRequest(apiRequest, uri, method, model.EndpointKey()); err != nil {
|
||||
|
|
|
|||
|
|
@ -58,14 +58,15 @@ func init() {
|
|||
RegisterEngineAlias(EngineVision, EngineInfo{
|
||||
RequestFormat: ApiFormatVision,
|
||||
ResponseFormat: ApiFormatVision,
|
||||
FileScheme: string(scheme.Data),
|
||||
FileScheme: scheme.Data,
|
||||
DefaultResolution: DefaultResolution,
|
||||
})
|
||||
|
||||
RegisterEngineAlias(openai.EngineName, EngineInfo{
|
||||
Uri: "https://api.openai.com/v1/responses",
|
||||
RequestFormat: ApiFormatOpenAI,
|
||||
ResponseFormat: ApiFormatOpenAI,
|
||||
FileScheme: string(scheme.Data),
|
||||
FileScheme: scheme.Data,
|
||||
DefaultResolution: openai.DefaultResolution,
|
||||
})
|
||||
}
|
||||
|
|
@ -79,6 +80,7 @@ func RegisterEngine(format ApiFormat, engine Engine) {
|
|||
|
||||
// EngineInfo describes metadata that can be associated with an engine alias.
|
||||
type EngineInfo struct {
|
||||
Uri string
|
||||
RequestFormat ApiFormat
|
||||
ResponseFormat ApiFormat
|
||||
FileScheme string
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ func init() {
|
|||
RegisterEngineAlias(ollama.EngineName, EngineInfo{
|
||||
RequestFormat: ApiFormatOllama,
|
||||
ResponseFormat: ApiFormatOllama,
|
||||
FileScheme: string(scheme.Base64),
|
||||
FileScheme: scheme.Base64,
|
||||
DefaultResolution: ollama.DefaultResolution,
|
||||
})
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ func (ollamaDefaults) SchemaTemplate(model *Model) string {
|
|||
|
||||
switch model.Type {
|
||||
case ModelTypeLabels:
|
||||
return ollama.LabelsSchema(model.PromptContains("nsfw"))
|
||||
return ollama.SchemaLabels(model.PromptContains("nsfw"))
|
||||
}
|
||||
|
||||
return ""
|
||||
|
|
@ -134,64 +134,99 @@ func (ollamaParser) Parse(ctx context.Context, req *ApiRequest, raw []byte, stat
|
|||
return nil, err
|
||||
}
|
||||
|
||||
result := &ApiResponse{
|
||||
response := &ApiResponse{
|
||||
Id: req.GetId(),
|
||||
Code: status,
|
||||
Model: &Model{Name: ollamaResp.Model},
|
||||
Result: ApiResult{
|
||||
Labels: append([]LabelResult{}, ollamaResp.Result.Labels...),
|
||||
Caption: func() *CaptionResult {
|
||||
if ollamaResp.Result.Caption != nil {
|
||||
copyCaption := *ollamaResp.Result.Caption
|
||||
return ©Caption
|
||||
}
|
||||
return nil
|
||||
}(),
|
||||
Labels: convertOllamaLabels(ollamaResp.Result.Labels),
|
||||
Caption: convertOllamaCaption(ollamaResp.Result.Caption),
|
||||
},
|
||||
}
|
||||
|
||||
parsedLabels := len(result.Result.Labels) > 0
|
||||
parsedLabels := len(response.Result.Labels) > 0
|
||||
|
||||
if !parsedLabels && strings.TrimSpace(ollamaResp.Response) != "" && req.Format == FormatJSON {
|
||||
if labels, parseErr := parseOllamaLabels(ollamaResp.Response); parseErr != nil {
|
||||
log.Debugf("vision: %s (parse ollama labels)", clean.Error(parseErr))
|
||||
// Qwen3-VL models stream their JSON payload in the "Thinking" field.
|
||||
fallbackJSON := strings.TrimSpace(ollamaResp.Response)
|
||||
if fallbackJSON == "" {
|
||||
fallbackJSON = strings.TrimSpace(ollamaResp.Thinking)
|
||||
}
|
||||
|
||||
if !parsedLabels && fallbackJSON != "" && (req.Format == FormatJSON || strings.HasPrefix(fallbackJSON, "{")) {
|
||||
if labels, parseErr := parseOllamaLabels(fallbackJSON); parseErr != nil {
|
||||
log.Warnf("vision: %s (parse ollama labels)", clean.Error(parseErr))
|
||||
} else if len(labels) > 0 {
|
||||
result.Result.Labels = append(result.Result.Labels, labels...)
|
||||
response.Result.Labels = append(response.Result.Labels, labels...)
|
||||
parsedLabels = true
|
||||
}
|
||||
}
|
||||
|
||||
if parsedLabels {
|
||||
filtered := result.Result.Labels[:0]
|
||||
for i := range result.Result.Labels {
|
||||
if result.Result.Labels[i].Confidence <= 0 {
|
||||
result.Result.Labels[i].Confidence = ollama.LabelConfidenceDefault
|
||||
filtered := response.Result.Labels[:0]
|
||||
for i := range response.Result.Labels {
|
||||
if response.Result.Labels[i].Confidence <= 0 {
|
||||
response.Result.Labels[i].Confidence = ollama.LabelConfidenceDefault
|
||||
}
|
||||
|
||||
if result.Result.Labels[i].Topicality <= 0 {
|
||||
result.Result.Labels[i].Topicality = result.Result.Labels[i].Confidence
|
||||
if response.Result.Labels[i].Topicality <= 0 {
|
||||
response.Result.Labels[i].Topicality = response.Result.Labels[i].Confidence
|
||||
}
|
||||
|
||||
// Apply thresholds and canonicalize the name.
|
||||
normalizeLabelResult(&result.Result.Labels[i])
|
||||
normalizeLabelResult(&response.Result.Labels[i])
|
||||
|
||||
if result.Result.Labels[i].Name == "" {
|
||||
if response.Result.Labels[i].Name == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if result.Result.Labels[i].Source == "" {
|
||||
result.Result.Labels[i].Source = entity.SrcOllama
|
||||
if response.Result.Labels[i].Source == "" {
|
||||
response.Result.Labels[i].Source = entity.SrcOllama
|
||||
}
|
||||
|
||||
filtered = append(filtered, result.Result.Labels[i])
|
||||
filtered = append(filtered, response.Result.Labels[i])
|
||||
}
|
||||
result.Result.Labels = filtered
|
||||
response.Result.Labels = filtered
|
||||
} else if caption := strings.TrimSpace(ollamaResp.Response); caption != "" {
|
||||
result.Result.Caption = &CaptionResult{
|
||||
response.Result.Caption = &CaptionResult{
|
||||
Text: caption,
|
||||
Source: entity.SrcOllama,
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
return response, nil
|
||||
}
|
||||
|
||||
func convertOllamaLabels(payload []ollama.LabelPayload) []LabelResult {
|
||||
if len(payload) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
labels := make([]LabelResult, len(payload))
|
||||
|
||||
for i := range payload {
|
||||
labels[i] = LabelResult{
|
||||
Name: payload[i].Name,
|
||||
Source: payload[i].Source,
|
||||
Priority: payload[i].Priority,
|
||||
Confidence: payload[i].Confidence,
|
||||
Topicality: payload[i].Topicality,
|
||||
Categories: payload[i].Categories,
|
||||
NSFW: payload[i].NSFW,
|
||||
NSFWConfidence: payload[i].NSFWConfidence,
|
||||
}
|
||||
}
|
||||
|
||||
return labels
|
||||
}
|
||||
|
||||
func convertOllamaCaption(payload *ollama.CaptionPayload) *CaptionResult {
|
||||
if payload == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &CaptionResult{
|
||||
Text: payload.Text,
|
||||
Source: payload.Source,
|
||||
Confidence: payload.Confidence,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@ import (
|
|||
|
||||
func TestOllamaDefaultConfidenceApplied(t *testing.T) {
|
||||
req := &ApiRequest{Format: FormatJSON}
|
||||
payload := ApiResponseOllama{
|
||||
Result: ApiResult{
|
||||
Labels: []LabelResult{{Name: "forest path", Confidence: 0, Topicality: 0}},
|
||||
payload := ollama.Response{
|
||||
Result: ollama.ResultPayload{
|
||||
Labels: []ollama.LabelPayload{{Name: "forest path", Confidence: 0, Topicality: 0}},
|
||||
},
|
||||
}
|
||||
raw, err := json.Marshal(payload)
|
||||
|
|
@ -37,3 +37,46 @@ func TestOllamaDefaultConfidenceApplied(t *testing.T) {
|
|||
t.Fatalf("expected topicality to default to confidence, got %.2f", resp.Result.Labels[0].Topicality)
|
||||
}
|
||||
}
|
||||
|
||||
func TestOllamaParserFallbacks(t *testing.T) {
|
||||
t.Run("ThinkingFieldJSON", func(t *testing.T) {
|
||||
req := &ApiRequest{Format: FormatJSON}
|
||||
payload := ollama.Response{
|
||||
Thinking: `{"labels":[{"name":"cat","confidence":0.9,"topicality":0.8}]}`,
|
||||
}
|
||||
raw, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal: %v", err)
|
||||
}
|
||||
|
||||
parser := ollamaParser{}
|
||||
resp, err := parser.Parse(context.Background(), req, raw, 200)
|
||||
if err != nil {
|
||||
t.Fatalf("parse failed: %v", err)
|
||||
}
|
||||
|
||||
if len(resp.Result.Labels) != 1 || resp.Result.Labels[0].Name != "Cat" {
|
||||
t.Fatalf("expected cat label, got %+v", resp.Result.Labels)
|
||||
}
|
||||
})
|
||||
t.Run("JsonPrefixedResponse", func(t *testing.T) {
|
||||
req := &ApiRequest{} // no explicit format
|
||||
payload := ollama.Response{
|
||||
Response: `{"labels":[{"name":"cat","confidence":0.91,"topicality":0.81}]}`,
|
||||
}
|
||||
raw, err := json.Marshal(payload)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal: %v", err)
|
||||
}
|
||||
|
||||
parser := ollamaParser{}
|
||||
resp, err := parser.Parse(context.Background(), req, raw, 200)
|
||||
if err != nil {
|
||||
t.Fatalf("parse failed: %v", err)
|
||||
}
|
||||
|
||||
if len(resp.Result.Labels) != 1 || resp.Result.Labels[0].Name != "Cat" {
|
||||
t.Fatalf("expected cat label, got %+v", resp.Result.Labels)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,18 +1,342 @@
|
|||
package vision
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/openai"
|
||||
"github.com/photoprism/photoprism/internal/entity"
|
||||
"github.com/photoprism/photoprism/pkg/clean"
|
||||
"github.com/photoprism/photoprism/pkg/http/scheme"
|
||||
)
|
||||
|
||||
// init registers the OpenAI engine alias so models can set Engine: "openai"
|
||||
// and inherit sensible defaults (request/response formats, file scheme, and
|
||||
// preferred thumbnail resolution).
|
||||
// openaiDefaults provides canned prompts, schema templates, and options for OpenAI engines.
|
||||
type openaiDefaults struct{}
|
||||
|
||||
// openaiBuilder prepares ApiRequest objects for OpenAI's Responses API.
|
||||
type openaiBuilder struct{}
|
||||
|
||||
// openaiParser converts Responses API payloads into ApiResponse instances.
|
||||
type openaiParser struct{}
|
||||
|
||||
func init() {
|
||||
RegisterEngineAlias(openai.EngineName, EngineInfo{
|
||||
RequestFormat: ApiFormatOpenAI,
|
||||
ResponseFormat: ApiFormatOpenAI,
|
||||
FileScheme: string(scheme.Base64),
|
||||
DefaultResolution: openai.DefaultResolution,
|
||||
RegisterEngine(ApiFormatOpenAI, Engine{
|
||||
Builder: openaiBuilder{},
|
||||
Parser: openaiParser{},
|
||||
Defaults: openaiDefaults{},
|
||||
})
|
||||
}
|
||||
|
||||
// SystemPrompt returns the default OpenAI system prompt for the specified model type.
|
||||
func (openaiDefaults) SystemPrompt(model *Model) string {
|
||||
if model == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
switch model.Type {
|
||||
case ModelTypeCaption:
|
||||
return openai.CaptionSystem
|
||||
case ModelTypeLabels:
|
||||
return openai.LabelSystem
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// UserPrompt returns the default OpenAI user prompt for the specified model type.
|
||||
func (openaiDefaults) UserPrompt(model *Model) string {
|
||||
if model == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
switch model.Type {
|
||||
case ModelTypeCaption:
|
||||
return openai.CaptionPrompt
|
||||
case ModelTypeLabels:
|
||||
if DetectNSFWLabels {
|
||||
return openai.LabelPromptNSFW
|
||||
}
|
||||
return openai.LabelPromptDefault
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// SchemaTemplate returns the JSON schema template for the model, if applicable.
|
||||
func (openaiDefaults) SchemaTemplate(model *Model) string {
|
||||
if model == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
switch model.Type {
|
||||
case ModelTypeLabels:
|
||||
return string(openai.SchemaLabels(model.PromptContains("nsfw")))
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
// Options returns default OpenAI request options for the model.
|
||||
func (openaiDefaults) Options(model *Model) *ApiRequestOptions {
|
||||
if model == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
switch model.Type {
|
||||
case ModelTypeCaption:
|
||||
/*
|
||||
Options:
|
||||
Detail: low
|
||||
MaxOutputTokens: 512
|
||||
Temperature: 0.1
|
||||
TopP: 0.9
|
||||
(Sampling values are zeroed for GPT-5 models in openaiBuilder.Build.)
|
||||
*/
|
||||
return &ApiRequestOptions{
|
||||
Detail: openai.DefaultDetail,
|
||||
MaxOutputTokens: openai.CaptionMaxTokens,
|
||||
Temperature: openai.DefaultTemperature,
|
||||
TopP: openai.DefaultTopP,
|
||||
}
|
||||
case ModelTypeLabels:
|
||||
/*
|
||||
Options:
|
||||
Detail: low
|
||||
MaxOutputTokens: 1024
|
||||
Temperature: 0.1
|
||||
ForceJson: true
|
||||
SchemaVersion: "photoprism_vision_labels_v1"
|
||||
(Sampling values are zeroed for GPT-5 models in openaiBuilder.Build.)
|
||||
*/
|
||||
return &ApiRequestOptions{
|
||||
Detail: openai.DefaultDetail,
|
||||
MaxOutputTokens: openai.LabelsMaxTokens,
|
||||
Temperature: openai.DefaultTemperature,
|
||||
TopP: openai.DefaultTopP,
|
||||
ForceJson: true,
|
||||
}
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Build constructs an OpenAI request payload using base64-encoded thumbnails.
|
||||
func (openaiBuilder) Build(ctx context.Context, model *Model, files Files) (*ApiRequest, error) {
|
||||
if model == nil {
|
||||
return nil, ErrInvalidModel
|
||||
}
|
||||
|
||||
dataReq, err := NewApiRequestImages(files, scheme.Data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req := &ApiRequest{
|
||||
Id: dataReq.Id,
|
||||
Images: append(Files(nil), dataReq.Images...),
|
||||
ResponseFormat: ApiFormatOpenAI,
|
||||
}
|
||||
|
||||
if opts := model.GetOptions(); opts != nil {
|
||||
req.Options = cloneOptions(opts)
|
||||
if model.Type == ModelTypeCaption {
|
||||
// Captions default to plain text responses; structured JSON is optional.
|
||||
req.Options.ForceJson = false
|
||||
if req.Options.MaxOutputTokens < openai.CaptionMaxTokens {
|
||||
req.Options.MaxOutputTokens = openai.CaptionMaxTokens
|
||||
}
|
||||
} else if model.Type == ModelTypeLabels {
|
||||
if req.Options.MaxOutputTokens < openai.LabelsMaxTokens {
|
||||
req.Options.MaxOutputTokens = openai.LabelsMaxTokens
|
||||
}
|
||||
}
|
||||
|
||||
if strings.HasPrefix(strings.ToLower(strings.TrimSpace(model.Name)), "gpt-5") {
|
||||
req.Options.Temperature = 0
|
||||
req.Options.TopP = 0
|
||||
}
|
||||
}
|
||||
|
||||
if schema := strings.TrimSpace(model.SchemaTemplate()); schema != "" {
|
||||
if raw, parseErr := parseOpenAISchema(schema); parseErr != nil {
|
||||
log.Warnf("vision: failed to parse OpenAI schema template (%s)", clean.Error(parseErr))
|
||||
} else {
|
||||
req.Schema = raw
|
||||
}
|
||||
}
|
||||
|
||||
return req, nil
|
||||
}
|
||||
|
||||
// Parse converts an OpenAI Responses API payload into the internal ApiResponse representation.
|
||||
func (openaiParser) Parse(ctx context.Context, req *ApiRequest, raw []byte, status int) (*ApiResponse, error) {
|
||||
if status >= 300 {
|
||||
if msg := openai.ParseErrorMessage(raw); msg != "" {
|
||||
return nil, fmt.Errorf("openai: %s", msg)
|
||||
}
|
||||
return nil, fmt.Errorf("openai: status %d", status)
|
||||
}
|
||||
|
||||
var resp openai.Response
|
||||
if err := json.Unmarshal(raw, &resp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if resp.Error != nil && resp.Error.Message != "" {
|
||||
return nil, errors.New(resp.Error.Message)
|
||||
}
|
||||
|
||||
result := ApiResult{}
|
||||
if jsonPayload := resp.FirstJSON(); len(jsonPayload) > 0 {
|
||||
if err := populateOpenAIJSONResult(&result, jsonPayload); err != nil {
|
||||
log.Debugf("vision: %s (parse openai json payload)", clean.Error(err))
|
||||
}
|
||||
}
|
||||
|
||||
if result.Caption == nil {
|
||||
if text := resp.FirstText(); text != "" {
|
||||
trimmed := strings.TrimSpace(text)
|
||||
var parsedJSON bool
|
||||
|
||||
if len(trimmed) > 0 && (trimmed[0] == '{' || trimmed[0] == '[') {
|
||||
if err := populateOpenAIJSONResult(&result, json.RawMessage(trimmed)); err != nil {
|
||||
log.Debugf("vision: %s (parse openai json text payload)", clean.Error(err))
|
||||
} else {
|
||||
parsedJSON = true
|
||||
}
|
||||
}
|
||||
|
||||
if !parsedJSON && trimmed != "" {
|
||||
result.Caption = &CaptionResult{
|
||||
Text: trimmed,
|
||||
Source: entity.SrcOpenAI,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var responseID string
|
||||
if req != nil {
|
||||
responseID = req.GetId()
|
||||
}
|
||||
|
||||
modelName := strings.TrimSpace(resp.Model)
|
||||
if modelName == "" && req != nil {
|
||||
modelName = strings.TrimSpace(req.Model)
|
||||
}
|
||||
|
||||
return &ApiResponse{
|
||||
Id: responseID,
|
||||
Code: status,
|
||||
Model: &Model{Name: modelName},
|
||||
Result: result,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// parseOpenAISchema validates the provided JSON schema and returns it as a raw message.
|
||||
func parseOpenAISchema(schema string) (json.RawMessage, error) {
|
||||
var raw json.RawMessage
|
||||
if err := json.Unmarshal([]byte(schema), &raw); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return normalizeOpenAISchema(raw)
|
||||
}
|
||||
|
||||
// normalizeOpenAISchema upgrades legacy label schema definitions so they comply with
|
||||
// OpenAI's json_schema format requirements.
|
||||
func normalizeOpenAISchema(raw json.RawMessage) (json.RawMessage, error) {
|
||||
if len(raw) == 0 {
|
||||
return raw, nil
|
||||
}
|
||||
|
||||
var doc map[string]any
|
||||
if err := json.Unmarshal(raw, &doc); err != nil {
|
||||
// Fallback to the original payload if it isn't a JSON object.
|
||||
return raw, nil
|
||||
}
|
||||
|
||||
if t, ok := doc["type"]; ok {
|
||||
if typeStr, ok := t.(string); ok && strings.TrimSpace(typeStr) != "" {
|
||||
return raw, nil
|
||||
}
|
||||
}
|
||||
|
||||
if _, ok := doc["properties"]; ok {
|
||||
return raw, nil
|
||||
}
|
||||
|
||||
labels, ok := doc["labels"]
|
||||
if !ok {
|
||||
return raw, nil
|
||||
}
|
||||
|
||||
nsfw := false
|
||||
|
||||
if items, ok := labels.([]any); ok && len(items) > 0 {
|
||||
if first, ok := items[0].(map[string]any); ok {
|
||||
if _, hasNSFW := first["nsfw"]; hasNSFW {
|
||||
nsfw = true
|
||||
}
|
||||
if _, hasNSFWConfidence := first["nsfw_confidence"]; hasNSFWConfidence {
|
||||
nsfw = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return openai.SchemaLabels(nsfw), nil
|
||||
}
|
||||
|
||||
// populateOpenAIJSONResult unmarshals a structured OpenAI response into ApiResult fields.
|
||||
func populateOpenAIJSONResult(result *ApiResult, payload json.RawMessage) error {
|
||||
if result == nil || len(payload) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var envelope struct {
|
||||
Caption *struct {
|
||||
Text string `json:"text"`
|
||||
Confidence float32 `json:"confidence"`
|
||||
} `json:"caption"`
|
||||
Labels []LabelResult `json:"labels"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(payload, &envelope); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if envelope.Caption != nil {
|
||||
text := strings.TrimSpace(envelope.Caption.Text)
|
||||
if text != "" {
|
||||
result.Caption = &CaptionResult{
|
||||
Text: text,
|
||||
Confidence: envelope.Caption.Confidence,
|
||||
Source: entity.SrcOpenAI,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(envelope.Labels) > 0 {
|
||||
filtered := envelope.Labels[:0]
|
||||
|
||||
for i := range envelope.Labels {
|
||||
if envelope.Labels[i].Source == "" {
|
||||
envelope.Labels[i].Source = entity.SrcOpenAI
|
||||
}
|
||||
|
||||
normalizeLabelResult(&envelope.Labels[i])
|
||||
|
||||
if envelope.Labels[i].Name == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
filtered = append(filtered, envelope.Labels[i])
|
||||
}
|
||||
|
||||
result.Labels = append(result.Labels, filtered...)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
|||
337
internal/ai/vision/engine_openai_test.go
Normal file
337
internal/ai/vision/engine_openai_test.go
Normal file
|
|
@ -0,0 +1,337 @@
|
|||
package vision
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/openai"
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/schema"
|
||||
"github.com/photoprism/photoprism/internal/entity"
|
||||
)
|
||||
|
||||
func TestOpenAIBuilderBuild(t *testing.T) {
|
||||
model := &Model{
|
||||
Type: ModelTypeLabels,
|
||||
Name: openai.DefaultModel,
|
||||
Engine: openai.EngineName,
|
||||
}
|
||||
model.ApplyEngineDefaults()
|
||||
|
||||
request, err := openaiBuilder{}.Build(context.Background(), model, Files{examplesPath + "/chameleon_lime.jpg"})
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, request)
|
||||
|
||||
assert.Equal(t, ApiFormatOpenAI, request.ResponseFormat)
|
||||
assert.NotEmpty(t, request.Images)
|
||||
assert.NotNil(t, request.Options)
|
||||
assert.Equal(t, openai.DefaultDetail, request.Options.Detail)
|
||||
assert.True(t, request.Options.ForceJson)
|
||||
assert.GreaterOrEqual(t, request.Options.MaxOutputTokens, openai.LabelsMaxTokens)
|
||||
}
|
||||
|
||||
func TestOpenAIBuilderBuildCaptionDisablesForceJSON(t *testing.T) {
|
||||
model := &Model{
|
||||
Type: ModelTypeCaption,
|
||||
Name: openai.DefaultModel,
|
||||
Engine: openai.EngineName,
|
||||
Options: &ApiRequestOptions{ForceJson: true},
|
||||
}
|
||||
model.ApplyEngineDefaults()
|
||||
|
||||
request, err := openaiBuilder{}.Build(context.Background(), model, Files{examplesPath + "/chameleon_lime.jpg"})
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, request)
|
||||
require.NotNil(t, request.Options)
|
||||
assert.False(t, request.Options.ForceJson)
|
||||
assert.GreaterOrEqual(t, request.Options.MaxOutputTokens, openai.CaptionMaxTokens)
|
||||
}
|
||||
|
||||
func TestApiRequestJSONForOpenAI(t *testing.T) {
|
||||
req := &ApiRequest{
|
||||
Model: "gpt-5-mini",
|
||||
System: "system",
|
||||
Prompt: "describe the scene",
|
||||
Images: []string{"data:image/jpeg;base64,AA=="},
|
||||
ResponseFormat: ApiFormatOpenAI,
|
||||
Options: &ApiRequestOptions{
|
||||
Detail: openai.DefaultDetail,
|
||||
MaxOutputTokens: 128,
|
||||
Temperature: 0.2,
|
||||
TopP: 0.8,
|
||||
ForceJson: true,
|
||||
},
|
||||
Schema: json.RawMessage(`{"type":"object","properties":{"caption":{"type":"object"}}}`),
|
||||
}
|
||||
|
||||
payload, err := req.JSON()
|
||||
require.NoError(t, err)
|
||||
|
||||
var decoded struct {
|
||||
Model string `json:"model"`
|
||||
Input []struct {
|
||||
Role string `json:"role"`
|
||||
Content []struct {
|
||||
Type string `json:"type"`
|
||||
} `json:"content"`
|
||||
} `json:"input"`
|
||||
Text struct {
|
||||
Format struct {
|
||||
Type string `json:"type"`
|
||||
Name string `json:"name"`
|
||||
Schema json.RawMessage `json:"schema"`
|
||||
Strict bool `json:"strict"`
|
||||
} `json:"format"`
|
||||
} `json:"text"`
|
||||
Reasoning struct {
|
||||
Effort string `json:"effort"`
|
||||
} `json:"reasoning"`
|
||||
MaxOutputTokens int `json:"max_output_tokens"`
|
||||
}
|
||||
|
||||
require.NoError(t, json.Unmarshal(payload, &decoded))
|
||||
assert.Equal(t, "gpt-5-mini", decoded.Model)
|
||||
require.Len(t, decoded.Input, 2)
|
||||
assert.Equal(t, "system", decoded.Input[0].Role)
|
||||
assert.Equal(t, openai.ResponseFormatJSONSchema, decoded.Text.Format.Type)
|
||||
assert.Equal(t, schema.JsonSchemaName(decoded.Text.Format.Schema, openai.DefaultSchemaVersion), decoded.Text.Format.Name)
|
||||
assert.False(t, decoded.Text.Format.Strict)
|
||||
assert.NotNil(t, decoded.Text.Format.Schema)
|
||||
assert.Equal(t, "low", decoded.Reasoning.Effort)
|
||||
assert.Equal(t, 128, decoded.MaxOutputTokens)
|
||||
}
|
||||
|
||||
func TestApiRequestJSONForOpenAIDefaultSchemaName(t *testing.T) {
|
||||
req := &ApiRequest{
|
||||
Model: "gpt-5-mini",
|
||||
Images: []string{"data:image/jpeg;base64,AA=="},
|
||||
ResponseFormat: ApiFormatOpenAI,
|
||||
Options: &ApiRequestOptions{
|
||||
Detail: openai.DefaultDetail,
|
||||
MaxOutputTokens: 64,
|
||||
ForceJson: true,
|
||||
},
|
||||
Schema: json.RawMessage(`{"type":"object"}`),
|
||||
}
|
||||
|
||||
payload, err := req.JSON()
|
||||
require.NoError(t, err)
|
||||
|
||||
var decoded struct {
|
||||
Text struct {
|
||||
Format struct {
|
||||
Name string `json:"name"`
|
||||
} `json:"format"`
|
||||
} `json:"text"`
|
||||
}
|
||||
|
||||
require.NoError(t, json.Unmarshal(payload, &decoded))
|
||||
assert.Equal(t, schema.JsonSchemaName(req.Schema, openai.DefaultSchemaVersion), decoded.Text.Format.Name)
|
||||
}
|
||||
|
||||
func TestOpenAIParserParsesJSONFromTextPayload(t *testing.T) {
|
||||
respPayload := `{
|
||||
"id": "resp_123",
|
||||
"model": "gpt-5-mini",
|
||||
"output": [{
|
||||
"role": "assistant",
|
||||
"content": [{
|
||||
"type": "output_text",
|
||||
"text": "{\"labels\":[{\"name\":\"deer\",\"confidence\":0.98,\"topicality\":0.99}]}"
|
||||
}]
|
||||
}]
|
||||
}`
|
||||
|
||||
req := &ApiRequest{
|
||||
Id: "test",
|
||||
Model: "gpt-5-mini",
|
||||
ResponseFormat: ApiFormatOpenAI,
|
||||
}
|
||||
|
||||
resp, err := openaiParser{}.Parse(context.Background(), req, []byte(respPayload), http.StatusOK)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, resp)
|
||||
require.Len(t, resp.Result.Labels, 1)
|
||||
assert.Equal(t, "Deer", resp.Result.Labels[0].Name)
|
||||
assert.Nil(t, resp.Result.Caption)
|
||||
}
|
||||
|
||||
func TestParseOpenAISchemaLegacyUpgrade(t *testing.T) {
|
||||
legacy := `{
|
||||
"labels": [{
|
||||
"name": "",
|
||||
"confidence": 0,
|
||||
"topicality": 0
|
||||
}]
|
||||
}`
|
||||
|
||||
raw, err := parseOpenAISchema(legacy)
|
||||
require.NoError(t, err)
|
||||
|
||||
var decoded map[string]any
|
||||
require.NoError(t, json.Unmarshal(raw, &decoded))
|
||||
|
||||
assert.Equal(t, "object", decoded["type"])
|
||||
|
||||
props, ok := decoded["properties"].(map[string]any)
|
||||
require.True(t, ok)
|
||||
labels, ok := props["labels"].(map[string]any)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "array", labels["type"])
|
||||
}
|
||||
|
||||
func TestParseOpenAISchemaLegacyUpgradeNSFW(t *testing.T) {
|
||||
legacy := `{
|
||||
"labels": [{
|
||||
"name": "",
|
||||
"confidence": 0,
|
||||
"topicality": 0,
|
||||
"nsfw": false,
|
||||
"nsfw_confidence": 0
|
||||
}]
|
||||
}`
|
||||
|
||||
raw, err := parseOpenAISchema(legacy)
|
||||
require.NoError(t, err)
|
||||
|
||||
var decoded map[string]any
|
||||
require.NoError(t, json.Unmarshal(raw, &decoded))
|
||||
|
||||
props := decoded["properties"].(map[string]any)
|
||||
labels := props["labels"].(map[string]any)
|
||||
items := labels["items"].(map[string]any)
|
||||
_, hasNSFW := items["properties"].(map[string]any)["nsfw"]
|
||||
assert.True(t, hasNSFW)
|
||||
}
|
||||
|
||||
func TestPerformApiRequestOpenAISuccess(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
var reqPayload struct {
|
||||
Model string `json:"model"`
|
||||
}
|
||||
assert.NoError(t, json.NewDecoder(r.Body).Decode(&reqPayload))
|
||||
assert.Equal(t, "gpt-5-mini", reqPayload.Model)
|
||||
|
||||
response := map[string]any{
|
||||
"id": "resp_123",
|
||||
"model": "gpt-5-mini",
|
||||
"output": []any{
|
||||
map[string]any{
|
||||
"role": "assistant",
|
||||
"content": []any{
|
||||
map[string]any{
|
||||
"type": "output_json",
|
||||
"json": map[string]any{
|
||||
"caption": map[string]any{
|
||||
"text": "A cat rests on a windowsill.",
|
||||
"confidence": 0.91,
|
||||
},
|
||||
"labels": []map[string]any{
|
||||
{
|
||||
"name": "cat",
|
||||
"confidence": 0.92,
|
||||
"topicality": 0.88,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
assert.NoError(t, json.NewEncoder(w).Encode(response))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
req := &ApiRequest{
|
||||
Id: "test",
|
||||
Model: "gpt-5-mini",
|
||||
Images: []string{"data:image/jpeg;base64,AA=="},
|
||||
ResponseFormat: ApiFormatOpenAI,
|
||||
Options: &ApiRequestOptions{
|
||||
Detail: openai.DefaultDetail,
|
||||
},
|
||||
Schema: json.RawMessage(`{"type":"object"}`),
|
||||
}
|
||||
|
||||
resp, err := PerformApiRequest(req, server.URL, http.MethodPost, "secret")
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, resp)
|
||||
|
||||
require.NotNil(t, resp.Result.Caption)
|
||||
assert.Equal(t, entity.SrcOpenAI, resp.Result.Caption.Source)
|
||||
assert.Equal(t, "A cat rests on a windowsill.", resp.Result.Caption.Text)
|
||||
|
||||
require.Len(t, resp.Result.Labels, 1)
|
||||
assert.Equal(t, entity.SrcOpenAI, resp.Result.Labels[0].Source)
|
||||
assert.Equal(t, "Cat", resp.Result.Labels[0].Name)
|
||||
}
|
||||
|
||||
func TestPerformApiRequestOpenAITextFallback(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
response := map[string]any{
|
||||
"id": "resp_456",
|
||||
"model": "gpt-5-mini",
|
||||
"output": []any{
|
||||
map[string]any{
|
||||
"role": "assistant",
|
||||
"content": []any{
|
||||
map[string]any{
|
||||
"type": "output_text",
|
||||
"text": "Two hikers reach the summit at sunset.",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
assert.NoError(t, json.NewEncoder(w).Encode(response))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
req := &ApiRequest{
|
||||
Id: "fallback",
|
||||
Model: "gpt-5-mini",
|
||||
Images: []string{"data:image/jpeg;base64,AA=="},
|
||||
ResponseFormat: ApiFormatOpenAI,
|
||||
Options: &ApiRequestOptions{
|
||||
Detail: openai.DefaultDetail,
|
||||
},
|
||||
Schema: nil,
|
||||
}
|
||||
|
||||
resp, err := PerformApiRequest(req, server.URL, http.MethodPost, "")
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, resp.Result.Caption)
|
||||
assert.Equal(t, "Two hikers reach the summit at sunset.", resp.Result.Caption.Text)
|
||||
assert.Equal(t, entity.SrcOpenAI, resp.Result.Caption.Source)
|
||||
}
|
||||
|
||||
func TestPerformApiRequestOpenAIError(t *testing.T) {
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.WriteHeader(http.StatusBadRequest)
|
||||
_ = json.NewEncoder(w).Encode(map[string]any{
|
||||
"error": map[string]any{
|
||||
"message": "Invalid image payload",
|
||||
},
|
||||
})
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
req := &ApiRequest{
|
||||
Id: "error",
|
||||
Model: "gpt-5-mini",
|
||||
ResponseFormat: ApiFormatOpenAI,
|
||||
Schema: nil,
|
||||
Images: []string{"data:image/jpeg;base64,AA=="},
|
||||
}
|
||||
|
||||
_, err := PerformApiRequest(req, server.URL, http.MethodPost, "")
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "Invalid image payload")
|
||||
}
|
||||
|
|
@ -96,8 +96,10 @@ func labelsInternal(images Files, mediaSrc media.Src, labelSrc entity.Src) (resu
|
|||
apiRequest.Prompt = prompt
|
||||
}
|
||||
|
||||
if options := model.GetOptions(); options != nil {
|
||||
apiRequest.Options = options
|
||||
if apiRequest.Options == nil {
|
||||
if options := model.GetOptions(); options != nil {
|
||||
apiRequest.Options = options
|
||||
}
|
||||
}
|
||||
|
||||
apiRequest.WriteLog()
|
||||
|
|
|
|||
|
|
@ -154,9 +154,11 @@ func (m *Model) EndpointKey() (key string) {
|
|||
|
||||
if key = m.Service.EndpointKey(); key != "" {
|
||||
return key
|
||||
} else {
|
||||
return ServiceKey
|
||||
}
|
||||
|
||||
ensureEnv()
|
||||
|
||||
return strings.TrimSpace(os.ExpandEnv(ServiceKey))
|
||||
}
|
||||
|
||||
// EndpointFileScheme returns the endpoint API request file scheme type. Nil
|
||||
|
|
@ -348,6 +350,26 @@ func mergeOptionDefaults(target, defaults *ApiRequestOptions) {
|
|||
if len(target.Stop) == 0 && len(defaults.Stop) > 0 {
|
||||
target.Stop = append([]string(nil), defaults.Stop...)
|
||||
}
|
||||
|
||||
if target.MaxOutputTokens <= 0 && defaults.MaxOutputTokens > 0 {
|
||||
target.MaxOutputTokens = defaults.MaxOutputTokens
|
||||
}
|
||||
|
||||
if strings.TrimSpace(target.Detail) == "" && strings.TrimSpace(defaults.Detail) != "" {
|
||||
target.Detail = strings.TrimSpace(defaults.Detail)
|
||||
}
|
||||
|
||||
if !target.ForceJson && defaults.ForceJson {
|
||||
target.ForceJson = true
|
||||
}
|
||||
|
||||
if target.SchemaVersion == "" && defaults.SchemaVersion != "" {
|
||||
target.SchemaVersion = defaults.SchemaVersion
|
||||
}
|
||||
|
||||
if target.CombineOutputs == "" && defaults.CombineOutputs != "" {
|
||||
target.CombineOutputs = defaults.CombineOutputs
|
||||
}
|
||||
}
|
||||
|
||||
func normalizeOptions(opts *ApiRequestOptions) {
|
||||
|
|
@ -422,6 +444,10 @@ func (m *Model) ApplyEngineDefaults() {
|
|||
}
|
||||
|
||||
if info, ok := EngineInfoFor(engine); ok {
|
||||
if m.Service.Uri == "" {
|
||||
m.Service.Uri = info.Uri
|
||||
}
|
||||
|
||||
if m.Service.RequestFormat == "" {
|
||||
m.Service.RequestFormat = info.RequestFormat
|
||||
}
|
||||
|
|
@ -439,6 +465,10 @@ func (m *Model) ApplyEngineDefaults() {
|
|||
}
|
||||
}
|
||||
|
||||
if engine == openai.EngineName && strings.TrimSpace(m.Service.Key) == "" {
|
||||
m.Service.Key = "${OPENAI_API_KEY}"
|
||||
}
|
||||
|
||||
m.Engine = engine
|
||||
}
|
||||
|
||||
|
|
@ -490,7 +520,7 @@ func (m *Model) SchemaTemplate() string {
|
|||
}
|
||||
|
||||
if m.schema == "" {
|
||||
m.schema = visionschema.Labels(m.PromptContains("nsfw"))
|
||||
m.schema = visionschema.LabelsJson(m.PromptContains("nsfw"))
|
||||
}
|
||||
}
|
||||
})
|
||||
|
|
|
|||
|
|
@ -1,13 +1,17 @@
|
|||
package vision
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/photoprism/photoprism/internal/ai/tensorflow"
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/openai"
|
||||
"github.com/photoprism/photoprism/internal/entity"
|
||||
"github.com/photoprism/photoprism/pkg/http/scheme"
|
||||
)
|
||||
|
||||
func TestModelGetOptionsDefaultsOllamaLabels(t *testing.T) {
|
||||
|
|
@ -108,6 +112,85 @@ func TestModelApplyEngineDefaultsSetsResolution(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestModelApplyEngineDefaultsSetsServiceDefaults(t *testing.T) {
|
||||
t.Run("OpenAIEngine", func(t *testing.T) {
|
||||
model := &Model{
|
||||
Type: ModelTypeCaption,
|
||||
Engine: openai.EngineName,
|
||||
}
|
||||
|
||||
model.ApplyEngineDefaults()
|
||||
|
||||
assert.Equal(t, "https://api.openai.com/v1/responses", model.Service.Uri)
|
||||
assert.Equal(t, ApiFormatOpenAI, model.Service.RequestFormat)
|
||||
assert.Equal(t, ApiFormatOpenAI, model.Service.ResponseFormat)
|
||||
assert.Equal(t, scheme.Data, model.Service.FileScheme)
|
||||
})
|
||||
t.Run("PreserveExistingService", func(t *testing.T) {
|
||||
model := &Model{
|
||||
Type: ModelTypeCaption,
|
||||
Engine: openai.EngineName,
|
||||
Service: Service{
|
||||
Uri: "https://custom.example",
|
||||
FileScheme: scheme.Base64,
|
||||
RequestFormat: ApiFormatOpenAI,
|
||||
},
|
||||
}
|
||||
|
||||
model.ApplyEngineDefaults()
|
||||
|
||||
assert.Equal(t, "https://custom.example", model.Service.Uri)
|
||||
assert.Equal(t, scheme.Base64, model.Service.FileScheme)
|
||||
})
|
||||
}
|
||||
|
||||
func TestModelEndpointKeyOpenAIFallbacks(t *testing.T) {
|
||||
t.Run("EnvFile", func(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
path := filepath.Join(dir, "openai.key")
|
||||
if err := os.WriteFile(path, []byte("from-file\n"), 0o600); err != nil {
|
||||
t.Fatalf("write key file: %v", err)
|
||||
}
|
||||
|
||||
t.Setenv("OPENAI_API_KEY", "")
|
||||
t.Setenv("OPENAI_API_KEY_FILE", path)
|
||||
|
||||
model := &Model{Type: ModelTypeCaption, Engine: openai.EngineName}
|
||||
model.ApplyEngineDefaults()
|
||||
|
||||
if got := model.EndpointKey(); got != "from-file" {
|
||||
t.Fatalf("expected file key, got %q", got)
|
||||
}
|
||||
})
|
||||
t.Run("CustomPlaceholder", func(t *testing.T) {
|
||||
t.Setenv("OPENAI_API_KEY", "env-secret")
|
||||
|
||||
model := &Model{Type: ModelTypeCaption, Engine: openai.EngineName}
|
||||
model.ApplyEngineDefaults()
|
||||
if got := model.EndpointKey(); got != "env-secret" {
|
||||
t.Fatalf("expected env secret, got %q", got)
|
||||
}
|
||||
|
||||
model.Service.Key = "${CUSTOM_KEY}"
|
||||
t.Setenv("CUSTOM_KEY", "custom-secret")
|
||||
if got := model.EndpointKey(); got != "custom-secret" {
|
||||
t.Fatalf("expected custom secret, got %q", got)
|
||||
}
|
||||
})
|
||||
t.Run("GlobalFallback", func(t *testing.T) {
|
||||
prev := ServiceKey
|
||||
ServiceKey = "${GLOBAL_KEY}"
|
||||
defer func() { ServiceKey = prev }()
|
||||
|
||||
t.Setenv("GLOBAL_KEY", "global-secret")
|
||||
|
||||
model := &Model{}
|
||||
if got := model.EndpointKey(); got != "global-secret" {
|
||||
t.Fatalf("expected global secret, got %q", got)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestModelGetSource(t *testing.T) {
|
||||
t.Run("NilModel", func(t *testing.T) {
|
||||
var model *Model
|
||||
|
|
@ -115,21 +198,18 @@ func TestModelGetSource(t *testing.T) {
|
|||
t.Fatalf("expected SrcAuto for nil model, got %s", src)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("EngineAlias", func(t *testing.T) {
|
||||
model := &Model{Engine: ollama.EngineName}
|
||||
if src := model.GetSource(); src != entity.SrcOllama {
|
||||
t.Fatalf("expected SrcOllama, got %s", src)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("RequestFormat", func(t *testing.T) {
|
||||
model := &Model{Service: Service{RequestFormat: ApiFormatOpenAI}}
|
||||
if src := model.GetSource(); src != entity.SrcOpenAI {
|
||||
t.Fatalf("expected SrcOpenAI, got %s", src)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("DefaultImage", func(t *testing.T) {
|
||||
model := &Model{}
|
||||
if src := model.GetSource(); src != entity.SrcImage {
|
||||
|
|
|
|||
152
internal/ai/vision/ollama/README.md
Normal file
152
internal/ai/vision/ollama/README.md
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
## PhotoPrism — Ollama Engine Integration
|
||||
|
||||
**Last Updated:** November 14, 2025
|
||||
|
||||
### Overview
|
||||
|
||||
This package provides PhotoPrism’s native adapter for Ollama-compatible multimodal models. It lets Caption, Labels, and future Generate workflows call locally hosted models without changing worker logic, reusing the shared API client (`internal/ai/vision/api_client.go`) and result types (`LabelResult`, `CaptionResult`). Requests stay inside your infrastructure, rely on base64 thumbnails, and honor the same ACL, timeout, and logging hooks as the default TensorFlow engines.
|
||||
|
||||
#### Context & Constraints
|
||||
|
||||
- Engine defaults live in `internal/ai/vision/ollama` and are applied whenever a model sets `Engine: ollama`. Aliases map to `ApiFormatOllama`, `scheme.Base64`, and a default 720 px thumbnail.
|
||||
- Responses may arrive as newline-delimited JSON chunks. `decodeOllamaResponse` keeps the most recent chunk, while `parseOllamaLabels` replays plain JSON strings found in `response`.
|
||||
- Structured JSON is optional for captions but enforced for labels when `Format: json` (default for label models targeting the Ollama engine).
|
||||
- The adapter never overwrites TensorFlow defaults. If an Ollama call fails, downstream code still has Nasnet, NSFW, and Face models available.
|
||||
- Workers assume a single-image payload per request. Run `photoprism vision run` to validate multi-image prompts before changing that invariant.
|
||||
|
||||
#### Goals
|
||||
|
||||
- Let operators opt into local, private LLMs for captions and labels via `vision.yml`.
|
||||
- Provide safe defaults (prompts, schema, sampling) so most deployments only need to specify `Name`, `Engine`, and `Service.Uri`.
|
||||
- Surface reproducible logs, metrics, and CLI commands that make it easy to compare Ollama output against TensorFlow/OpenAI engines.
|
||||
|
||||
#### Non-Goals
|
||||
|
||||
- Managing Ollama itself (model downloads, GPU scheduling, or authentication). Use the Compose profiles provided in the repository.
|
||||
- Adding new HTTP endpoints or bypassing the existing `photoprism vision` CLI.
|
||||
- Replacing TensorFlow workers—Ollama engines are additive and opt-in.
|
||||
|
||||
### Architecture & Request Flow
|
||||
|
||||
1. **Model Selection** — `Config.Model(ModelType)` returns the top-most enabled entry. When `Engine: ollama`, `ApplyEngineDefaults()` fills in the request/response format, base64 file scheme, and a 720 px resolution unless overridden.
|
||||
2. **Request Build** — `ollamaBuilder.Build` wraps thumbnails with `NewApiRequestOllama`, which encodes them as base64 strings. `Model.Model()` resolves the exact Ollama tag (`gemma3:4b`, `qwen2.5vl:7b`, etc.).
|
||||
3. **Transport** — `PerformApiRequest` uses a single HTTP POST (default timeout 10 min). Authentication is optional; provide `Service.Key` if you proxy through an API gateway.
|
||||
4. **Parsing** — `ollamaParser.Parse` converts payloads into `ApiResponse`. It normalizes confidences (`LabelConfidenceDefault = 0.5` when missing), copies NSFW scores, and canonicalizes label names via `normalizeLabelResult`.
|
||||
5. **Persistence** — `entity.SrcOllama` is stamped on labels/captions so UI badges and audits reflect the new source.
|
||||
|
||||
### Prompt, Schema, & Options Guidance
|
||||
|
||||
- **System Prompts**
|
||||
- Labels: `LabelSystem` enforces single-word nouns. Set `System` to override; assign `LabelSystemSimple` when you need descriptive phrases.
|
||||
- Captions: no system prompt by default; rely on user prompt or set one explicitly for stylistic needs.
|
||||
- **User Prompts**
|
||||
- Captions use `CaptionPrompt`, which requests one sentence in active voice.
|
||||
- Labels default to `LabelPromptDefault`; when `DetectNSFWLabels` is true, the adapter swaps in `LabelPromptNSFW`.
|
||||
- For stricter noun enforcement, set `Prompt` to `LabelPromptStrict`.
|
||||
- **Schemas**
|
||||
- Labels rely on `schema.LabelsJson(nsfw)` (simple JSON template). Setting `Format: json` auto-attaches a reminder (`model.SchemaInstructions()`).
|
||||
- Override via `Schema` (inline YAML) or `SchemaFile`. `PHOTOPRISM_VISION_LABEL_SCHEMA_FILE` always wins if present.
|
||||
- **Options**
|
||||
- Labels: default `Temperature` equals `DefaultTemperature` (0.1 unless configured), `TopP=0.9`, `Stop=["\n\n"]`.
|
||||
- Captions: only `Temperature` is set; other parameters inherit global defaults.
|
||||
- Custom `Options` merge with engine defaults. Leave `ForceJson=true` for labels so PhotoPrism can reject malformed payloads early.
|
||||
|
||||
### Supported Ollama Vision Models
|
||||
|
||||
| Model (Ollama Tag) | Size & Footprint | Strengths | JSON & Language Notes | When To Use |
|
||||
|-------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `gemma3:4b / 12b / 27b` | 4B/12B/27B parameters, ~3.3 GB → 17 GB downloads, 128 K context | Multimodal text+image reasoning with SigLIP encoder, handles OCR/long documents, supports tool/function calling | Emits structured JSON reliably; >140 languages with strong default English output | High-quality captions + multilingual labels when you have ≥12 GB VRAM (4B works on 8 GB with Q4_K_M) |
|
||||
| `qwen2.5vl:7b` | 8.29 B params (Q4_K_M) ≈6 GB download, 125 K context | Excellent charts, GUI grounding, DocVQA, multi-image reasoning, agentic tool use | JSON mode tuned for schema compliance; supports 20+ languages with strong Chinese/English parity | Label extraction for mixed-language archives or UI/diagram analysis |
|
||||
| `qwen3-vl:2b / 4b / 8b` | Dense 2B/4B/8B tiers (~3 GB, ~3.5 GB, ~6 GB downloads) with native 256 K context extendable to 1 M; fits single 12–24 GB GPUs or high-end CPUs (2B) | Spatial + video reasoning upgrades (Interleaved-MRoPE, DeepStack), 32-language OCR, GUI/agent control, long-document ingest | Emits JSON reliably when prompts specify schema; multilingual captions/labels with Thinking variants boosting STEM reasoning | General-purpose captions/labels when you need long-context doc/video support without cloud APIs; 2B for CPU/edge, 4B as balanced default, 8B when accuracy outweighs latency |
|
||||
| `llama3.2-vision:11b` | 11 B params, ~7.8 GB download, requires ≥8 GB VRAM; 90 B variant needs ≥64 GB | Strong general reasoning, captioning, OCR, supported by Meta ecosystem tooling | Vision tasks officially supported in English; text-only tasks cover eight major languages | Keep captions consistent with Meta-compatible prompts or when teams already standardize on Llama 3.x |
|
||||
| `minicpm-v:8b-2.6` | 8 B params, ~5.5 GB download, 32 K context | Optimized for edge GPUs, high OCR accuracy, multi-image/video support, low token count (≈640 tokens for 1.8 MP) | Multilingual (EN/ZH/DE/FR/IT/KR). Emits concise JSON but may need stricter stopping sequences | Memory-constrained deployments that still require NSFW/OCR-aware label output |
|
||||
|
||||
> Tip: pull models inside the dev container with `docker compose --profile ollama up -d` and then `docker compose exec ollama ollama pull gemma3:4b`. Keep the profile stopped when you do not need extra GPU/CPU load.
|
||||
|
||||
> Qwen3-VL models stream their JSON payload via the `thinking` field. PhotoPrism v2025.11+ captures this automatically; if you run older builds, upgrade before enabling these models or responses will appear empty.
|
||||
|
||||
### Configuration
|
||||
|
||||
#### Environment Variables
|
||||
|
||||
- `PHOTOPRISM_VISION_LABEL_SCHEMA_FILE` — Absolute path to a JSON snippet that overrides the default label schema (applies to every Ollama label model).
|
||||
- `PHOTOPRISM_VISION_YAML` — Custom `vision.yml` path. Keep it synced in Git if you automate deployments.
|
||||
- `OLLAMA_HOST`, `OLLAMA_MODELS`, `OLLAMA_MAX_QUEUE`, `OLLAMA_NUM_PARALLEL`, etc. — Provided in `compose*.yaml` to tune the Ollama daemon. Adjust `OLLAMA_KEEP_ALIVE` if you want models to stay loaded between worker batches.
|
||||
- `PHOTOPRISM_LOG_LEVEL=trace` — Enables verbose request/response previews (truncated to avoid leaking images). Use temporarily when debugging parsing issues.
|
||||
|
||||
#### `vision.yml` Example
|
||||
|
||||
```yaml
|
||||
Models:
|
||||
- Type: labels
|
||||
Name: qwen2.5vl:7b
|
||||
Engine: ollama
|
||||
Run: newly-indexed
|
||||
Resolution: 720
|
||||
Format: json
|
||||
Options:
|
||||
Temperature: 0.05
|
||||
Stop: ["\n\n"]
|
||||
ForceJson: true
|
||||
Service:
|
||||
Uri: http://ollama:11434/api/generate
|
||||
RequestFormat: ollama
|
||||
ResponseFormat: ollama
|
||||
FileScheme: base64
|
||||
|
||||
- Type: caption
|
||||
Name: gemma3:4b
|
||||
Engine: ollama
|
||||
Disabled: false
|
||||
Options:
|
||||
Temperature: 0.2
|
||||
Service:
|
||||
Uri: http://ollama:11434/api/generate
|
||||
```
|
||||
|
||||
Guidelines:
|
||||
|
||||
- Place new entries after the default TensorFlow models so they take precedence while Nasnet/NSFW remain as fallbacks.
|
||||
- Always specify the exact Ollama tag (`model:version`) so upgrades are deliberate.
|
||||
- Keep option flags before positional arguments in CLI snippets (`photoprism vision run -m labels --count 1`).
|
||||
- If you proxy requests (e.g., through Traefik), set `Service.Key` to `Bearer <token>` and configure the proxy to inject/validate it.
|
||||
|
||||
### Operational Checklist
|
||||
|
||||
- **Scheduling** — Use `Run: newly-indexed` for incremental runs, `Run: manual` for ad-hoc CLI calls, or `Run: on-schedule` when paired with the scheduler. Leave `Run: auto` if you want the worker to decide based on other model states.
|
||||
- **Timeouts & Retries** — Default timeout is 10 minutes (`ServiceTimeout`). Ollama streaming responses complete faster in practice; if you need stricter SLAs, wrap `photoprism vision run` in a job runner and retry failed batches manually.
|
||||
- **Fallbacks** — Keep Nasnet configured even when Ollama labels are primary. `labels.go` stops at the first successful engine, so duplicates are avoided.
|
||||
- **Security** — When exposing Ollama beyond localhost, terminate TLS at Traefik and enable API keys. Never return full JSON payloads in logs; rely on trace mode only for debugging and sanitize before sharing.
|
||||
- **Model Storage** — Bind-mount `./storage/services/ollama:/root/.ollama` (see Compose) so pulled models survive container restarts. Run `docker compose exec ollama ollama list` during deployments to verify availability.
|
||||
|
||||
### Observability & Testing
|
||||
|
||||
- **CLI Smoke Tests**
|
||||
- Captions: `photoprism vision run -m caption --count 5 --force`.
|
||||
- Labels: `photoprism vision run -m labels --count 5 --force`.
|
||||
- After each run, check `photoprism vision ls` for `source=ollama`.
|
||||
- **Unit Tests**
|
||||
- `go test ./internal/ai/vision/ollama ./internal/ai/vision -run Ollama -count=1` covers transport parsing and model defaults.
|
||||
- Add fixtures under `internal/ai/vision/testdata` when capturing new response shapes; keep files small and anonymized.
|
||||
- **Logging**
|
||||
- Set `PHOTOPRISM_LOG_LEVEL=debug` to watch summary lines (“processed labels/caption via ollama”).
|
||||
- Use `log.Trace` sparingly; it prints truncated JSON blobs for troubleshooting.
|
||||
- **Metrics**
|
||||
- `/api/v1/metrics` exposes counts per label source; scrape after a batch to compare throughput with TensorFlow/OpenAI runs.
|
||||
|
||||
### Code Map
|
||||
|
||||
- `internal/ai/vision/ollama/*.go` — Engine defaults, schema helpers, transport structs.
|
||||
- `internal/ai/vision/engine_ollama.go` — Builder/parser glue plus label/caption normalization.
|
||||
- `internal/ai/vision/api_ollama.go` — Base64 payload builder.
|
||||
- `internal/ai/vision/api_client.go` — Streaming decoder shared among engines.
|
||||
- `internal/ai/vision/models.go` — Default caption model definition (`gemma3`).
|
||||
- `compose*.yaml` — Ollama service profile, Traefik labels, and persistent volume wiring.
|
||||
- `frontend/src/common/util.js` — Maps `src="ollama"` to the correct badge; keep it updated when adding new source strings.
|
||||
|
||||
### Next Steps
|
||||
|
||||
- [ ] Add formal schema validation (JSON Schema or JTD) so malformed label responses fail fast before normalization.
|
||||
- [ ] Support multiple thumbnails per request once core workflows confirm the API contract (requires worker + UI changes).
|
||||
- [ ] Emit per-model latency and success metrics from the vision worker to simplify tuning when several Ollama engines run side-by-side.
|
||||
- [ ] Mirror any loader changes into PhotoPrism Plus/Pro templates to keep splash + browser checks consistent after enabling external engines.
|
||||
|
|
@ -1,7 +1,5 @@
|
|||
package ollama
|
||||
|
||||
import "github.com/photoprism/photoprism/internal/ai/vision/schema"
|
||||
|
||||
const (
|
||||
// CaptionPrompt instructs Ollama caption models to emit a single, active-voice sentence.
|
||||
CaptionPrompt = "Create a caption with exactly one sentence in the active voice that describes the main visual content. Begin with the main subject and clear action. Avoid text formatting, meta-language, and filler words."
|
||||
|
|
@ -22,12 +20,3 @@ const (
|
|||
// DefaultResolution is the default thumbnail size submitted to Ollama models.
|
||||
DefaultResolution = 720
|
||||
)
|
||||
|
||||
// LabelsSchema returns the canonical label schema string consumed by Ollama models.
|
||||
func LabelsSchema(nsfw bool) string {
|
||||
if nsfw {
|
||||
return schema.LabelsNSFW
|
||||
} else {
|
||||
return schema.LabelsDefault
|
||||
}
|
||||
}
|
||||
|
|
|
|||
14
internal/ai/vision/ollama/schema.go
Normal file
14
internal/ai/vision/ollama/schema.go
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
package ollama
|
||||
|
||||
import (
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/schema"
|
||||
)
|
||||
|
||||
// SchemaLabels returns the canonical label schema string consumed by Ollama models.
|
||||
//
|
||||
// Related documentation and references:
|
||||
// - https://www.alibabacloud.com/help/en/model-studio/json-mode
|
||||
// - https://www.json.org/json-en.html
|
||||
func SchemaLabels(nsfw bool) string {
|
||||
return schema.LabelsJson(nsfw)
|
||||
}
|
||||
80
internal/ai/vision/ollama/transport.go
Normal file
80
internal/ai/vision/ollama/transport.go
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
package ollama
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Response encapsulates the subset of the Ollama generate API response we care about.
|
||||
type Response struct {
|
||||
ID string `yaml:"Id,omitempty" json:"id,omitempty"`
|
||||
Code int `yaml:"Code,omitempty" json:"code,omitempty"`
|
||||
Error string `yaml:"Error,omitempty" json:"error,omitempty"`
|
||||
Model string `yaml:"Model,omitempty" json:"model,omitempty"`
|
||||
CreatedAt time.Time `yaml:"CreatedAt,omitempty" json:"created_at,omitempty"`
|
||||
Response string `yaml:"Response,omitempty" json:"response,omitempty"`
|
||||
Thinking string `yaml:"Thinking,omitempty" json:"thinking,omitempty"`
|
||||
Done bool `yaml:"Done,omitempty" json:"done,omitempty"`
|
||||
Context []int `yaml:"Context,omitempty" json:"context,omitempty"`
|
||||
TotalDuration int64 `yaml:"TotalDuration,omitempty" json:"total_duration,omitempty"`
|
||||
LoadDuration int `yaml:"LoadDuration,omitempty" json:"load_duration,omitempty"`
|
||||
PromptEvalCount int `yaml:"PromptEvalCount,omitempty" json:"prompt_eval_count,omitempty"`
|
||||
PromptEvalDuration int `yaml:"PromptEvalDuration,omitempty" json:"prompt_eval_duration,omitempty"`
|
||||
EvalCount int `yaml:"EvalCount,omitempty" json:"eval_count,omitempty"`
|
||||
EvalDuration int64 `yaml:"EvalDuration,omitempty" json:"eval_duration,omitempty"`
|
||||
Result ResultPayload `yaml:"Result,omitempty" json:"result,omitempty"`
|
||||
}
|
||||
|
||||
// Err returns an error if the request has failed.
|
||||
func (r *Response) Err() error {
|
||||
if r == nil {
|
||||
return errors.New("response is nil")
|
||||
}
|
||||
|
||||
if r.Code >= 400 {
|
||||
if r.Error != "" {
|
||||
return errors.New(r.Error)
|
||||
}
|
||||
|
||||
return fmt.Errorf("error %d", r.Code)
|
||||
} else if len(r.Result.Labels) == 0 && r.Result.Caption == nil {
|
||||
return errors.New("no result")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// HasResult checks if there is at least one result in the response data.
|
||||
func (r *Response) HasResult() bool {
|
||||
if r == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return len(r.Result.Labels) > 0 || r.Result.Caption != nil
|
||||
}
|
||||
|
||||
// ResultPayload mirrors the structure returned by Ollama for result data.
|
||||
type ResultPayload struct {
|
||||
Labels []LabelPayload `json:"labels"`
|
||||
Caption *CaptionPayload `json:"caption,omitempty"`
|
||||
}
|
||||
|
||||
// LabelPayload represents a single label object emitted by the Ollama adapter.
|
||||
type LabelPayload struct {
|
||||
Name string `json:"name"`
|
||||
Source string `json:"source,omitempty"`
|
||||
Priority int `json:"priority,omitempty"`
|
||||
Confidence float32 `json:"confidence,omitempty"`
|
||||
Topicality float32 `json:"topicality,omitempty"`
|
||||
Categories []string `json:"categories,omitempty"`
|
||||
NSFW bool `json:"nsfw,omitempty"`
|
||||
NSFWConfidence float32 `json:"nsfw_confidence,omitempty"`
|
||||
}
|
||||
|
||||
// CaptionPayload represents the caption object emitted by the Ollama adapter.
|
||||
type CaptionPayload struct {
|
||||
Text string `json:"text"`
|
||||
Source string `json:"source,omitempty"`
|
||||
Confidence float32 `json:"confidence,omitempty"`
|
||||
}
|
||||
90
internal/ai/vision/ollama/transport_test.go
Normal file
90
internal/ai/vision/ollama/transport_test.go
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
package ollama
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestResponseErr(t *testing.T) {
|
||||
t.Run("NilResponse", func(t *testing.T) {
|
||||
if err := (*Response)(nil).Err(); err == nil || err.Error() != "response is nil" {
|
||||
t.Fatalf("expected nil-response error, got %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("HTTPErrorWithMessage", func(t *testing.T) {
|
||||
resp := &Response{Code: 429, Error: "too many requests"}
|
||||
if err := resp.Err(); err == nil || err.Error() != "too many requests" {
|
||||
t.Fatalf("expected message error, got %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("HTTPErrorWithoutMessage", func(t *testing.T) {
|
||||
resp := &Response{Code: 500}
|
||||
if err := resp.Err(); err == nil || err.Error() != "error 500" {
|
||||
t.Fatalf("expected formatted error, got %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("NoResult", func(t *testing.T) {
|
||||
resp := &Response{Code: 200}
|
||||
if err := resp.Err(); err == nil || err.Error() != "no result" {
|
||||
t.Fatalf("expected no-result error, got %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("HasLabels", func(t *testing.T) {
|
||||
resp := &Response{
|
||||
Code: 200,
|
||||
Result: ResultPayload{Labels: []LabelPayload{{Name: "sky"}}},
|
||||
Model: "qwen",
|
||||
}
|
||||
if err := resp.Err(); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("HasCaption", func(t *testing.T) {
|
||||
resp := &Response{
|
||||
Code: 200,
|
||||
Result: ResultPayload{Caption: &CaptionPayload{Text: "Caption"}},
|
||||
}
|
||||
if err := resp.Err(); err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func TestResponseHasResult(t *testing.T) {
|
||||
if (*Response)(nil).HasResult() {
|
||||
t.Fatal("nil response should not have result")
|
||||
}
|
||||
|
||||
resp := &Response{}
|
||||
if resp.HasResult() {
|
||||
t.Fatal("expected false when result payload is empty")
|
||||
}
|
||||
|
||||
resp.Result.Labels = []LabelPayload{{Name: "sun"}}
|
||||
if !resp.HasResult() {
|
||||
t.Fatal("expected true when labels present")
|
||||
}
|
||||
|
||||
resp.Result.Labels = nil
|
||||
resp.Result.Caption = &CaptionPayload{Text: "Sky", Confidence: 0.9}
|
||||
if !resp.HasResult() {
|
||||
t.Fatal("expected true when caption present")
|
||||
}
|
||||
}
|
||||
|
||||
func TestResponseJSONTagsAreOptional(t *testing.T) {
|
||||
// Guard against accidental breaking changes to essential fields
|
||||
resp := Response{
|
||||
ID: "test",
|
||||
Model: "ollama",
|
||||
CreatedAt: time.Now(),
|
||||
}
|
||||
if resp.ID == "" || resp.Model == "" {
|
||||
t.Fatalf("response fields should persist, got %+v", resp)
|
||||
}
|
||||
}
|
||||
128
internal/ai/vision/openai/README.md
Normal file
128
internal/ai/vision/openai/README.md
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
## PhotoPrism — OpenAI API Integration
|
||||
|
||||
**Last Updated:** November 14, 2025
|
||||
|
||||
### Overview
|
||||
|
||||
This package contains PhotoPrism’s adapter for the OpenAI Responses API. It enables existing caption and label workflows (`GenerateCaption`, `GenerateLabels`, and the `photoprism vision run` CLI) to call OpenAI models alongside TensorFlow and Ollama without changing worker or API code. The implementation focuses on predictable results, structured outputs, and clear observability so operators can opt in gradually.
|
||||
|
||||
#### Context & Constraints
|
||||
|
||||
- OpenAI requests flow through the existing vision client (`internal/ai/vision/api_client.go`) and must honour PhotoPrism’s timeout, logging, and ACL rules.
|
||||
- Structured outputs are preferred but the adapter must gracefully handle free-form text; `output_text` responses are parsed both as JSON and as plain captions.
|
||||
- Costs should remain predictable: requests are limited to a single 720 px thumbnail (`detail=low`) and capped token budgets (512 caption, 1024 labels).
|
||||
- Secrets are supplied per model (`Service.Key`) with fallbacks to `OPENAI_API_KEY` / `_FILE`. Logs must redact sensitive data.
|
||||
|
||||
#### Goals
|
||||
|
||||
- Provide drop-in OpenAI support for captions and labels using `vision.yml`.
|
||||
- Keep configuration ergonomic by auto-populating prompts, schema names, token limits, and sampling defaults.
|
||||
- Expose enough logging and tests so operators can compare OpenAI output with existing engines before enabling it broadly.
|
||||
|
||||
#### Non-Goals
|
||||
|
||||
- Introducing a new `generate` model type or combined caption/label endpoint (reserved for a later phase).
|
||||
- Replacing the default TensorFlow models; they remain active as fallbacks.
|
||||
- Managing OpenAI billing or quota dashboards beyond surfacing token counts in logs and metrics.
|
||||
|
||||
### Prompt, Model, & Schema Guidance
|
||||
|
||||
- **Models:** The adapter targets GPT‑5 vision tiers (e.g. `gpt-5-nano`, `gpt-5-mini`). These models support image inputs, structured outputs, and deterministic settings. Set `Name` to the exact provider identifier so defaults are applied correctly. Caption models share the same configuration surface and run through the same adapter.
|
||||
- **Prompts:** Defaults live in `defaults.go`. Captions use a single-sentence instruction; labels use `LabelPromptDefault` (or `LabelPromptNSFW` when PhotoPrism requests NSFW metadata). Custom prompts should retain schema reminders so structured outputs stay valid.
|
||||
- **Schemas:** Labels use the JSON schema returned by `schema.LabelsJsonSchema(nsfw)`; the response format name is derived via `schema.JsonSchemaName` (e.g. `photoprism_vision_labels_v1`). Captions omit schemas unless operators explicitly request a structured format.
|
||||
- **When to keep defaults:** For most deployments, leaving `System`, `Prompt`, `Schema`, and `Options` unset yields stable output with minimal configuration. Override them only when domain-specific language or custom scoring is necessary, and add regression tests alongside.
|
||||
|
||||
Budget-conscious operators can experiment with lighter prompts or lower-resolution thumbnails, but should keep token limits and determinism settings intact to avoid unexpected bills and UI churn.
|
||||
|
||||
#### Performance & Cost Estimates
|
||||
|
||||
- **Token budgets:** Captions request up to 512 output tokens; labels request up to 1024. Input tokens are typically ≤700 for a single 720 px thumbnail plus prompts.
|
||||
- **Latency:** GPT‑5 nano/mini vision calls typically complete in 3–8 s, depending on OpenAI region. Including reasoning metadata (`reasoning.effort=low`) has negligible impact but improves traceability.
|
||||
- **Costs:** Consult OpenAI’s pricing for the selected model. Multiply input/output tokens by the published rate. PhotoPrism currently sends one image per request to keep costs linear with photo count.
|
||||
|
||||
### Configuration
|
||||
|
||||
#### Environment Variables
|
||||
|
||||
- `OPENAI_API_KEY` / `OPENAI_API_KEY_FILE` — fallback credentials when a model’s `Service.Key` is unset.
|
||||
- Existing `PHOTOPRISM_VISION_*` variables remain authoritative (see the [Getting Started Guide](https://docs.photoprism.app/getting-started/config-options/#computer-vision) for full lists).
|
||||
|
||||
#### `vision.yml` Examples
|
||||
|
||||
```yaml
|
||||
Models:
|
||||
- Type: caption
|
||||
Name: gpt-5-nano
|
||||
Engine: openai
|
||||
Disabled: false # opt in manually
|
||||
Resolution: 720 # optional; default is 720
|
||||
Options:
|
||||
Detail: low # optional; defaults to low
|
||||
MaxOutputTokens: 512
|
||||
Service:
|
||||
Uri: https://api.openai.com/v1/responses
|
||||
FileScheme: data
|
||||
Key: ${OPENAI_API_KEY}
|
||||
|
||||
- Type: labels
|
||||
Name: gpt-5-mini
|
||||
Engine: openai
|
||||
Disabled: false
|
||||
Resolution: 720
|
||||
Options:
|
||||
Detail: low
|
||||
MaxOutputTokens: 1024
|
||||
ForceJson: true # redundant but explicit
|
||||
Service:
|
||||
Uri: https://api.openai.com/v1/responses
|
||||
FileScheme: data
|
||||
Key: ${OPENAI_API_KEY}
|
||||
```
|
||||
|
||||
Keep TensorFlow entries in place so PhotoPrism falls back when the external service is unavailable.
|
||||
|
||||
#### Defaults
|
||||
|
||||
- File scheme: `data:` URLs (base64) for all OpenAI models.
|
||||
- Resolution: 720 px thumbnails (`vision.Thumb(ModelTypeCaption|Labels)`).
|
||||
- Options: `MaxOutputTokens` raised to 512 (caption) / 1024 (labels); `ForceJson=false` for captions, `true` for labels; `reasoning.effort="low"`.
|
||||
- Sampling: `Temperature` and `TopP` set to `0` for `gpt-5*` models; inherited values (0.1/0.9) remain for other engines. `openaiBuilder.Build` performs this override while preserving the struct defaults for non-OpenAI adapters.
|
||||
- Schema naming: Automatically derived via `schema.JsonSchemaName`, so operators may omit `SchemaVersion`.
|
||||
|
||||
### Documentation
|
||||
|
||||
- Label Generation: <https://docs.photoprism.app/developer-guide/vision/label-generation/>
|
||||
- Caption Generation: <https://docs.photoprism.app/developer-guide/vision/caption-generation/>
|
||||
- Vision CLI Commands: <https://docs.photoprism.app/developer-guide/vision/cli/>
|
||||
|
||||
### Implementation Details
|
||||
|
||||
#### Core Concepts
|
||||
|
||||
- **Structured outputs:** PhotoPrism leverages OpenAI’s structured output capability as documented at <https://platform.openai.com/docs/guides/structured-outputs>. When a JSON schema is supplied, the adapter emits `text.format` with `type: "json_schema"` and a schema name derived from the content. The parser then prefers `output_json`, but also attempts to decode `output_text` payloads that contain JSON objects.
|
||||
- **Deterministic sampling:** GPT‑5 models are run with `temperature=0` and `top_p=0` to minimise variance, while still allowing developers to override values in `vision.yml` if needed.
|
||||
- **Reasoning metadata:** Requests include `reasoning.effort="low"` so OpenAI returns structured reasoning usage counters, helping operators track token consumption.
|
||||
- **Worker summaries:** The vision worker now logs either “updated …” or “processed … (no metadata changes detected)”, making reruns easy to audit.
|
||||
|
||||
#### Rate Limiting
|
||||
|
||||
OpenAI calls respect the existing `limiter.Auth` configuration used by the vision service. Failed requests surface standard HTTP errors and are not automatically retried; operators should ensure they have adequate account limits and consider external rate limiting when sharing credentials.
|
||||
|
||||
#### Testing & Validation
|
||||
|
||||
1. Unit tests: `go test ./internal/ai/vision/openai ./internal/ai/vision -run OpenAI -count=1`. Fixtures under `internal/ai/vision/openai/testdata/` replay real Responses payloads (captions and labels).
|
||||
2. CLI smoke test: `photoprism vision run -m labels --count 1 --force` with trace logging enabled to inspect sanitised Responses.
|
||||
3. Compare worker summaries and label sources (`openai`) in the UI or via `photoprism vision ls`.
|
||||
|
||||
#### Code Map
|
||||
|
||||
- **Adapter & defaults:** `internal/ai/vision/openai` (defaults, schema helpers, transport, tests).
|
||||
- **Request/response plumbing:** `internal/ai/vision/api_request.go`, `api_client.go`, `engine_openai.go`, `engine_openai_test.go`.
|
||||
- **Workers & CLI:** `internal/workers/vision.go`, `internal/commands/vision_run.go`.
|
||||
- **Shared utilities:** `internal/ai/vision/schema`, `pkg/clean`, `pkg/media`.
|
||||
|
||||
#### Next Steps
|
||||
|
||||
- [ ] Introduce the future `generate` model type that combines captions, labels, and optional markers.
|
||||
- [ ] Evaluate additional OpenAI models as pricing and capabilities evolve.
|
||||
- [ ] Expose token usage metrics (input/output/reasoning) via Prometheus once the schema stabilises.
|
||||
|
|
@ -1,6 +1,29 @@
|
|||
package openai
|
||||
|
||||
import "github.com/photoprism/photoprism/internal/ai/vision/schema"
|
||||
// Default prompts, budgets, and sampling parameters applied to OpenAI
// vision models when no explicit values are configured.
const (
	// CaptionSystem defines the default system prompt for caption models.
	CaptionSystem = "You are a PhotoPrism vision model. Return concise, user-friendly captions that describe the main subjects accurately."
	// CaptionPrompt instructs caption models to respond with a single sentence.
	CaptionPrompt = "Provide exactly one sentence describing the key subject and action in the image. Avoid filler words and technical jargon."
	// LabelSystem defines the system prompt for label generation.
	LabelSystem = "You are a PhotoPrism vision model. Emit JSON that matches the provided schema and keep label names short, singular nouns."
	// LabelPromptDefault requests general-purpose labels.
	LabelPromptDefault = "Analyze the image and return label objects with name, confidence (0-1), and topicality (0-1)."
	// LabelPromptNSFW requests labels including NSFW metadata when required.
	LabelPromptNSFW = "Analyze the image and return label objects with name, confidence (0-1), topicality (0-1), nsfw (true when sensitive), and nsfw_confidence (0-1)."
	// DefaultDetail specifies the preferred thumbnail detail level for Responses API calls.
	DefaultDetail = "low"
	// CaptionMaxTokens suggests the output budget for caption responses.
	CaptionMaxTokens = 512
	// LabelsMaxTokens suggests the output budget for label responses.
	LabelsMaxTokens = 1024
	// DefaultTemperature configures deterministic replies.
	DefaultTemperature = 0.1
	// DefaultTopP limits nucleus sampling.
	DefaultTopP = 0.9
	// DefaultSchemaVersion is used when callers do not specify an explicit schema version.
	DefaultSchemaVersion = "v1"
)
|
||||
|
||||
var (
|
||||
// DefaultModel is the model used by default when accessing the OpenAI API.
|
||||
|
|
@ -8,8 +31,3 @@ var (
|
|||
	// DefaultResolution is the default thumbnail size submitted to the OpenAI API.
	DefaultResolution = 720
|
||||
)
|
||||
|
||||
// LabelsSchema returns the canonical label schema string consumed by OpenAI models.
|
||||
func LabelsSchema() string {
|
||||
return schema.LabelsDefault
|
||||
}
|
||||
|
|
|
|||
16
internal/ai/vision/openai/schema.go
Normal file
16
internal/ai/vision/openai/schema.go
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
||||
"github.com/photoprism/photoprism/internal/ai/vision/schema"
|
||||
)
|
||||
|
||||
// SchemaLabels returns the canonical labels JSON Schema string consumed by OpenAI models.
//
// Related documentation and references:
// - https://platform.openai.com/docs/guides/structured-outputs
// - https://json-schema.org/learn/miscellaneous-examples
func SchemaLabels(nsfw bool) json.RawMessage {
	return schema.LabelsJsonSchema(nsfw)
}
|
||||
73
internal/ai/vision/openai/testdata/caption-response.json
vendored
Normal file
73
internal/ai/vision/openai/testdata/caption-response.json
vendored
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
{
|
||||
"id": "resp_0d356718505119f3006916e5d8730881a0b91de2aa700f6196",
|
||||
"object": "response",
|
||||
"created_at": 1763108312,
|
||||
"status": "completed",
|
||||
"background": false,
|
||||
"billing": {
|
||||
"payer": "developer"
|
||||
},
|
||||
"error": null,
|
||||
"incomplete_details": null,
|
||||
"instructions": null,
|
||||
"max_output_tokens": 512,
|
||||
"max_tool_calls": null,
|
||||
"model": "gpt-5-nano-2025-08-07",
|
||||
"output": [
|
||||
{
|
||||
"id": "rs_0d356718505119f3006916e5d8efd481a0a4f9cc1823cc6c83",
|
||||
"type": "reasoning",
|
||||
"summary": []
|
||||
},
|
||||
{
|
||||
"id": "msg_0d356718505119f3006916e5d9433881a0bc79197d2cfc2027",
|
||||
"type": "message",
|
||||
"status": "completed",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"annotations": [],
|
||||
"logprobs": [],
|
||||
"text": "A bee gathers nectar from the vibrant red poppy\u2019s center."
|
||||
}
|
||||
],
|
||||
"role": "assistant"
|
||||
}
|
||||
],
|
||||
"parallel_tool_calls": true,
|
||||
"previous_response_id": null,
|
||||
"prompt_cache_key": null,
|
||||
"prompt_cache_retention": null,
|
||||
"reasoning": {
|
||||
"effort": "low",
|
||||
"summary": null
|
||||
},
|
||||
"safety_identifier": null,
|
||||
"service_tier": "default",
|
||||
"store": true,
|
||||
"temperature": 1.0,
|
||||
"text": {
|
||||
"format": {
|
||||
"type": "text"
|
||||
},
|
||||
"verbosity": "medium"
|
||||
},
|
||||
"tool_choice": "auto",
|
||||
"tools": [],
|
||||
"top_logprobs": 0,
|
||||
"top_p": 1.0,
|
||||
"truncation": "disabled",
|
||||
"usage": {
|
||||
"input_tokens": 576,
|
||||
"input_tokens_details": {
|
||||
"cached_tokens": 0
|
||||
},
|
||||
"output_tokens": 19,
|
||||
"output_tokens_details": {
|
||||
"reasoning_tokens": 0
|
||||
},
|
||||
"total_tokens": 595
|
||||
},
|
||||
"user": null,
|
||||
"metadata": {}
|
||||
}
|
||||
114
internal/ai/vision/openai/testdata/labels-response.json
vendored
Normal file
114
internal/ai/vision/openai/testdata/labels-response.json
vendored
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
{
|
||||
"id": "resp_0fa91dfb69b7d644006916ea0b72ac819f84ff3152a38dfcdb",
|
||||
"object": "response",
|
||||
"created_at": 1763109387,
|
||||
"status": "completed",
|
||||
"background": false,
|
||||
"billing": {
|
||||
"payer": "developer"
|
||||
},
|
||||
"error": null,
|
||||
"incomplete_details": null,
|
||||
"instructions": null,
|
||||
"max_output_tokens": 1024,
|
||||
"max_tool_calls": null,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"output": [
|
||||
{
|
||||
"id": "rs_0fa91dfb69b7d644006916ea0c3450819f8a13396bf377f474",
|
||||
"type": "reasoning",
|
||||
"summary": []
|
||||
},
|
||||
{
|
||||
"id": "msg_0fa91dfb69b7d644006916ea0d2dfc819faf52b11334fc10a4",
|
||||
"type": "message",
|
||||
"status": "completed",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"annotations": [],
|
||||
"logprobs": [],
|
||||
"text": "{\"labels\":[{\"name\":\"flower\",\"confidence\":0.99,\"topicality\":0.99},{\"name\":\"bee\",\"confidence\":0.95,\"topicality\":0.95},{\"name\":\"petal\",\"confidence\":0.92,\"topicality\":0.88},{\"name\":\"pollen\",\"confidence\":0.85,\"topicality\":0.8},{\"name\":\"insect\",\"confidence\":0.9,\"topicality\":0.85},{\"name\":\"red\",\"confidence\":0.88,\"topicality\":0.6},{\"name\":\"close-up\",\"confidence\":0.86,\"topicality\":0.7},{\"name\":\"nature\",\"confidence\":0.8,\"topicality\":0.5}]}"
|
||||
}
|
||||
],
|
||||
"role": "assistant"
|
||||
}
|
||||
],
|
||||
"parallel_tool_calls": true,
|
||||
"previous_response_id": null,
|
||||
"prompt_cache_key": null,
|
||||
"prompt_cache_retention": null,
|
||||
"reasoning": {
|
||||
"effort": "low",
|
||||
"summary": null
|
||||
},
|
||||
"safety_identifier": null,
|
||||
"service_tier": "default",
|
||||
"store": true,
|
||||
"temperature": 1.0,
|
||||
"text": {
|
||||
"format": {
|
||||
"type": "json_schema",
|
||||
"description": null,
|
||||
"name": "photoprism_vision_labels_v1",
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"labels": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"minLength": 1
|
||||
},
|
||||
"confidence": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
},
|
||||
"topicality": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"confidence",
|
||||
"topicality"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"default": []
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"labels"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"strict": true
|
||||
},
|
||||
"verbosity": "medium"
|
||||
},
|
||||
"tool_choice": "auto",
|
||||
"tools": [],
|
||||
"top_logprobs": 0,
|
||||
"top_p": 1.0,
|
||||
"truncation": "disabled",
|
||||
"usage": {
|
||||
"input_tokens": 724,
|
||||
"input_tokens_details": {
|
||||
"cached_tokens": 0
|
||||
},
|
||||
"output_tokens": 169,
|
||||
"output_tokens_details": {
|
||||
"reasoning_tokens": 0
|
||||
},
|
||||
"total_tokens": 893
|
||||
},
|
||||
"user": null,
|
||||
"metadata": {}
|
||||
}
|
||||
142
internal/ai/vision/openai/transport.go
Normal file
142
internal/ai/vision/openai/transport.go
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Content-part type identifiers and response format names accepted by the
// OpenAI Responses API.
const (
	// ContentTypeText identifies text input segments for the Responses API.
	ContentTypeText = "input_text"
	// ContentTypeImage identifies image input segments for the Responses API.
	ContentTypeImage = "input_image"

	// ResponseFormatJSONSchema requests JSON constrained by a schema.
	ResponseFormatJSONSchema = "json_schema"
	// ResponseFormatJSONObject requests a free-form JSON object.
	ResponseFormatJSONObject = "json_object"
)

// HTTPRequest represents the payload expected by OpenAI's Responses API.
// Optional fields carry omitempty so unset values are not serialized.
type HTTPRequest struct {
	Model           string         `json:"model"`
	Input           []InputMessage `json:"input"`
	Text            *TextOptions   `json:"text,omitempty"`
	Reasoning       *Reasoning     `json:"reasoning,omitempty"`
	MaxOutputTokens int            `json:"max_output_tokens,omitempty"`
	// NOTE(review): omitempty also drops explicit zero values, so a
	// Temperature or TopP of 0 is never serialized and the API applies its
	// own default instead — confirm this is intended for deterministic
	// sampling on gpt-5 models.
	Temperature      float64 `json:"temperature,omitempty"`
	TopP             float64 `json:"top_p,omitempty"`
	PresencePenalty  float64 `json:"presence_penalty,omitempty"`
	FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
}

// TextOptions carries formatting preferences for textual responses.
type TextOptions struct {
	Format *ResponseFormat `json:"format,omitempty"`
}

// Reasoning configures the effort level for reasoning models.
type Reasoning struct {
	Effort string `json:"effort,omitempty"`
}

// InputMessage captures a single system or user message in the request.
type InputMessage struct {
	Role    string        `json:"role"`
	Type    string        `json:"type,omitempty"`
	Content []ContentItem `json:"content"`
}

// ContentItem represents a text or image entry within a message.
type ContentItem struct {
	Type     string `json:"type"`
	Text     string `json:"text,omitempty"`
	ImageURL string `json:"image_url,omitempty"`
	Detail   string `json:"detail,omitempty"`
}

// ResponseFormat describes how OpenAI should format its response,
// e.g. free-form JSON or JSON constrained by the given schema.
type ResponseFormat struct {
	Type        string          `json:"type"`
	Name        string          `json:"name,omitempty"`
	Schema      json.RawMessage `json:"schema,omitempty"`
	Description string          `json:"description,omitempty"`
	Strict      bool            `json:"strict,omitempty"`
}
|
||||
|
||||
// Response mirrors the subset of the Responses API response we need.
type Response struct {
	ID     string           `json:"id"`
	Model  string           `json:"model"`
	Output []ResponseOutput `json:"output"`
	Error  *struct {
		Message string `json:"message"`
		Type    string `json:"type"`
	} `json:"error,omitempty"`
}

// ResponseOutput captures assistant messages within the response.
type ResponseOutput struct {
	Role    string            `json:"role"`
	Content []ResponseContent `json:"content"`
}

// ResponseContent contains individual message parts (JSON or text).
type ResponseContent struct {
	Type string          `json:"type"`
	Text string          `json:"text,omitempty"`
	JSON json.RawMessage `json:"json,omitempty"`
}

// FirstJSON walks the output messages in order and returns the first
// non-empty JSON payload, or nil when none is present.
func (r *Response) FirstJSON() json.RawMessage {
	if r == nil {
		return nil
	}

	for _, out := range r.Output {
		for _, part := range out.Content {
			if len(part.JSON) > 0 {
				return part.JSON
			}
		}
	}

	return nil
}

// FirstText walks the output messages in order and returns the first
// non-blank text payload with surrounding whitespace trimmed, or an
// empty string when none is present.
func (r *Response) FirstText() string {
	if r == nil {
		return ""
	}

	for _, out := range r.Output {
		for _, part := range out.Content {
			if trimmed := strings.TrimSpace(part.Text); trimmed != "" {
				return trimmed
			}
		}
	}

	return ""
}
|
||||
|
||||
// ParseErrorMessage extracts a human readable error message from a Responses API payload.
// It returns an empty string when the payload is not valid JSON or carries no error.
func ParseErrorMessage(raw []byte) string {
	var payload struct {
		Error *struct {
			Message string `json:"message"`
		} `json:"error"`
	}

	if json.Unmarshal(raw, &payload) != nil || payload.Error == nil {
		return ""
	}

	return strings.TrimSpace(payload.Error.Message)
}
|
||||
120
internal/ai/vision/openai/transport_test.go
Normal file
120
internal/ai/vision/openai/transport_test.go
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
package openai
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func loadTestResponse(t *testing.T, name string) *Response {
|
||||
t.Helper()
|
||||
|
||||
filePath := filepath.Join("testdata", name)
|
||||
|
||||
data, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to read %s: %v", filePath, err)
|
||||
}
|
||||
|
||||
var resp Response
|
||||
if err := json.Unmarshal(data, &resp); err != nil {
|
||||
t.Fatalf("failed to unmarshal %s: %v", filePath, err)
|
||||
}
|
||||
|
||||
return &resp
|
||||
}
|
||||
|
||||
func TestParseErrorMessage(t *testing.T) {
|
||||
t.Run("returns message when present", func(t *testing.T) {
|
||||
raw := []byte(`{"error":{"message":"Invalid schema"}}`)
|
||||
msg := ParseErrorMessage(raw)
|
||||
if msg != "Invalid schema" {
|
||||
t.Fatalf("expected message, got %q", msg)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("returns empty string when error is missing", func(t *testing.T) {
|
||||
raw := []byte(`{"output":[]}`)
|
||||
if msg := ParseErrorMessage(raw); msg != "" {
|
||||
t.Fatalf("expected empty message, got %q", msg)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// TestResponseFirstTextCaption checks that a recorded caption response yields
// plain text only: no structured JSON payload and the exact caption string.
func TestResponseFirstTextCaption(t *testing.T) {
	resp := loadTestResponse(t, "caption-response.json")

	// Captions come back as output_text, so no output_json part is expected.
	if jsonPayload := resp.FirstJSON(); len(jsonPayload) != 0 {
		t.Fatalf("expected no JSON payload, got: %s", jsonPayload)
	}

	text := resp.FirstText()
	// The fixture escapes the apostrophe as \u2019; the decoded string must match.
	expected := "A bee gathers nectar from the vibrant red poppy’s center."
	if text != expected {
		t.Fatalf("unexpected caption text: %q", text)
	}
}
|
||||
|
||||
// TestResponseFirstTextLabels checks that a recorded labels response delivers
// its structured result as a JSON string inside the text payload rather than
// as a separate output_json part.
func TestResponseFirstTextLabels(t *testing.T) {
	resp := loadTestResponse(t, "labels-response.json")

	// The fixture carries the labels as output_text, so FirstJSON stays empty.
	if jsonPayload := resp.FirstJSON(); len(jsonPayload) != 0 {
		t.Fatalf("expected no JSON payload, got: %s", jsonPayload)
	}

	text := resp.FirstText()
	if len(text) == 0 {
		t.Fatal("expected structured JSON string in text payload")
	}
	// Only a shape check here: the text must look like a JSON object.
	if text[0] != '{' {
		t.Fatalf("expected JSON object in text payload, got %q", text)
	}
}
|
||||
|
||||
// TestResponseFirstJSONFromStructuredPayload verifies that FirstJSON returns
// the JSON payload of an output_json content part and that the payload
// decodes into the expected label structure.
func TestResponseFirstJSONFromStructuredPayload(t *testing.T) {
	resp := &Response{
		ID:    "resp_structured",
		Model: "gpt-5-mini",
		Output: []ResponseOutput{
			{
				Role: "assistant",
				Content: []ResponseContent{
					{
						Type: "output_json",
						JSON: json.RawMessage(`{"labels":[{"name":"sunset"}]}`),
					},
				},
			},
		},
	}

	jsonPayload := resp.FirstJSON()
	if len(jsonPayload) == 0 {
		t.Fatal("expected JSON payload, got empty result")
	}

	// Decode the raw payload to confirm it round-trips into label objects.
	var decoded struct {
		Labels []map[string]string `json:"labels"`
	}
	if err := json.Unmarshal(jsonPayload, &decoded); err != nil {
		t.Fatalf("failed to decode JSON payload: %v", err)
	}

	if len(decoded.Labels) != 1 || decoded.Labels[0]["name"] != "sunset" {
		t.Fatalf("unexpected JSON payload: %+v", decoded.Labels)
	}
}
|
||||
|
||||
func TestSchemaLabelsReturnsValidJSON(t *testing.T) {
|
||||
raw := SchemaLabels(false)
|
||||
|
||||
var decoded map[string]any
|
||||
if err := json.Unmarshal(raw, &decoded); err != nil {
|
||||
t.Fatalf("schema should be valid JSON: %v", err)
|
||||
}
|
||||
|
||||
if decoded["type"] != "object" {
|
||||
t.Fatalf("expected type object, got %v", decoded["type"])
|
||||
}
|
||||
}
|
||||
52
internal/ai/vision/schema/README.md
Normal file
52
internal/ai/vision/schema/README.md
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
## PhotoPrism — Vision Schema Reference
|
||||
|
||||
**Last Updated:** November 14, 2025
|
||||
|
||||
### Overview
|
||||
|
||||
This package contains the canonical label response specifications used by PhotoPrism’s external vision engines. It exposes two helpers:
|
||||
|
||||
- `LabelsJsonSchema(nsfw bool)` — returns a JSON **Schema** document tailored for OpenAI Responses requests, enabling strict validation of structured outputs.
|
||||
- `LabelsJson(nsfw bool)` — returns a literal JSON **sample** that Ollama-style models can mirror when they only support prompt-enforced structures.
|
||||
|
||||
Both helpers build on the same field set (`name`, `confidence`, `topicality`, and optional NSFW flags) so downstream parsing logic (`LabelResult`) can remain engine-agnostic.
|
||||
|
||||
### Schema Types & Differences
|
||||
|
||||
| Helper | Target Engine | Format | Validation Style | When To Use |
|
||||
|---------------------------|--------------------------|--------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
|
||||
| `LabelsJsonSchema(false)` | OpenAI (standard labels) | JSON Schema Draft | Strong: OpenAI enforces field types/ranges server-side before returning a response. | When calling GPT‑vision models via `ApiFormatOpenAI` to ensure PhotoPrism receives well-formed label arrays. |
|
||||
| `LabelsJsonSchema(true)` | OpenAI (labels + NSFW) | JSON Schema Draft with additional boolean/float fields | Strong: same enforcement plus required NSFW fields. | When `DetectNSFWLabels` or NSFW-specific prompts are active and the model must emit `nsfw` + `nsfw_confidence`. |
|
||||
| `LabelsJson(false)` | Ollama (standard labels) | Plain JSON example | Soft: model is nudged to mimic the structure through prompt instructions. | When running self-hosted Ollama models that support “JSON mode” but do not consume JSON Schema definitions. |
|
||||
| `LabelsJson(true)` | Ollama (labels + NSFW) | Plain JSON example with NSFW keys | Soft: prompts describe the required keys; the adapter validates after parsing. | When Ollama prompts mention NSFW scoring or PhotoPrism sets `DetectNSFWLabels=true`. |
|
||||
|
||||
**Key technical distinction:** OpenAI’s Responses API accepts a JSON Schema (see `LabelsJsonSchema*`) and guarantees compliance by rejecting invalid responses, while Ollama currently relies on prompt-directed output. For Ollama integrations we provide a representative JSON document (`LabelsJson*`) that models can imitate; PhotoPrism then normalizes and validates the results in Go.
|
||||
|
||||
### Field Definitions
|
||||
|
||||
- `name` — single-word noun describing the subject (string, required).
|
||||
- `confidence` — normalized score between `0` and `1` (float, required).
|
||||
- `topicality` — relative relevance score between `0` and `1` (float, required; defaults to `confidence` if omitted after parsing).
|
||||
- `nsfw` — boolean flag indicating sensitive content (required only in NSFW variants).
|
||||
- `nsfw_confidence` — normalized probability for the NSFW assessment (required only in NSFW variants).
|
||||
|
||||
OpenAI schemas enforce these ranges/types, while Ollama prompts remind the model to emit matching keys. After parsing, PhotoPrism applies `LabelConfidenceDefault` and `normalizeLabelResult` to fill gaps and enforce naming rules.
|
||||
|
||||
### Usage Guidance
|
||||
|
||||
1. **OpenAI models** (`Engine: openai`, `RequestFormat: openai`):
|
||||
- Leave `Schema` unset in `vision.yml`; the engine defaults call `LabelsJsonSchema(model.PromptContains("nsfw"))`.
|
||||
- Optionally override the schema via `Schema`/`SchemaFile` if you extend fields, but keep required keys so `LabelResult` parsing succeeds.
|
||||
2. **Ollama models** (`Engine: ollama`, `RequestFormat: ollama`):
|
||||
- Rely on the built-in samples from `LabelsJson` or include them directly in prompts via `model.SchemaInstructions()`.
|
||||
- Because enforcement happens after the response arrives, keep `Format: json` (default) and `Options.ForceJson=true` for label models to make parsing stricter.
|
||||
3. **Custom engines**:
|
||||
- Reuse these helpers to stay compatible with PhotoPrism’s label DTOs.
|
||||
- When adding new fields, update both schema/sample versions so OpenAI and Ollama adapters remain aligned.
|
||||
|
||||
### References
|
||||
|
||||
- JSON Schema primer: https://json-schema.org/learn/miscellaneous-examples
|
||||
- OpenAI structured outputs: https://platform.openai.com/docs/guides/structured-outputs
|
||||
- JSON mode background (Ollama-style prompts): https://www.alibabacloud.com/help/en/model-studio/json-mode
|
||||
- JSON syntax refresher: https://www.json.org/json-en.html
|
||||
|
|
@ -1,16 +1,115 @@
|
|||
package schema
|
||||
|
||||
// LabelsDefault provides the minimal JSON schema for label responses used across engines.
|
||||
const (
|
||||
LabelsDefault = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0 }]\n}"
|
||||
LabelsNSFW = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0,\n \"nsfw\": false,\n \"nsfw_confidence\": 0\n }]\n}"
|
||||
import (
|
||||
"encoding/json"
|
||||
)
|
||||
|
||||
// Labels returns the canonical label schema string.
|
||||
func Labels(nsfw bool) string {
|
||||
// LabelsJsonSchemaDefault provides the minimal JSON schema for label responses used across engines.
|
||||
const (
|
||||
LabelsJsonSchemaDefault = `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"labels": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"minLength": 1
|
||||
},
|
||||
"confidence": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
},
|
||||
"topicality": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
}
|
||||
},
|
||||
"required": ["name", "confidence", "topicality"],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"default": []
|
||||
}
|
||||
},
|
||||
"required": ["labels"],
|
||||
"additionalProperties": false
|
||||
}`
|
||||
LabelsJsonDefault = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0 }]\n}"
|
||||
LabelsJsonSchemaNSFW = `{
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"labels": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"minLength": 1
|
||||
},
|
||||
"confidence": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
},
|
||||
"topicality": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
},
|
||||
"nsfw": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"nsfw_confidence": {
|
||||
"type": "number",
|
||||
"minimum": 0,
|
||||
"maximum": 1
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"confidence",
|
||||
"topicality",
|
||||
"nsfw",
|
||||
"nsfw_confidence"
|
||||
],
|
||||
"additionalProperties": false
|
||||
},
|
||||
"default": []
|
||||
}
|
||||
},
|
||||
"required": ["labels"],
|
||||
"additionalProperties": false
|
||||
}`
|
||||
LabelsJsonNSFW = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0,\n \"nsfw\": false,\n \"nsfw_confidence\": 0\n }]\n}"
|
||||
)
|
||||
|
||||
// LabelsJsonSchema returns the canonical label JSON Schema string for OpenAI API endpoints.
|
||||
//
|
||||
// Related documentation and references:
|
||||
// - https://platform.openai.com/docs/guides/structured-outputs
|
||||
// - https://json-schema.org/learn/miscellaneous-examples
|
||||
func LabelsJsonSchema(nsfw bool) json.RawMessage {
|
||||
if nsfw {
|
||||
return LabelsNSFW
|
||||
return json.RawMessage(LabelsJsonSchemaNSFW)
|
||||
} else {
|
||||
return LabelsDefault
|
||||
return json.RawMessage(LabelsJsonSchemaDefault)
|
||||
}
|
||||
}
|
||||
|
||||
// LabelsJson returns the canonical label JSON string for Ollama vision models.
|
||||
//
|
||||
// Related documentation and references:
|
||||
// - https://www.alibabacloud.com/help/en/model-studio/json-mode
|
||||
// - https://www.json.org/json-en.html
|
||||
func LabelsJson(nsfw bool) string {
|
||||
if nsfw {
|
||||
return LabelsJsonNSFW
|
||||
} else {
|
||||
return LabelsJsonDefault
|
||||
}
|
||||
}
|
||||
|
|
|
|||
36
internal/ai/vision/schema/name.go
Normal file
36
internal/ai/vision/schema/name.go
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
package schema
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/photoprism/photoprism/pkg/clean"
|
||||
)
|
||||
|
||||
const (
|
||||
NamePrefix = "photoprism_vision"
|
||||
)
|
||||
|
||||
// JsonSchemaName returns the schema version string to be used for API requests.
|
||||
func JsonSchemaName(schema json.RawMessage, version string) string {
|
||||
var schemaName string
|
||||
|
||||
switch {
|
||||
case bytes.Contains(schema, []byte("labels")):
|
||||
schemaName = "labels"
|
||||
case bytes.Contains(schema, []byte("labels")):
|
||||
schemaName = "caption"
|
||||
default:
|
||||
schemaName = "schema"
|
||||
}
|
||||
|
||||
version = clean.TypeLowerUnderscore(version)
|
||||
|
||||
if version == "" {
|
||||
version = "v1"
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%s_%s_%s", NamePrefix, schemaName, version)
|
||||
|
||||
}
|
||||
23
internal/ai/vision/schema/name_test.go
Normal file
23
internal/ai/vision/schema/name_test.go
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
package schema
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestJsonSchemaName(t *testing.T) {
|
||||
t.Run("Default", func(t *testing.T) {
|
||||
assert.Equal(t, "photoprism_vision_schema_v1", JsonSchemaName(nil, ""))
|
||||
})
|
||||
t.Run("Labels", func(t *testing.T) {
|
||||
assert.Equal(t, "photoprism_vision_labels_v1", JsonSchemaName(json.RawMessage(LabelsJsonSchemaDefault), ""))
|
||||
})
|
||||
t.Run("LabelsV1", func(t *testing.T) {
|
||||
assert.Equal(t, "photoprism_vision_labels_v2", JsonSchemaName([]byte("labels"), "v2"))
|
||||
})
|
||||
t.Run("LabelsJsonSchema", func(t *testing.T) {
|
||||
assert.Equal(t, "photoprism_vision_labels_v1", JsonSchemaName(LabelsJsonSchema(false), "v1"))
|
||||
})
|
||||
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
Package schema defines canonical JSON schema templates shared by PhotoPrism's AI vision engines.
|
||||
Package schema defines canonical JSON and JSON Schema templates shared by PhotoPrism's AI vision engines.
|
||||
|
||||
Copyright (c) 2018 - 2025 PhotoPrism UG. All rights reserved.
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
package vision
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/photoprism/photoprism/pkg/http/scheme"
|
||||
)
|
||||
|
||||
|
|
@ -36,7 +39,9 @@ func (m *Service) EndpointKey() string {
|
|||
return ""
|
||||
}
|
||||
|
||||
return m.Key
|
||||
ensureEnv()
|
||||
|
||||
return strings.TrimSpace(os.ExpandEnv(m.Key))
|
||||
}
|
||||
|
||||
// EndpointFileScheme returns the endpoint API file scheme type.
|
||||
|
|
|
|||
|
|
@ -9,14 +9,12 @@ func TestThresholds_GetConfidence(t *testing.T) {
|
|||
t.Fatalf("expected 0, got %d", got)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("AboveMax", func(t *testing.T) {
|
||||
th := Thresholds{Confidence: 150}
|
||||
if got := th.GetConfidence(); got != 1 {
|
||||
t.Fatalf("expected 1, got %d", got)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Float", func(t *testing.T) {
|
||||
th := Thresholds{Confidence: 25}
|
||||
if got := th.GetConfidenceFloat32(); got != 0.25 {
|
||||
|
|
@ -32,14 +30,12 @@ func TestThresholds_GetTopicality(t *testing.T) {
|
|||
t.Fatalf("expected 0, got %d", got)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("AboveMax", func(t *testing.T) {
|
||||
th := Thresholds{Topicality: 300}
|
||||
if got := th.GetTopicality(); got != 1 {
|
||||
t.Fatalf("expected 1, got %d", got)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Float", func(t *testing.T) {
|
||||
th := Thresholds{Topicality: 45}
|
||||
if got := th.GetTopicalityFloat32(); got != 0.45 {
|
||||
|
|
@ -55,14 +51,12 @@ func TestThresholds_GetNSFW(t *testing.T) {
|
|||
t.Fatalf("expected default %d, got %d", DefaultThresholds.NSFW, got)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("AboveMax", func(t *testing.T) {
|
||||
th := Thresholds{NSFW: 200}
|
||||
if got := th.GetNSFW(); got != 1 {
|
||||
t.Fatalf("expected 1, got %d", got)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Float", func(t *testing.T) {
|
||||
th := Thresholds{NSFW: 80}
|
||||
if got := th.GetNSFWFloat32(); got != 0.8 {
|
||||
|
|
|
|||
|
|
@ -25,7 +25,34 @@ Additional information can be found in our Developer Guide:
|
|||
package vision
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/photoprism/photoprism/internal/event"
|
||||
"github.com/photoprism/photoprism/pkg/clean"
|
||||
"github.com/photoprism/photoprism/pkg/fs"
|
||||
)
|
||||
|
||||
var log = event.Log
|
||||
|
||||
var ensureEnvOnce sync.Once
|
||||
|
||||
// ensureEnv loads environment-backed credentials once so adapters can look up
|
||||
// OPENAI_API_KEY even when operators rely on OPENAI_API_KEY_FILE. Future engine
|
||||
// integrations can reuse this hook to normalise additional secrets.
|
||||
func ensureEnv() {
|
||||
ensureEnvOnce.Do(func() {
|
||||
if os.Getenv("OPENAI_API_KEY") != "" {
|
||||
return
|
||||
}
|
||||
|
||||
if path := strings.TrimSpace(os.Getenv("OPENAI_API_KEY_FILE")); fs.FileExistsNotEmpty(path) {
|
||||
if data, err := os.ReadFile(path); err == nil {
|
||||
if key := clean.Auth(string(data)); key != "" {
|
||||
_ = os.Setenv("OPENAI_API_KEY", key)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -339,7 +339,14 @@ func OIDCRedirect(router *gin.RouterGroup) {
|
|||
sess.SetAuthID(user.AuthID, provider.Issuer())
|
||||
sess.SetUser(user)
|
||||
sess.SetGrantType(authn.GrantAuthorizationCode)
|
||||
sess.IdToken = tokens.IDToken
|
||||
|
||||
// Ensure that the ID token fits into the existing
|
||||
// database column; otherwise, truncate it.
|
||||
if n := len(tokens.IDToken); n > 2048 {
|
||||
sess.IdToken = tokens.IDToken[:2048]
|
||||
} else {
|
||||
sess.IdToken = tokens.IDToken
|
||||
}
|
||||
|
||||
// Set session expiration and timeout.
|
||||
sess.SetExpiresIn(unix.Day)
|
||||
|
|
|
|||
|
|
@ -4542,6 +4542,12 @@
|
|||
"prompt": {
|
||||
"type": "string"
|
||||
},
|
||||
"schema": {
|
||||
"items": {
|
||||
"type": "integer"
|
||||
},
|
||||
"type": "array"
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean"
|
||||
},
|
||||
|
|
@ -4562,6 +4568,15 @@
|
|||
},
|
||||
"vision.ApiRequestOptions": {
|
||||
"properties": {
|
||||
"combine_outputs": {
|
||||
"type": "string"
|
||||
},
|
||||
"detail": {
|
||||
"type": "string"
|
||||
},
|
||||
"force_json": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"frequency_penalty": {
|
||||
"type": "number"
|
||||
},
|
||||
|
|
@ -4571,6 +4586,9 @@
|
|||
"main_gpu": {
|
||||
"type": "integer"
|
||||
},
|
||||
"max_output_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"min_p": {
|
||||
"type": "number"
|
||||
},
|
||||
|
|
@ -4616,6 +4634,9 @@
|
|||
"repeat_penalty": {
|
||||
"type": "number"
|
||||
},
|
||||
"schema_version": {
|
||||
"type": "string"
|
||||
},
|
||||
"seed": {
|
||||
"type": "integer"
|
||||
},
|
||||
|
|
|
|||
27
internal/config/README.md
Normal file
27
internal/config/README.md
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# Config Package Guide
|
||||
|
||||
## Overview
|
||||
|
||||
PhotoPrism’s runtime configuration is managed by this package. Fields are defined in [`options.go`](options.go) and then initialized with values from command-line flags, environment variables, and optional YAML files (`storage/config/*.yml`).
|
||||
|
||||
## Sources and Precedence
|
||||
|
||||
PhotoPrism loads configuration in the following order:
|
||||
|
||||
1. **Built-in defaults** defined in this package.
|
||||
2. **`defaults.yml`** — optional system defaults (typically `/etc/photoprism/defaults.yml`). See [Global Config Defaults](https://docs.photoprism.app/getting-started/config-files/defaults/) if you package PhotoPrism for other environments and need to override the compiled defaults.
|
||||
3. **Environment variables** prefixed with `PHOTOPRISM_…` and specified in [`flags.go`](flags.go) along with the CLI flags. This is the primary override mechanism in container environments.
|
||||
4. **`options.yml`** — user-level configuration stored under `storage/config/options.yml` (or another directory controlled by `PHOTOPRISM_CONFIG_PATH`). Values here override both defaults and environment variables, see [Config Files](https://docs.photoprism.app/getting-started/config-files/).
|
||||
5. **CLI flags** (for example `photoprism --cache-path=/tmp/cache`). Flags always win when a conflict exists.
|
||||
|
||||
The `PHOTOPRISM_CONFIG_PATH` variable controls where PhotoPrism looks for YAML files (defaults to `storage/config`).
|
||||
|
||||
> Any change to configuration (flags, env vars, YAML files) requires a restart. The Go process reads options during startup and does not watch for changes.
|
||||
|
||||
## CLI Reference
|
||||
|
||||
- `photoprism help` (or `photoprism --help`) lists all subcommands and global flags.
|
||||
- `photoprism show config` renders every active option along with its current value. Pass `--json`, `--md`, `--tsv`, or `--csv` to change the output format.
|
||||
- `photoprism show config-options` prints the description and default value for each option. Use this when updating [`flags.go`](flags.go).
|
||||
- `photoprism show config-yaml` displays the configuration keys and their expected types in the [same structure that the YAML files use](https://docs.photoprism.app/getting-started/config-files/). It is a read-only helper meant to guide you when editing files under `storage/config`.
|
||||
- Additional `show` subcommands document search filters, metadata tags, and supported thumbnail sizes; see [`internal/commands/show.go`](../commands/show.go) for the complete list.
|
||||
|
|
@ -4,5 +4,5 @@ package feat
|
|||
var (
|
||||
VisionModelGenerate = false // controls exposure of the generate endpoint and CLI commands
|
||||
VisionModelMarkers = false // gates marker generation/return until downstream UI and reconciliation paths are ready
|
||||
VisionServiceOpenAI = false // controls whether users are able to configure OpenAI as a vision service engine
|
||||
VisionServiceOpenAI = true // controls whether users are able to configure OpenAI as a vision service engine
|
||||
)
|
||||
|
|
|
|||
|
|
@ -135,6 +135,7 @@ func (w *Vision) Start(filter string, count int, models []string, customSrc stri
|
|||
done := make(map[string]bool)
|
||||
offset := 0
|
||||
updated := 0
|
||||
processed := 0
|
||||
|
||||
// Make sure count is within the valid range (1 to search.MaxResults).
|
||||
if count < 1 || count > search.MaxResults {
|
||||
|
|
@ -197,6 +198,8 @@ func (w *Vision) Start(filter string, count int, models []string, customSrc stri
|
|||
continue
|
||||
}
|
||||
|
||||
processed++
|
||||
|
||||
fileName := photoprism.FileName(photo.FileRoot, photo.FileName)
|
||||
file, fileErr := photoprism.NewMediaFile(fileName)
|
||||
|
||||
|
|
@ -279,7 +282,18 @@ func (w *Vision) Start(filter string, count int, models []string, customSrc stri
|
|||
}
|
||||
}
|
||||
|
||||
log.Infof("vision: updated %s [%s]", english.Plural(updated, "picture", "pictures"), time.Since(start))
|
||||
elapsed := time.Since(start)
|
||||
|
||||
switch {
|
||||
case processed == 0:
|
||||
log.Infof("vision: no pictures required processing [%s]", elapsed)
|
||||
case updated == processed:
|
||||
log.Infof("vision: updated %s [%s]", english.Plural(updated, "picture", "pictures"), elapsed)
|
||||
case updated == 0:
|
||||
log.Infof("vision: processed %s (no metadata changes detected) [%s]", english.Plural(processed, "picture", "pictures"), elapsed)
|
||||
default:
|
||||
log.Infof("vision: updated %s out of %s [%s]", english.Plural(updated, "picture", "pictures"), english.Plural(processed, "picture", "pictures"), elapsed)
|
||||
}
|
||||
|
||||
if updated > 0 {
|
||||
updateIndex = true
|
||||
|
|
|
|||
|
|
@ -26,13 +26,13 @@ func TestASCII(t *testing.T) {
|
|||
}
|
||||
|
||||
func BenchmarkASCII(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
for b.Loop() {
|
||||
ASCII("https://docs.photoprism.app/getting-started 👍/config-options/#file-converters")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkASCIIEmpty(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
for b.Loop() {
|
||||
ASCII("")
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ var DomainRegexp = regexp.MustCompile("^(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\
|
|||
|
||||
// Auth returns the sanitized authentication identifier trimmed to a maximum length of 255 characters.
|
||||
func Auth(s string) string {
|
||||
if s == "" || len(s) > 2048 {
|
||||
if s == "" || len(s) > 510 {
|
||||
return ""
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -43,6 +43,12 @@ func TestAuth(t *testing.T) {
|
|||
t.Run("TeLessThanSGreaterThanT", func(t *testing.T) {
|
||||
assert.Equal(t, "Test", Auth("Te<s>t"))
|
||||
})
|
||||
t.Run("ApiKey", func(t *testing.T) {
|
||||
assert.Equal(t,
|
||||
"ab-prot-keech1aqu8quamiNaecuisuem1ahg7dieph8eitohzo7hoo7pe-Chohzu4eaA-Chohzu4ea-soh7Seic8eig9joojaeshe4Ahsu8zeibooCh9ooquaaleev3poLeev0su9jei2yeich3ahsi9quar1oqueic",
|
||||
Auth("ab-prot-keech1aqu8quamiNaecuisuem1ahg7dieph8eitohzo7hoo7pe-Chohzu4eaA-Chohzu4ea-soh7Seic8eig9joojaeshe4Ahsu8zeibooCh9ooquaaleev3poLeev0su9jei2yeich3ahsi9quar1oqueic"),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
func TestHandle(t *testing.T) {
|
||||
|
|
|
|||
|
|
@ -27,13 +27,13 @@ func TestHeader(t *testing.T) {
|
|||
}
|
||||
|
||||
func BenchmarkHeader(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
for b.Loop() {
|
||||
Header("https://..docs.photoprism.app/gettin\\g-started/config-options/\tfile-converters")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkHeaderEmpty(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
for b.Loop() {
|
||||
Header("")
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ func TestSearchQuery(t *testing.T) {
|
|||
func BenchmarkSearchQuery_Complex(b *testing.B) {
|
||||
s := "Jens AND Mander and me Or Kitty WITH flowers IN the park AT noon | img% json OR BILL!\n"
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
_ = SearchQuery(s)
|
||||
}
|
||||
}
|
||||
|
|
@ -56,7 +56,7 @@ func BenchmarkSearchQuery_Complex(b *testing.B) {
|
|||
func BenchmarkSearchQuery_Short(b *testing.B) {
|
||||
s := "cat and dog"
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
_ = SearchQuery(s)
|
||||
}
|
||||
}
|
||||
|
|
@ -65,7 +65,7 @@ func BenchmarkSearchQuery_LongNoOps(b *testing.B) {
|
|||
// No tokens to replace, primarily tests normalization + trim.
|
||||
s := strings.Repeat("alpha beta gamma ", 50)
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
_ = SearchQuery(s)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,13 +26,13 @@ func TestUri(t *testing.T) {
|
|||
}
|
||||
|
||||
func BenchmarkUri(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
for b.Loop() {
|
||||
Uri("https://docs.photoprism.app/getting-started/config-options/#file-converters")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkUriEmpty(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
for b.Loop() {
|
||||
Uri("")
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -233,7 +233,7 @@ var benchDir = flag.String("benchdir", runtime.GOROOT(), "The directory to scan
|
|||
|
||||
func BenchmarkFastWalk(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
err := fastwalk.Walk(*benchDir, func(path string, typ os.FileMode) error { return nil })
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
|
|
|
|||
|
|
@ -27,13 +27,13 @@ func TestCacheControlMaxAge(t *testing.T) {
|
|||
}
|
||||
|
||||
func BenchmarkTestCacheControlMaxAge(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
for b.Loop() {
|
||||
_ = CacheControlMaxAge(DurationYear, false)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkTestCacheControlMaxAgeImmutable(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
for b.Loop() {
|
||||
_ = CacheControlMaxAge(DurationYear, false) + ", " + CacheControlImmutable
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ func BenchmarkContainsAny_LargeOverlap(b *testing.B) {
|
|||
bList[i] = a[i*4]
|
||||
}
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
if !ContainsAny(a, bList) {
|
||||
b.Fatalf("expected overlap")
|
||||
}
|
||||
|
|
@ -44,7 +44,7 @@ func BenchmarkContainsAny_Disjoint(b *testing.B) {
|
|||
a := makeStrings("a", 5000)
|
||||
bList := makeStrings("b", 5000)
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
if ContainsAny(a, bList) {
|
||||
b.Fatalf("expected disjoint")
|
||||
}
|
||||
|
|
@ -56,7 +56,7 @@ func BenchmarkJoin_Large(b *testing.B) {
|
|||
j := append(makeStrings("y", 5000), a[:1000]...) // 1000 duplicates
|
||||
j = shuffleEveryK(j, 7)
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
out := Join(a, j)
|
||||
if len(out) != 10000 {
|
||||
b.Fatalf("unexpected length: %d", len(out))
|
||||
|
|
|
|||
|
|
@ -166,7 +166,7 @@ func TestIsJoinToken(t *testing.T) {
|
|||
}
|
||||
|
||||
func BenchmarkJoinToken(b *testing.B) {
|
||||
for n := 0; n < b.N; n++ {
|
||||
for b.Loop() {
|
||||
JoinToken()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ func TestClip(t *testing.T) {
|
|||
func BenchmarkClipRunesASCII(b *testing.B) {
|
||||
s := strings.Repeat("abc def ghi ", 20) // ASCII
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
_ = Clip(s, 50)
|
||||
}
|
||||
}
|
||||
|
|
@ -37,7 +37,7 @@ func BenchmarkClipRunesASCII(b *testing.B) {
|
|||
func BenchmarkClipRunesUTF8(b *testing.B) {
|
||||
s := strings.Repeat("Grüße 世", 20) // non-ASCII runes
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
_ = Clip(s, 50)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ func TestContainsAlnumLower(t *testing.T) {
|
|||
func BenchmarkContainsNumber(b *testing.B) {
|
||||
s := "The quick brown fox jumps over 13 lazy dogs"
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
_ = ContainsNumber(s)
|
||||
}
|
||||
}
|
||||
|
|
@ -123,7 +123,7 @@ func BenchmarkContainsNumber(b *testing.B) {
|
|||
func BenchmarkSortCaseInsensitive(b *testing.B) {
|
||||
words := []string{"Zebra", "apple", "Banana", "cherry", "Apricot", "banana", "zebra", "Cherry"}
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
w := append([]string(nil), words...)
|
||||
SortCaseInsensitive(w)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ func makeLargeText(distinct, repeats int) string {
|
|||
func BenchmarkWords_Large(b *testing.B) {
|
||||
s := makeLargeText(200, 200) // ~40k tokens mixed
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
_ = Words(s)
|
||||
}
|
||||
}
|
||||
|
|
@ -54,7 +54,7 @@ func BenchmarkWords_Large(b *testing.B) {
|
|||
func BenchmarkUniqueKeywords_Large(b *testing.B) {
|
||||
s := makeLargeText(200, 200)
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
_ = UniqueKeywords(s)
|
||||
}
|
||||
}
|
||||
|
|
@ -62,7 +62,7 @@ func BenchmarkUniqueKeywords_Large(b *testing.B) {
|
|||
func BenchmarkUniqueKeywords_ManyDup(b *testing.B) {
|
||||
s := makeLargeText(20, 2000) // many repeats, few distinct
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for b.Loop() {
|
||||
_ = UniqueKeywords(s)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
Loading…
Add table
Add a link
Reference in a new issue