Merge remote-tracking branch 'origin/develop' into PostgreSQL

Author: Keith Martin, 2025-11-15 18:33:45 +10:00
Commit: dbf4fd51f9
116 changed files with 26388 additions and 24020 deletions

View file

@ -1,6 +1,6 @@
# PhotoPrism® Repository Guidelines
**Last Updated:** November 11, 2025
**Last Updated:** November 14, 2025
## Purpose
@ -17,6 +17,7 @@ Learn more: https://agents.md/
- REST API: https://docs.photoprism.dev/ (Swagger), https://docs.photoprism.app/developer-guide/api/ (Docs)
- Code Maps: [`CODEMAP.md`](CODEMAP.md) (Backend/Go), [`frontend/CODEMAP.md`](frontend/CODEMAP.md) (Frontend/JS)
- Face Detection & Embeddings Notes: [`internal/ai/face/README.md`](internal/ai/face/README.md)
- Vision Engine Guides: [`internal/ai/vision/openai/README.md`](internal/ai/vision/openai/README.md), [`internal/ai/vision/ollama/README.md`](internal/ai/vision/ollama/README.md)
> Quick Tip: to inspect GitHub issue details without leaving the terminal, run `curl -s https://api.github.com/repos/photoprism/photoprism/issues/<id>`.
@ -224,6 +225,8 @@ Note: Across our public documentation, official images, and in production, the c
- Dialogs must follow the shared focus pattern documented in `frontend/src/common/README.md`.
- Always expose `ref="dialog"` on `<v-dialog>` overlays, call `$view.enter/leave` in `@after-enter` / `@after-leave`, and avoid positive `tabindex` values (see the wiring sketch after this list).
- Persistent dialogs (those with the `persistent` prop) must handle Escape via `@keydown.esc.exact` so Vuetify's default rejection animation is suppressed; keep other shortcuts on `@keyup` so inner inputs can cancel them first.
- Global shortcuts run through `onShortCut(ev)` in `common/view.js`; it only forwards Escape and `ctrl`/`meta` combinations, so do not rely on it for arbitrary keys.
- When a dialog opens nested menus (for example, combobox suggestion lists), ensure they work with the global trap; see the README for troubleshooting tips.
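A minimal wiring sketch of this pattern (illustrative only; the emitted event and method names are assumptions, not taken from a specific dialog):

```js
// Options-API sketch for a dialog following the shared focus pattern.
// Assumed template: <v-dialog ref="dialog" persistent @keydown.esc.exact="close"
//   @after-enter="afterEnter" @after-leave="afterLeave"> wrapping <v-card ref="content" tabindex="-1">.
export default {
  emits: ["close"],
  methods: {
    afterEnter() {
      this.$view.enter(this); // register as the active view so global shortcuts reach this dialog
    },
    afterLeave() {
      this.$view.leave(this); // release focus ownership when the overlay closes
    },
    close(ev) {
      if (ev?.defaultPrevented) return; // inner inputs may veto the shortcut first
      this.$emit("close");
    },
  },
};
```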
## Safety & Data

View file

@ -1,6 +1,6 @@
PhotoPrism — Backend CODEMAP
**Last Updated:** November 2, 2025
**Last Updated:** November 14, 2025
Purpose
- Give agents and contributors a fast, reliable map of where things live and how they fit together, so you can add features, fix bugs, and write tests without spelunking.
@ -35,6 +35,7 @@ High-Level Package Map (Go)
- `internal/config` — configuration, flags/env/options, client config, DB init/migrate
- `internal/entity` — GORM v1 models, queries, search helpers, migrations
- `internal/photoprism` — core domain logic (indexing, import, faces, thumbnails, cleanup)
- `internal/ai/vision` — multi-engine computer vision pipeline (models, adapters, schema). Adapter docs: [`internal/ai/vision/openai/README.md`](internal/ai/vision/openai/README.md) and [`internal/ai/vision/ollama/README.md`](internal/ai/vision/ollama/README.md).
- `internal/workers` — background schedulers (index, vision, sync, meta, backup)
- `internal/auth` — ACL, sessions, OIDC
- `internal/service` — cluster/portal, maps, hub, webdav

View file

@ -1,5 +1,5 @@
# Ubuntu 25.10 (Questing Quokka)
FROM photoprism/develop:251018-questing
FROM photoprism/develop:251113-questing
# Harden npm usage by default (applies to npm ci / install in dev container)
ENV NPM_CONFIG_IGNORE_SCRIPTS=true

NOTICE (26 changed lines)
View file

@ -9,7 +9,7 @@ The following 3rd-party software packages may be used by or distributed with
PhotoPrism. Any information relevant to third-party vendors listed below are
collected using common, reasonable means.
Date generated: 2025-11-10
Date generated: 2025-11-12
================================================================================
@ -2443,8 +2443,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
--------------------------------------------------------------------------------
Package: github.com/golang/geo
Version: v0.0.0-20251110120158-2d428c1fd7a2
License: Apache-2.0 (https://github.com/golang/geo/blob/2d428c1fd7a2/LICENSE)
Version: v0.0.0-20251111181513-e7f3a1a58fb3
License: Apache-2.0 (https://github.com/golang/geo/blob/e7f3a1a58fb3/LICENSE)
Apache License
@ -8188,8 +8188,8 @@ License: Apache-2.0 (https://github.com/go4org/go4/blob/214862532bf5/LICENSE)
--------------------------------------------------------------------------------
Package: golang.org/x/crypto
Version: v0.43.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/crypto/+/v0.43.0:LICENSE)
Version: v0.44.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/crypto/+/v0.44.0:LICENSE)
Copyright 2009 The Go Authors.
@ -8222,8 +8222,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
Package: golang.org/x/image
Version: v0.32.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/image/+/v0.32.0:LICENSE)
Version: v0.33.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/image/+/v0.33.0:LICENSE)
Copyright 2009 The Go Authors.
@ -8256,8 +8256,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
Package: golang.org/x/mod/semver
Version: v0.29.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/mod/+/v0.29.0:LICENSE)
Version: v0.30.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/mod/+/v0.30.0:LICENSE)
Copyright 2009 The Go Authors.
@ -8290,8 +8290,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
Package: golang.org/x/net
Version: v0.46.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/net/+/v0.46.0:LICENSE)
Version: v0.47.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/net/+/v0.47.0:LICENSE)
Copyright 2009 The Go Authors.
@ -8426,8 +8426,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
Package: golang.org/x/text
Version: v0.30.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/text/+/v0.30.0:LICENSE)
Version: v0.31.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/text/+/v0.31.0:LICENSE)
Copyright 2009 The Go Authors.

View file

@ -3,8 +3,8 @@ msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-10-17 17:32+0000\n"
"PO-Revision-Date: 2025-10-22 08:25+0000\n"
"Last-Translator: DeepL <noreply-mt-deepl@weblate.org>\n"
"PO-Revision-Date: 2025-11-12 07:40+0000\n"
"Last-Translator: dtsolakis <dtsola@eranet.gr>\n"
"Language-Team: none\n"
"Language: el\n"
"MIME-Version: 1.0\n"
@ -23,11 +23,11 @@ msgstr "Αυτό δεν είναι εφικτό"
#: messages.go:106
msgid "Changes could not be saved"
msgstr "Οι αλλαγές δεν μπόρεσαν να αποθηκευτούν"
msgstr "Οι αλλαγές δεν ήταν δυνατό να αποθηκευτούν"
#: messages.go:107
msgid "Could not be deleted"
msgstr "Δεν μπόρεσε να διαγραφεί"
msgstr "Δεν ήταν εφικτή η διαγραφή"
#: messages.go:108
#, c-format
@ -48,7 +48,7 @@ msgstr "Πολύ μεγάλο αρχείο"
#: messages.go:112
msgid "Unsupported"
msgstr "Ανυποστήρικτος"
msgstr "Δεν υποστηρίζεται"
#: messages.go:113
msgid "Unsupported type"
@ -56,11 +56,11 @@ msgstr "Μη υποστηριζόμενος τύπος"
#: messages.go:114
msgid "Unsupported format"
msgstr "Μη υποστηριζόμενη μορφή"
msgstr "Μη υποστηριζόμενος μορφότυπος"
#: messages.go:115
msgid "Originals folder is empty"
msgstr "Ο φάκελος Πρωτότυπα είναι άδειος"
msgstr "Ο φάκελος πρωτότυπων είναι άδειος"
#: messages.go:116
msgid "Selection not found"
@ -84,19 +84,19 @@ msgstr "Η ετικέτα δεν βρέθηκε"
#: messages.go:121
msgid "Album not found"
msgstr "Η Συλλογή δεν βρέθηκε"
msgstr "Το άλμπουμ δεν βρέθηκε"
#: messages.go:122
msgid "Subject not found"
msgstr "Το Θέμα δεν βρέθηκε"
msgstr "Το θέμα δεν βρέθηκε"
#: messages.go:123
msgid "Person not found"
msgstr "Το Άτομο δεν βρέθηκε"
msgstr "Το άτομο δεν βρέθηκε"
#: messages.go:124
msgid "Face not found"
msgstr "Το Πρόσωπο δεν βρέθηκε"
msgstr "Το πρόσωπο δεν βρέθηκε"
#: messages.go:125
msgid "Not available in public mode"
@ -104,7 +104,7 @@ msgstr "Μη διαθέσιμο κατά τη δημόσια λειτουργί
#: messages.go:126
msgid "Not available in read-only mode"
msgstr "μη διαθέσιμο στην κατάσταση \"μόνο για ανάγνωση\""
msgstr "Μη διαθέσιμο στην κατάσταση \"μόνο ανάγνωση\""
#: messages.go:127
msgid "Please log in to your account"
@ -112,7 +112,7 @@ msgstr "Παρακαλούμε συνδεθείτε και δοκιμάστε ξ
#: messages.go:128
msgid "Permission denied"
msgstr "Το Άτομο διαγράφηκε"
msgstr "Δέν δόθηκε άδεια"
#: messages.go:129
msgid "Payment required"
@ -120,31 +120,31 @@ msgstr "Απαιτείται πληρωμή"
#: messages.go:130
msgid "Upload might be offensive"
msgstr "Η φόρτωση μπορεί να είναι προσβλητική"
msgstr "Το ανέβασμα μπορεί να είναι προσβλητικό"
#: messages.go:131
msgid "Upload failed"
msgstr "Αποτυχία αποστολής"
msgstr "Αποτυχία ανεβάσματος"
#: messages.go:132
msgid "No items selected"
msgstr "Δεν έχουν επιλεγεί αντικείμενα"
msgstr "Δεν έχουν επιλεγεί στοιχεία"
#: messages.go:133
msgid "Failed creating file, please check permissions"
msgstr "Απέτυχε η δημιουργία αρχείου, παρακαλούμε ελέγξτε τα δικαιώματα"
msgstr "Απέτυχε η δημιουργία αρχείου, ελέγξτε τα δικαιώματα"
#: messages.go:134
msgid "Failed creating folder, please check permissions"
msgstr "Απέτυχε η δημιουργία φακέλου, παρακαλούμε ελέγξτε τα δικαιώματα"
msgstr "Απέτυχε η δημιουργία φακέλου, ελέγξτε τα δικαιώματα"
#: messages.go:135
msgid "Could not connect, please try again"
msgstr "Δεν ήταν δυνατή η σύνδεση, παρακαλώ δοκιμάστε ξανά"
msgstr "Δεν ήταν δυνατή η σύνδεση, δοκιμάστε ξανά"
#: messages.go:136
msgid "Enter verification code"
msgstr "βάλτε κωδικό επιβεβαίωσης"
msgstr "Εισάγετε τον κωδικό επαλήθευσης"
#: messages.go:137
msgid "Invalid verification code, please try again"
@ -152,11 +152,11 @@ msgstr "Μη έγκυρος κωδικός επαλήθευσης, δοκιμά
#: messages.go:138
msgid "Invalid password, please try again"
msgstr "Μη έγκυρος κωδικός πρόσβασης, παρακαλώ δοκιμάστε ξανά"
msgstr "Μη έγκυρος κωδικός πρόσβασης, δοκιμάστε ξανά"
#: messages.go:139
msgid "Feature disabled"
msgstr "Λειτουργία απενεργοποιημένη"
msgstr "Απενεργοποιημένη δυνατότητα"
#: messages.go:140
msgid "No labels selected"
@ -164,7 +164,7 @@ msgstr "Δεν έχουν επιλεγεί ετικέτες"
#: messages.go:141
msgid "No albums selected"
msgstr "Δεν έχουν επιλεγεί συλλογές"
msgstr "Δεν έχουν επιλεγεί άλμπουμ"
#: messages.go:142
msgid "No files available for download"
@ -188,7 +188,7 @@ msgstr "Μη έγκυρο όνομα"
#: messages.go:147
msgid "Busy, please try again later"
msgstr "Απασχολημένος, προσπαθήστε ξανά αργότερα"
msgstr "Το σύστημα είναι απασχολημένο, προσπαθήστε ξανά αργότερα"
#: messages.go:148
#, c-format
@ -197,7 +197,7 @@ msgstr "Το διάστημα αφύπνισης είναι %s, αλλά πρέ
#: messages.go:149
msgid "Your account could not be connected"
msgstr "Ο λογαριασμός σας δεν μπόρεσε να συνδεθεί"
msgstr "Ο λογαριασμός σας δεν ήταν δυνατό να συνδεθεί"
#: messages.go:150
msgid "Too many requests"
@ -205,11 +205,11 @@ msgstr "Πάρα πολλά αιτήματα"
#: messages.go:151
msgid "Insufficient storage"
msgstr "Ανεπαρκής αποθήκευση"
msgstr "Ανεπαρκής χώρος"
#: messages.go:152
msgid "Quota exceeded"
msgstr "Υπέρβαση ποσόστωσης"
msgstr "Υπέρβαση ορίου"
#: messages.go:155
msgid "Changes successfully saved"
@ -217,20 +217,20 @@ msgstr "Οι αλλαγές αποθηκεύτηκαν επιτυχώς"
#: messages.go:156
msgid "Album created"
msgstr "Η Συλλογή δημιουργήθηκε"
msgstr "Το άλμπουμ δημιουργήθηκε"
#: messages.go:157
msgid "Album saved"
msgstr "Η Συλλογή αποθηκεύθηκε"
msgstr "Το άλμπουμ αποθηκεύθηκε"
#: messages.go:158
#, c-format
msgid "Album %s deleted"
msgstr "Η Συλλογή %s διαγράφηκε"
msgstr "Το άλμπουμ %s διαγράφηκε"
#: messages.go:159
msgid "Album contents cloned"
msgstr "Τα περιεχόμενα της Συλλογής αντιγράφηκαν"
msgstr "Τα περιεχόμενα του άλμπουμ αντιγράφηκαν"
#: messages.go:160
msgid "File removed from stack"
@ -267,15 +267,15 @@ msgstr "%d καταχωρήσεις αφαιρέθηκαν από %s"
#: messages.go:167
msgid "Account created"
msgstr "Ο Λογαριασμός δημιουργήθηκε"
msgstr "Ο λογαριασμός δημιουργήθηκε"
#: messages.go:168
msgid "Account saved"
msgstr "Ο Λογαριασμός αποθηκεύθηκε"
msgstr "Ο λογαριασμός αποθηκεύθηκε"
#: messages.go:169
msgid "Account deleted"
msgstr "Ο Λογαριασμός διαγράφηκε"
msgstr "Ο λογαριασμός διαγράφηκε"
#: messages.go:170
msgid "Settings saved"
@ -297,7 +297,7 @@ msgstr "Η εισαγωγή ακυρώθηκε"
#: messages.go:174
#, c-format
msgid "Indexing completed in %d s"
msgstr "Η δημιουργία ευρετηρίου σε %d s"
msgstr "Η ευρετηρίαση ολοκληρώθηκε σε %d s"
#: messages.go:175
msgid "Indexing originals..."
@ -329,27 +329,27 @@ msgstr "Αντιγραφή αρχείων από %s"
#: messages.go:181
msgid "Labels deleted"
msgstr "Οι Ετικέτες διαγράφηκαν"
msgstr "Οι ετικέτες διαγράφηκαν"
#: messages.go:182
msgid "Label saved"
msgstr "Η Ετικέτα αποθηκεύτηκε"
msgstr "Η ετικέτα αποθηκεύτηκε"
#: messages.go:183
msgid "Subject saved"
msgstr "Το Θέμα αποθηκεύθηκε"
msgstr "Το θέμα αποθηκεύθηκε"
#: messages.go:184
msgid "Subject deleted"
msgstr "Το Θέμα διαγράφηκε"
msgstr "Το θέμα διαγράφηκε"
#: messages.go:185
msgid "Person saved"
msgstr "Το Άτομο αποθηκεύθηκε"
msgstr "Το άτομο αποθηκεύθηκε"
#: messages.go:186
msgid "Person deleted"
msgstr "Το Άτομο διαγράφηκε"
msgstr "Το άτομο διαγράφηκε"
#: messages.go:187
msgid "File uploaded"
@ -358,15 +358,15 @@ msgstr "Το αρχείο διαγράφηκε"
#: messages.go:188
#, c-format
msgid "%d files uploaded in %d s"
msgstr "%d αρχεία μεταφορτώθηκαν σε %d s"
msgstr "%d αρχεία ανεβάστηκαν σε %d s"
#: messages.go:189
msgid "Processing upload..."
msgstr "Επεξεργασία μεταφόρτωσης..."
msgstr "Επεξεργασία ανεβάσματος..."
#: messages.go:190
msgid "Upload has been processed"
msgstr "Η φόρτωση έχει ολοκληρωθεί"
msgstr "Το ανέβασμα έχει ολοκληρωθεί"
#: messages.go:191
msgid "Selection approved"
@ -382,16 +382,16 @@ msgstr "Η επιλογή αποκαταστάθηκε"
#: messages.go:194
msgid "Selection marked as private"
msgstr "Η επιλογή χαρακτηρίστηκε ως ιδιωτική"
msgstr "Η επιλογή μαρκαρίστηκε ως ιδιωτική"
#: messages.go:195
msgid "Albums deleted"
msgstr "Οι Συλλογές διαγράφηκαν"
msgstr "Διαγραμμένα άλμπουμ"
#: messages.go:196
#, c-format
msgid "Zip created in %d s"
msgstr "Το αρχείο συμπίεσης δημιουργήθηκε σε %d s"
msgstr "Το αρχείο zip δημιουργήθηκε σε %d s"
#: messages.go:197
msgid "Permanently deleted"
@ -404,11 +404,11 @@ msgstr "%s έχει αποκατασταθεί"
#: messages.go:199
msgid "Successfully verified"
msgstr "Επαληθεύτηκε με επιτυχία"
msgstr "Επιτυχής επαλήθευση"
#: messages.go:200
msgid "Successfully activated"
msgstr "Ενεργοποιήθηκε με επιτυχία"
msgstr "Επιτυχής ενεργοποίηση"
#~ msgid "Storage is full"
#~ msgstr "Ο αποθηκευτικός χώρος είναι γεμάτος"

View file

@ -410,7 +410,8 @@ services:
## Login with "user / photoprism" and "admin / photoprism".
keycloak:
image: quay.io/keycloak/keycloak:25.0
stop_grace_period: 30s
stop_grace_period: 20s
profiles: [ "all", "auth", "keycloak" ]
command: "start-dev" # development mode, do not use this in production!
links:
- "traefik:localssl.dev"

View file

@ -1,6 +1,6 @@
PhotoPrism — Frontend CODEMAP
**Last Updated:** October 13, 2025
**Last Updated:** November 12, 2025
Purpose
- Help agents and contributors navigate the Vue 3 + Vuetify 3 app quickly and make safe changes.
@ -107,6 +107,10 @@ Common HowTos
- Compute `key` from route + filter params and cap eager loads with `Rest.restoreCap(Model.batchSize())` (defaults to 10× the batch size); a sketch follows this list.
- Check `$view.wasBackwardNavigation()` when deciding whether to reuse stored state; `src/app.js` wires the router guards that keep the history direction in sync so no globals like `window.backwardsNavigationDetected` are needed.
- Handle dialog shortcuts
- Persistent dialogs (`persistent` prop) must listen for Escape on `@keydown.esc.exact` to override Vuetify's rejection animation; keep Enter and other actions on `@keyup` so child inputs can intercept them first.
- Global shortcuts go through `onShortCut(ev)` in `common/view.js`. It only forwards Escape and `ctrl`/`meta` combinations, so do not depend on it for plain character keys.
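A minimal sketch of the restore flow (the key scheme, `hasStoredState()`, `restoreState()`, and `search()` are illustrative assumptions; `Rest.restoreCap()`, `Model.batchSize()`, and `$view.wasBackwardNavigation()` are the helpers referenced above):

```js
// Sketch only: decide whether to reuse stored results when re-entering a result page.
const key = `${this.$route.name}:${JSON.stringify(this.filter)}`; // illustrative key scheme
const cap = Rest.restoreCap(Model.batchSize()); // eager restores are capped (default 10× batch size)

if (this.$view.wasBackwardNavigation() && this.hasStoredState(key)) {
  this.restoreState(key, cap); // illustrative: reuse at most `cap` previously loaded items
} else {
  this.search(); // forward navigation: run a fresh query instead
}
```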
Conventions & Safety
- Avoid `v-html`; use `v-sanitize` or `$util.sanitizeHtml()` (build enforces this)
- Keep big components lazy if needed; split views logically under `src/page`

View file

@ -20,7 +20,7 @@
"@mdi/font": "^7.4.47",
"@testing-library/jest-dom": "^6.9.1",
"@testing-library/react": "^16.3.0",
"@vitejs/plugin-react": "^5.1.0",
"@vitejs/plugin-react": "^5.1.1",
"@vitejs/plugin-vue": "^6.0.1",
"@vitest/browser": "^3.2.4",
"@vitest/coverage-v8": "^3.2.4",
@ -34,7 +34,7 @@
"babel-loader": "^10.0.0",
"babel-plugin-istanbul": "^7.0.1",
"babel-plugin-polyfill-corejs3": "^0.13.0",
"browserslist": "^4.27.0",
"browserslist": "^4.28.0",
"cheerio": "1.0.0-rc.12",
"core-js": "^3.46.0",
"cross-env": "^7.0.3",
@ -79,7 +79,7 @@
"regenerator-runtime": "^0.14.1",
"resolve-url-loader": "^5.0.0",
"sanitize-html": "^2.17.0",
"sass": "^1.93.3",
"sass": "^1.94.0",
"sass-loader": "^16.0.6",
"sockette": "^2.0.6",
"style-loader": "^4.0.0",
@ -98,7 +98,7 @@
"vue-sanitize-directive": "^0.2.1",
"vue-style-loader": "^4.1.3",
"vue3-gettext": "^2.4.0",
"vuetify": "^3.10.9",
"vuetify": "^3.10.10",
"webpack": "^5.102.1",
"webpack-bundle-analyzer": "^4.10.2",
"webpack-cli": "^6.0.1",
@ -4301,9 +4301,9 @@
"license": "MIT"
},
"node_modules/@rolldown/pluginutils": {
"version": "1.0.0-beta.43",
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.43.tgz",
"integrity": "sha512-5Uxg7fQUCmfhax7FJke2+8B6cqgeUJUD9o2uXIKXhD+mG0mL6NObmVoi9wXEU1tY89mZKgAYA6fTbftx3q2ZPQ==",
"version": "1.0.0-beta.47",
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.47.tgz",
"integrity": "sha512-8QagwMH3kNCuzD8EWL8R2YPW5e4OrHNSAHRFDdmFqEwEaD/KcNKjVoumo+gP2vW5eKB2UPbM6vTYiGZX0ixLnw==",
"license": "MIT"
},
"node_modules/@rollup/plugin-node-resolve": {
@ -4941,9 +4941,9 @@
"license": "MIT"
},
"node_modules/@types/node": {
"version": "24.10.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.0.tgz",
"integrity": "sha512-qzQZRBqkFsYyaSWXuEHc2WR9c0a0CXwiE5FWUvn7ZM+vdy1uZLfCunD38UzhuB7YN/J11ndbDBcTmOdxJo9Q7A==",
"version": "24.10.1",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz",
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
"license": "MIT",
"dependencies": {
"undici-types": "~7.16.0"
@ -4998,15 +4998,15 @@
"license": "MIT"
},
"node_modules/@vitejs/plugin-react": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-5.1.0.tgz",
"integrity": "sha512-4LuWrg7EKWgQaMJfnN+wcmbAW+VSsCmqGohftWjuct47bv8uE4n/nPpq4XjJPsxgq00GGG5J8dvBczp8uxScew==",
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-5.1.1.tgz",
"integrity": "sha512-WQfkSw0QbQ5aJ2CHYw23ZGkqnRwqKHD/KYsMeTkZzPT4Jcf0DcBxBtwMJxnu6E7oxw5+JC6ZAiePgh28uJ1HBA==",
"license": "MIT",
"dependencies": {
"@babel/core": "^7.28.4",
"@babel/core": "^7.28.5",
"@babel/plugin-transform-react-jsx-self": "^7.27.1",
"@babel/plugin-transform-react-jsx-source": "^7.27.1",
"@rolldown/pluginutils": "1.0.0-beta.43",
"@rolldown/pluginutils": "1.0.0-beta.47",
"@types/babel__core": "^7.20.5",
"react-refresh": "^0.18.0"
},
@ -6157,9 +6157,9 @@
}
},
"node_modules/autoprefixer": {
"version": "10.4.21",
"resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.21.tgz",
"integrity": "sha512-O+A6LWV5LDHSJD3LjHYoNi4VLsj/Whi7k6zG12xTYaU4cQ8oxQGckXNX8cRHK5yOZ/ppVHe0ZBXGzSV9jXdVbQ==",
"version": "10.4.22",
"resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.22.tgz",
"integrity": "sha512-ARe0v/t9gO28Bznv6GgqARmVqcWOV3mfgUPn9becPHMiD3o9BwlRgaeccZnwTpZ7Zwqrm+c1sUSsMxIzQzc8Xg==",
"funding": [
{
"type": "opencollective",
@ -6176,9 +6176,9 @@
],
"license": "MIT",
"dependencies": {
"browserslist": "^4.24.4",
"caniuse-lite": "^1.0.30001702",
"fraction.js": "^4.3.7",
"browserslist": "^4.27.0",
"caniuse-lite": "^1.0.30001754",
"fraction.js": "^5.3.4",
"normalize-range": "^0.1.2",
"picocolors": "^1.1.1",
"postcss-value-parser": "^4.2.0"
@ -6328,9 +6328,9 @@
"license": "MIT"
},
"node_modules/baseline-browser-mapping": {
"version": "2.8.25",
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.25.tgz",
"integrity": "sha512-2NovHVesVF5TXefsGX1yzx1xgr7+m9JQenvz6FQY3qd+YXkKkYiv+vTCc7OriP9mcDZpTC5mAOYN4ocd29+erA==",
"version": "2.8.27",
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.27.tgz",
"integrity": "sha512-2CXFpkjVnY2FT+B6GrSYxzYf65BJWEqz5tIRHCvNsZZ2F3CmsCB37h8SpYgKG7y9C4YAeTipIPWG7EmFmhAeXA==",
"license": "Apache-2.0",
"bin": {
"baseline-browser-mapping": "dist/cli.js"
@ -6387,9 +6387,9 @@
}
},
"node_modules/browserslist": {
"version": "4.27.0",
"resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.27.0.tgz",
"integrity": "sha512-AXVQwdhot1eqLihwasPElhX2tAZiBjWdJ9i/Zcj2S6QYIjkx62OKSfnobkriB81C3l4w0rVy3Nt4jaTBltYEpw==",
"version": "4.28.0",
"resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.0.tgz",
"integrity": "sha512-tbydkR/CxfMwelN0vwdP/pLkDwyAASZ+VfWm4EOwlB6SWhx1sYnWLqo8N5j0rAzPfzfRaxt0mM/4wPU/Su84RQ==",
"funding": [
{
"type": "opencollective",
@ -6407,10 +6407,10 @@
"license": "MIT",
"peer": true,
"dependencies": {
"baseline-browser-mapping": "^2.8.19",
"caniuse-lite": "^1.0.30001751",
"electron-to-chromium": "^1.5.238",
"node-releases": "^2.0.26",
"baseline-browser-mapping": "^2.8.25",
"caniuse-lite": "^1.0.30001754",
"electron-to-chromium": "^1.5.249",
"node-releases": "^2.0.27",
"update-browserslist-db": "^1.1.4"
},
"bin": {
@ -7690,9 +7690,9 @@
}
},
"node_modules/electron-to-chromium": {
"version": "1.5.249",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.249.tgz",
"integrity": "sha512-5vcfL3BBe++qZ5kuFhD/p8WOM1N9m3nwvJPULJx+4xf2usSlZFJ0qoNYO2fOX4hi3ocuDcmDobtA+5SFr4OmBg==",
"version": "1.5.250",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.250.tgz",
"integrity": "sha512-/5UMj9IiGDMOFBnN4i7/Ry5onJrAGSbOGo3s9FEKmwobGq6xw832ccET0CE3CkkMBZ8GJSlUIesZofpyurqDXw==",
"license": "ISC"
},
"node_modules/emmet": {
@ -8895,15 +8895,15 @@
}
},
"node_modules/fraction.js": {
"version": "4.3.7",
"resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz",
"integrity": "sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==",
"version": "5.3.4",
"resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-5.3.4.tgz",
"integrity": "sha512-1X1NTtiJphryn/uLQz3whtY6jK3fTqoE3ohKs0tT+Ujr1W59oopxmoEh7Lu5p6vBaPbgoM0bzveAW4Qi5RyWDQ==",
"license": "MIT",
"engines": {
"node": "*"
},
"funding": {
"type": "patreon",
"type": "github",
"url": "https://github.com/sponsors/rawify"
}
},
@ -10487,9 +10487,9 @@
"license": "MIT"
},
"node_modules/js-yaml": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
"integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
"license": "MIT",
"dependencies": {
"argparse": "^2.0.1"
@ -14039,9 +14039,9 @@
}
},
"node_modules/sass": {
"version": "1.93.3",
"resolved": "https://registry.npmjs.org/sass/-/sass-1.93.3.tgz",
"integrity": "sha512-elOcIZRTM76dvxNAjqYrucTSI0teAF/L2Lv0s6f6b7FOwcwIuA357bIE871580AjHJuSvLIRUosgV+lIWx6Rgg==",
"version": "1.94.0",
"resolved": "https://registry.npmjs.org/sass/-/sass-1.94.0.tgz",
"integrity": "sha512-Dqh7SiYcaFtdv5Wvku6QgS5IGPm281L+ZtVD1U2FJa7Q0EFRlq8Z3sjYtz6gYObsYThUOz9ArwFqPZx+1azILQ==",
"license": "MIT",
"peer": true,
"dependencies": {
@ -16338,9 +16338,9 @@
}
},
"node_modules/vuetify": {
"version": "3.10.9",
"resolved": "https://registry.npmjs.org/vuetify/-/vuetify-3.10.9.tgz",
"integrity": "sha512-hVeBkkSJhbhbKOIfMQTmTJ1R1S+ZHp7k3r0wWwA28eUe3ulD/uY6nrF/AZ3ZIh+WRHiDqipWmG1RrrZGUtmZOg==",
"version": "3.10.10",
"resolved": "https://registry.npmjs.org/vuetify/-/vuetify-3.10.10.tgz",
"integrity": "sha512-4RRQrJCaiWRalciBVpIKuZmPlfGUGwJalXuca8nHVNTDKJq4LHYNLcIKEbfdyP/6VBiWG4jZPJDTmC0dpXu+sA==",
"license": "MIT",
"peer": true,
"funding": {
@ -16790,9 +16790,9 @@
}
},
"node_modules/webpack-plugin-vuetify/node_modules/yocto-queue": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.1.tgz",
"integrity": "sha512-AyeEbWOu/TAXdxlV9wmGcR0+yh2j3vYPGOECcIj2S7MkrLyC7ne+oye2BKTItt0ii2PHk4cDy+95+LshzbXnGg==",
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz",
"integrity": "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==",
"license": "MIT",
"engines": {
"node": ">=12.20"

View file

@ -44,7 +44,7 @@
"@mdi/font": "^7.4.47",
"@testing-library/jest-dom": "^6.9.1",
"@testing-library/react": "^16.3.0",
"@vitejs/plugin-react": "^5.1.0",
"@vitejs/plugin-react": "^5.1.1",
"@vitejs/plugin-vue": "^6.0.1",
"@vitest/browser": "^3.2.4",
"@vitest/coverage-v8": "^3.2.4",
@ -58,7 +58,7 @@
"babel-loader": "^10.0.0",
"babel-plugin-istanbul": "^7.0.1",
"babel-plugin-polyfill-corejs3": "^0.13.0",
"browserslist": "^4.27.0",
"browserslist": "^4.28.0",
"cheerio": "1.0.0-rc.12",
"core-js": "^3.46.0",
"cross-env": "^7.0.3",
@ -103,7 +103,7 @@
"regenerator-runtime": "^0.14.1",
"resolve-url-loader": "^5.0.0",
"sanitize-html": "^2.17.0",
"sass": "^1.93.3",
"sass": "^1.94.0",
"sass-loader": "^16.0.6",
"sockette": "^2.0.6",
"style-loader": "^4.0.0",
@ -122,7 +122,7 @@
"vue-sanitize-directive": "^0.2.1",
"vue-style-loader": "^4.1.3",
"vue3-gettext": "^2.4.0",
"vuetify": "^3.10.9",
"vuetify": "^3.10.10",
"webpack": "^5.102.1",
"webpack-bundle-analyzer": "^4.10.2",
"webpack-cli": "^6.0.1",

View file

@ -1,10 +1,10 @@
# View Helper Guidelines
**Last Updated:** November 11, 2025
**Last Updated:** November 12, 2025
## Focus Management
PhotoPrism maintains predictable keyboard focus across pages and dialogs by using a shared view helper:
PhotoPrism uses a shared view helper to maintain predictable focus across pages and dialogs:
- [`frontend/src/common/view.js`](https://github.com/photoprism/photoprism/blob/develop/frontend/src/common/view.js)
@ -64,10 +64,12 @@ Vuetify dialogs are teleported to the overlay container, so consistent refs and
```vue
<v-card-actions class="action-buttons">
<v-btn variant="flat" color="button" class="action-cancel" @click.stop="close">
<v-btn variant="flat" color="button"
class="action-cancel" @click.stop="close">
{{ $gettext(`Cancel`) }}
</v-btn>
<v-btn variant="flat" color="highlight" class="action-confirm" @click.stop="confirm">
<v-btn variant="flat" color="highlight"
class="action-confirm" @click.stop="confirm">
{{ $gettext(`Delete`) }}
</v-btn>
</v-card-actions>
@ -79,6 +81,72 @@ Vuetify dialogs are teleported to the overlay container, so consistent refs and
Only add local `@focusout` handlers if a dialog needs custom behaviour. If you do, always call `ev.preventDefault()` when you redirect focus so you do not fight the global handler.
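A hypothetical local handler that follows this rule (the guard is an assumption):

```js
// Sketch only: custom focus redirection inside a dialog, per the guideline above.
onFocusOut(ev) {
  if (!this.keepFocusInside(ev.relatedTarget)) {
    return; // let the global handler manage focus as usual
  }
  ev.preventDefault(); // mark the event as handled so the global handler does not also act
  const el = this.$refs.content?.$el ?? this.$refs.content;
  el?.focus?.(); // redirect focus back into the dialog body
}
```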
### Keyboard Event Handling
Dialogs and page shells often react to keyboard shortcuts (Escape to close, Enter to confirm, etc.). To keep those handlers compatible with text inputs and other interactive children:
- Attach listeners to the focusable container that the view helper manages: the page wrapper with `tabindex="-1"` or the dialog root (`<v-dialog ref="dialog">`).
- Prefer `@keyup` (for example, `@keyup.enter.exact="confirm"`) so elements inside the container receive `keydown` events first and can call `event.stopPropagation()` when they need to keep the key (such as pressing Enter inside a form field).
- **Persistent dialogs (`persistent` attribute)** must handle the Escape key with `@keydown.esc.exact="close"`. Vuetify's built-in Escape handler plays a “rejection” shake animation when the dialog refuses to close; attaching a direct keydown listener overrides the built-in handler and suppresses the animation while still allowing inner inputs to cancel the event.
- Combine modifiers like `.exact` and `.stop` intentionally. Use `.stop` only when the handler fully resolves the action; otherwise allow events to bubble to ancestor traps.
- If a component must react on `keydown`, scope the listener to the specific control instead of the container, and document why the early trigger is required.
- When emitting from reusable components, forward the native event (`close(event)`) so parents can inspect `event.defaultPrevented` or `event.key` before acting.
Note: To override Vuetify's built-in `<v-dialog>` Escape handler (and stop the “rejection” animation on persistent dialogs), attach a direct `@keydown.esc.exact="close"` listener; the global `onShortCut(ev)` hook is not sufficient on its own.
Example dialog wiring:
```vue
<v-dialog
ref="dialog"
persistent
@keydown.esc.exact="close"
@keyup.enter.exact="confirm"
>
<v-card ref="content" tabindex="-1">
<!-- dialog body -->
</v-card>
</v-dialog>
```
Example page container:
```vue
<template>
<div class="p-page p-settings" tabindex="-1" @keyup.esc.exact="maybeClose">
<!-- page content -->
</div>
</template>
```
Both snippets allow focused inputs to veto shortcuts by calling `event.stopPropagation()` or `event.preventDefault()` before the key reaches the container listener, keeping focus management predictable across the app.
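For example, a hypothetical form field could keep Enter for itself by stopping the event before it reaches the dialog listener:

```js
// Sketch only: bound via @keyup.enter="onFieldEnter" on an inner input.
onFieldEnter(ev) {
  ev.stopPropagation(); // the dialog's @keyup.enter.exact="confirm" listener never fires
  this.applyValue();    // illustrative local action instead of confirming the dialog
}
```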
#### Global Shortcut Forwarding
`common/view.js` registers a single `keydown` listener that forwards shortcut keys to the active component:
```js
// onKeyDown forwards global shortcuts (Escape, Ctrl/⌘ combos)
// to the active component when supported.
onKeyDown(ev) {
if (!this.current || !ev || !(ev instanceof KeyboardEvent) || !ev.code) {
return;
} else if (!ev.ctrlKey && !ev.metaKey && ev.code !== "Escape") {
return;
} else if (typeof this.current?.onShortCut !== "function") {
return;
}
if (this.current.onShortCut(ev)) {
ev.preventDefault();
}
}
```
- Implement `onShortCut(ev)` on pages or dialogs when you need to react to Ctrl / ⌘ combinations or global Escape handling. The helper only forwards events where `ev.ctrlKey` or `ev.metaKey` is `true`, or the Escape key is pressed, so it cannot be repurposed for arbitrary keys (a component-side sketch follows this list).
- Persistent dialogs that must suppress Vuetify's rejection animation should still attach a direct `@keydown.esc.exact` handler; `onShortCut(ev)` alone does not override the built-in dialog behaviour.
- Return `true` from `onShortCut(ev)` after handling a shortcut to signal `preventDefault()`. Return `false` to fall back to the browser's native behaviour.
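A component-side sketch (`close()` and `copySelection()` are illustrative method names):

```js
// Sketch only: implemented on a page or dialog registered via $view.enter(this).
onShortCut(ev) {
  if (ev.code === "Escape") {
    this.close(ev);
    return true; // onKeyDown() will then call ev.preventDefault()
  } else if ((ev.ctrlKey || ev.metaKey) && ev.code === "KeyC") {
    this.copySelection(); // illustrative Ctrl/⌘+C action
    return true;
  }
  return false; // fall back to the browser's native behaviour
}
```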
### Example: Delete Confirmation Dialog
```vue
@ -90,19 +158,22 @@ Vuetify dialogs are teleported to the overlay container, so consistent refs and
max-width="350"
class="p-dialog p-file-delete-dialog"
@keydown.esc.exact="close"
@keyup.enter.exact="confirm"
@after-enter="afterEnter"
@after-leave="afterLeave"
>
<v-card ref="content" tabindex="-1">
<v-card-title class="d-flex justify-start align-center ga-3">
<v-icon icon="mdi-delete-outline" size="54" color="primary"></v-icon>
<p class="text-subtitle-1">{{ $gettext(`Are you sure you want to permanently delete this file?`) }}</p>
<p class="text-subtitle-1">{{ $gettext(`Are you sure?`) }}</p>
</v-card-title>
<v-card-actions class="action-buttons mt-1">
<v-btn variant="flat" color="button" class="action-cancel" @click.stop="close">
<v-btn variant="flat" color="button"
class="action-cancel" @click.stop="close">
{{ $gettext(`Cancel`) }}
</v-btn>
<v-btn color="highlight" variant="flat" class="action-confirm" @click.stop="confirm">
<v-btn color="highlight" variant="flat"
class="action-confirm" @click.stop="confirm">
{{ $gettext(`Delete`) }}
</v-btn>
</v-card-actions>

View file

@ -222,16 +222,6 @@ function resolveFocusTarget(root) {
if (sentinel instanceof HTMLElement) {
return sentinel;
}
if (!window.$isMobile) {
const focusable = el.querySelector(
'input:not([type="hidden"]), select, textarea, button, [tabindex]:not([tabindex="-1"])'
);
if (focusable instanceof HTMLElement) {
return focusable;
}
}
} catch {
// Ignore.
}
@ -294,17 +284,17 @@ export function findFocusElement(c) {
if (c.$refs && c.$refs instanceof Object) {
focusRefs.forEach((r) => {
if (c.$refs[r]) {
candidates.push(c.$refs[r]);
const el = getHTMLElement(c.$refs[r]);
if (el) {
candidates.push(el);
}
}
});
}
if (c.$el) {
candidates.push(c.$el);
}
if (c.$el?.parentElement) {
candidates.push(c.$el.parentElement);
const el = getHTMLElement(c);
if (el) {
candidates.push(el);
}
for (let i = 0; i < candidates.length; i++) {

View file

@ -8,7 +8,7 @@
scrim
max-width="360"
class="p-dialog p-confirm-dialog"
@keyup.esc.exact="close"
@keydown.esc.exact="close"
@keyup.enter.exact="confirm"
@after-enter="afterEnter"
@after-leave="afterLeave"

View file

@ -7,6 +7,7 @@
class="p-dialog dialog-label-edit"
color="background"
@keydown.esc.exact="close"
@keyup.enter.exact="confirm"
@after-enter="afterEnter"
@after-leave="afterLeave"
>
@ -34,7 +35,6 @@
:label="$gettext('Name')"
:disabled="disabled"
class="input-title"
@keyup.enter="confirm"
></v-text-field>
</v-col>
<v-col sm="4">

View file

@ -17,7 +17,7 @@
@keydown.space.exact="onKeyDown"
@keydown.left.exact="onKeyDown"
@keydown.right.exact="onKeyDown"
@keydown.esc.stop="close"
@keydown.esc.exact.stop="close"
@click.capture="captureDialogClick"
@pointerdown.capture="captureDialogPointerDown"
>

View file

@ -8,11 +8,11 @@
scrim
scrollable
class="p-location-dialog"
@keydown.esc="close"
@keydown.esc.exact="close"
@after-enter="afterEnter"
@after-leave="afterLeave"
>
<v-card :tile="$vuetify.display.xs">
<v-card ref="content" tabindex="-1" :tile="$vuetify.display.xs">
<v-toolbar v-if="$vuetify.display.xs" flat color="navigation" class="mb-4" density="compact">
<v-btn icon @click.stop="close">
<v-icon>mdi-close</v-icon>
@ -193,6 +193,18 @@ export default {
},
},
methods: {
afterEnter() {
this.$view.enter(this);
if (this.currentLat && this.currentLng && !(this.currentLat === 0 && this.currentLng === 0)) {
this.fetchLocationInfo(this.currentLat, this.currentLng);
}
},
afterLeave() {
this.location = null;
this.locationLoading = false;
this.resetSearchState();
this.$view.leave(this);
},
close() {
this.$emit("close");
},
@ -206,16 +218,6 @@ export default {
});
}
},
afterEnter() {
if (this.currentLat && this.currentLng && !(this.currentLat === 0 && this.currentLng === 0)) {
this.fetchLocationInfo(this.currentLat, this.currentLng);
}
},
afterLeave() {
this.location = null;
this.locationLoading = false;
this.resetSearchState();
},
onMarkerMoved(event) {
this.setPositionAndFetchInfo(event.lat, event.lng);
},

View file

@ -12,7 +12,7 @@
autocorrect="off"
autocapitalize="none"
class="input-coordinates"
@keydown.enter="applyCoordinates"
@keydown.enter.stop="applyCoordinates"
@update:model-value="onCoordinateInputChange"
@paste="pastePosition"
>

View file

@ -7,6 +7,7 @@
class="dialog-person-edit"
color="background"
@keydown.esc.exact="close"
@keyup.enter.exact="confirm"
@after-enter="afterEnter"
@after-leave="afterLeave"
>
@ -34,7 +35,6 @@
:label="$gettext('Name')"
:disabled="disabled"
class="input-title"
@keyup.enter="confirm"
></v-text-field>
</v-col>
<v-col sm="4">

View file

@ -6,6 +6,8 @@
max-width="350"
class="p-dialog p-people-merge-dialog"
@keydown.esc.exact="close"
@after-enter="afterEnter"
@after-leave="afterLeave"
>
<v-card>
<v-card-title class="d-flex justify-start align-center ga-3">
@ -42,6 +44,7 @@ export default {
default: new Subject(),
},
},
emits: ["close", "confirm"],
data() {
return {};
},
@ -58,6 +61,12 @@ export default {
},
},
methods: {
afterEnter() {
this.$view.enter(this);
},
afterLeave() {
this.$view.leave(this);
},
close() {
this.$emit("close");
},

View file

@ -1,6 +1,7 @@
<template>
<v-dialog
ref="dialog"
tabindex="-1"
:model-value="visible"
:fullscreen="$vuetify.display.smAndDown"
scrim
@ -11,7 +12,7 @@
@after-leave="afterLeave"
@keydown.left.exact="onKeyLeft"
@keydown.right.exact="onKeyRight"
@keydown.esc.stop="onClose"
@keydown.esc.exact.stop="onClose"
>
<v-card ref="content" tabindex="-1" :tile="$vuetify.display.smAndDown">
<v-toolbar flat color="navigation" :density="$vuetify.display.smAndDown ? 'compact' : 'comfortable'">
@ -195,7 +196,7 @@ export default {
},
methods: {
afterEnter() {
this.$view.enter(this);
this.$view.enter(this, this.$refs.content);
this.ready = true;
},
afterLeave() {

Diffs for 46 additional files were suppressed (too large or lines too long).

View file

@ -111,7 +111,12 @@
@close="dialog.delete = false"
@confirm="onConfirmDelete"
></p-confirm-dialog>
<v-dialog :model-value="details.visible" max-width="550" class="p-dialog">
<v-dialog
:model-value="details.visible"
max-width="550"
class="p-dialog"
@keydown.esc.exact="details.visible = false"
>
<v-card>
<v-card-title class="d-flex justify-start align-center ga-3">
<v-icon v-if="details.err.Level === 'error'" icon="mdi-alert-circle-outline" color="error"></v-icon>

View file

@ -0,0 +1,102 @@
import { mount, config as VTUConfig } from "@vue/test-utils";
import { describe, it, expect, beforeEach } from "vitest";
import { nextTick } from "vue";
import PLightbox from "component/lightbox.vue";
const mountLightbox = () =>
mount(PLightbox, {
global: {
stubs: {
"v-dialog": true,
"v-icon": true,
"v-slider": true,
"p-lightbox-menu": true,
"p-sidebar-info": true,
},
},
});
describe("PLightbox (low-mock, jsdom-friendly)", () => {
beforeEach(() => {
localStorage.removeItem("lightbox.info");
sessionStorage.removeItem("lightbox.muted");
});
it("toggleInfo updates info and localStorage when visible", async () => {
const wrapper = mountLightbox();
await wrapper.setData({ visible: true });
// Use exposed onShortCut to trigger info toggle (KeyI)
await wrapper.vm.onShortCut({ code: "KeyI" });
await nextTick();
expect(localStorage.getItem("lightbox.info")).toBe("true");
await wrapper.vm.onShortCut({ code: "KeyI" });
await nextTick();
expect(localStorage.getItem("lightbox.info")).toBe("false");
});
it("toggleMute writes sessionStorage without requiring video or exposed state", async () => {
const wrapper = mountLightbox();
expect(sessionStorage.getItem("lightbox.muted")).toBeNull();
await wrapper.vm.onShortCut({ code: "KeyM" });
expect(sessionStorage.getItem("lightbox.muted")).toBe("true");
await wrapper.vm.onShortCut({ code: "KeyM" });
expect(sessionStorage.getItem("lightbox.muted")).toBe("false");
});
it("getPadding returns expected structure for large and small screens", async () => {
const wrapper = mountLightbox();
// Large viewport
const large = wrapper.vm.$options.methods.getPadding.call(
wrapper.vm,
{ x: 1200, y: 800 },
{ width: 4000, height: 3000 }
);
expect(large).toHaveProperty("top");
expect(large).toHaveProperty("bottom");
expect(large).toHaveProperty("left");
expect(large).toHaveProperty("right");
// Small viewport (<= mobileBreakpoint) should yield zeros
const small = wrapper.vm.$options.methods.getPadding.call(
wrapper.vm,
{ x: 360, y: 640 },
{ width: 1200, height: 800 }
);
expect(small).toEqual({ top: 0, bottom: 0, left: 0, right: 0 });
});
it("KeyI is ignored when dialog is not visible", async () => {
const wrapper = mountLightbox();
expect(localStorage.getItem("lightbox.info")).toBeNull();
await wrapper.vm.onShortCut({ code: "KeyI" });
expect(localStorage.getItem("lightbox.info")).toBeNull();
});
it("getViewport falls back to window size without content ref", () => {
const wrapper = mountLightbox();
const vp = wrapper.vm.$options.methods.getViewport.call(wrapper.vm);
expect(vp.x).toBeGreaterThan(0);
expect(vp.y).toBeGreaterThan(0);
});
it("menuActions marks Download action visible when allowed", () => {
const wrapper = mountLightbox();
const ctx = {
$gettext: VTUConfig.global.mocks.$gettext,
$pgettext: VTUConfig.global.mocks.$pgettext,
// minimal state needed by menuActions visibility checks
canManageAlbums: false,
canArchive: false,
canDownload: true,
collection: null,
context: "",
model: {},
};
const actions = wrapper.vm.$options.methods.menuActions.call(ctx);
const download = actions.find((a) => a?.name === "download");
expect(download).toBeTruthy();
expect(download.visible).toBe(true);
});
});

View file

@ -26,10 +26,48 @@ const vuetify = createVuetify({
// Configure Vue Test Utils global configuration
config.global.mocks = {
$gettext: (text) => text,
$pgettext: (_ctx, text) => text,
$isRtl: false,
$config: {
feature: (_name) => true,
feature: () => true,
get: () => false,
getSettings: () => ({ features: { edit: true, favorites: true, download: true, archive: true } }),
allow: () => true,
featExperimental: () => false,
featDevelop: () => false,
values: {},
dir: () => "ltr",
},
$event: {
subscribe: () => "sub-id",
subscribeOnce: () => "sub-id-once",
unsubscribe: () => {},
publish: () => {},
},
$view: {
enter: () => {},
leave: () => {},
isActive: () => true,
},
$notify: { success: () => {}, error: () => {}, warn: () => {} },
$fullscreen: {
isSupported: () => true,
isEnabled: () => false,
request: () => Promise.resolve(),
exit: () => Promise.resolve(),
},
$clipboard: { selection: [], has: () => false, toggle: () => {} },
$util: {
hasTouch: () => false,
encodeHTML: (s) => s,
sanitizeHtml: (s) => s,
formatSeconds: (n) => String(n),
formatRemainingSeconds: () => "0",
videoFormat: () => "avc",
videoFormatUrl: () => "/v.mp4",
thumb: () => ({ src: "/t.jpg", w: 100, h: 100 }),
},
$api: { post: vi.fn(), delete: vi.fn(), get: vi.fn() },
};
config.global.plugins = [vuetify];

go.mod (12 changed lines)
View file

@ -14,7 +14,7 @@ require (
github.com/esimov/pigo v1.4.6
github.com/gin-contrib/gzip v1.2.3
github.com/gin-gonic/gin v1.11.0
github.com/golang/geo v0.0.0-20251110120158-2d428c1fd7a2
github.com/golang/geo v0.0.0-20251111181513-e7f3a1a58fb3
github.com/google/open-location-code/go v0.0.0-20250620134813-83986da0156b
github.com/gorilla/websocket v1.5.3
github.com/gosimple/slug v1.15.0
@ -38,15 +38,15 @@ require (
github.com/tidwall/gjson v1.18.0
github.com/ulule/deepcopier v0.0.0-20200430083143-45decc6639b6
go4.org v0.0.0-20230225012048-214862532bf5 // indirect
golang.org/x/crypto v0.43.0
golang.org/x/net v0.46.0
golang.org/x/crypto v0.44.0
golang.org/x/net v0.47.0
gonum.org/v1/gonum v0.16.0
gopkg.in/yaml.v2 v2.4.0
)
require (
github.com/go-xmlfmt/xmlfmt v1.1.3 // indirect
golang.org/x/image v0.32.0
golang.org/x/image v0.33.0
)
require github.com/olekukonko/tablewriter v1.1.0
@ -71,7 +71,7 @@ require (
require github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0
require golang.org/x/text v0.30.0
require golang.org/x/text v0.31.0
require (
github.com/IGLOU-EU/go-wildcard v1.0.3
@ -89,7 +89,7 @@ require (
github.com/wamuir/graft v0.10.0
github.com/yalue/onnxruntime_go v1.22.0
github.com/zitadel/oidc/v3 v3.45.0
golang.org/x/mod v0.29.0
golang.org/x/mod v0.30.0
golang.org/x/sys v0.38.0
google.golang.org/protobuf v1.36.10
gorm.io/driver/mysql v1.5.7

go.sum (24 changed lines)
View file

@ -198,8 +198,8 @@ github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGw
github.com/golang/geo v0.0.0-20190916061304-5b978397cfec/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI=
github.com/golang/geo v0.0.0-20200319012246-673a6f80352d/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI=
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI=
github.com/golang/geo v0.0.0-20251110120158-2d428c1fd7a2 h1:y32Bz5DExjF7HJwv9PIr4xM34xYm7Y0FzFtk4iGBOTo=
github.com/golang/geo v0.0.0-20251110120158-2d428c1fd7a2/go.mod h1:Mymr9kRGDc64JPr03TSZmuIBODZ3KyswLzm1xL0HFA8=
github.com/golang/geo v0.0.0-20251111181513-e7f3a1a58fb3 h1:PO47XJrekjtVhITkwfywMBMbKW2WNU49y9LmqvDzwIc=
github.com/golang/geo v0.0.0-20251111181513-e7f3a1a58fb3/go.mod h1:Mymr9kRGDc64JPr03TSZmuIBODZ3KyswLzm1xL0HFA8=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
@ -462,8 +462,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU=
golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@ -478,8 +478,8 @@ golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+o
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20220902085622-e7cb96979f69/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY=
golang.org/x/image v0.18.0/go.mod h1:4yyo5vMFQjVjUcVk4jEQcU9MGy/rulF5WvUILseCM2E=
golang.org/x/image v0.32.0 h1:6lZQWq75h7L5IWNk0r+SCpUJ6tUVd3v4ZHnbRKLkUDQ=
golang.org/x/image v0.32.0/go.mod h1:/R37rrQmKXtO6tYXAjtDLwQgFLHmhW+V6ayXlxzP2Pc=
golang.org/x/image v0.33.0 h1:LXRZRnv1+zGd5XBUVRFmYEphyyKJjQjCRiOuAP3sZfQ=
golang.org/x/image v0.33.0/go.mod h1:DD3OsTYT9chzuzTQt+zMcOlBHgfoKQb1gry8p76Y1sc=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@ -500,8 +500,8 @@ golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -530,8 +530,8 @@ golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -608,8 +608,8 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=

View file

@ -9,6 +9,9 @@ import (
"io"
"net/http"
"github.com/sirupsen/logrus"
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/http/header"
)
@ -69,6 +72,10 @@ func PerformApiRequest(apiRequest *ApiRequest, uri, method, key string) (apiResp
return nil, parseErr
}
if log.IsLevelEnabled(logrus.TraceLevel) {
log.Tracef("vision: response %s", string(body))
}
return parsed, nil
}
@ -89,12 +96,12 @@ func PerformApiRequest(apiRequest *ApiRequest, uri, method, key string) (apiResp
return apiResponse, nil
}
func decodeOllamaResponse(data []byte) (*ApiResponseOllama, error) {
resp := &ApiResponseOllama{}
func decodeOllamaResponse(data []byte) (*ollama.Response, error) {
resp := &ollama.Response{}
dec := json.NewDecoder(bytes.NewReader(data))
for {
var chunk ApiResponseOllama
var chunk ollama.Response
if err := dec.Decode(&chunk); err != nil {
if errors.Is(err, io.EOF) {
break

View file

@ -8,6 +8,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
"github.com/photoprism/photoprism/pkg/http/scheme"
)
@ -49,7 +50,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
var req ApiRequest
assert.NoError(t, json.NewDecoder(r.Body).Decode(&req))
assert.Equal(t, FormatJSON, req.Format)
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
Model: "qwen2.5vl:latest",
Response: `{"labels":[{"name":"test","confidence":0.9,"topicality":0.8}]}`,
}))
@ -72,7 +73,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
})
t.Run("LabelsWithCodeFence", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
Model: "gemma3:latest",
Response: "```json\n{\"labels\":[{\"name\":\"lingerie\",\"confidence\":0.81,\"topicality\":0.73}]}\n```\nThe model provided additional commentary.",
}))
@ -95,7 +96,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
})
t.Run("CaptionFallback", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
Model: "qwen2.5vl:latest",
Response: "plain text",
}))

View file

@ -1,10 +1,8 @@
package vision
import (
"errors"
"fmt"
"os"
"time"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/http/scheme"
@ -12,53 +10,6 @@ import (
"github.com/photoprism/photoprism/pkg/rnd"
)
// ApiResponseOllama represents a Ollama API service response.
type ApiResponseOllama struct {
Id string `yaml:"Id,omitempty" json:"id,omitempty"`
Code int `yaml:"Code,omitempty" json:"code,omitempty"`
Error string `yaml:"Error,omitempty" json:"error,omitempty"`
Model string `yaml:"Model,omitempty" json:"model,omitempty"`
CreatedAt time.Time `yaml:"CreatedAt,omitempty" json:"created_at,omitempty"`
Response string `yaml:"Response,omitempty" json:"response,omitempty"`
Done bool `yaml:"Done,omitempty" json:"done,omitempty"`
Context []int `yaml:"Context,omitempty" json:"context,omitempty"`
TotalDuration int64 `yaml:"TotalDuration,omitempty" json:"total_duration,omitempty"`
LoadDuration int `yaml:"LoadDuration,omitempty" json:"load_duration,omitempty"`
PromptEvalCount int `yaml:"PromptEvalCount,omitempty" json:"prompt_eval_count,omitempty"`
PromptEvalDuration int `yaml:"PromptEvalDuration,omitempty" json:"prompt_eval_duration,omitempty"`
EvalCount int `yaml:"EvalCount,omitempty" json:"eval_count,omitempty"`
EvalDuration int64 `yaml:"EvalDuration,omitempty" json:"eval_duration,omitempty"`
Result ApiResult `yaml:"Result,omitempty" json:"result,omitempty"`
}
// Err returns an error if the request has failed.
func (r *ApiResponseOllama) Err() error {
if r == nil {
return errors.New("response is nil")
}
if r.Code >= 400 {
if r.Error != "" {
return errors.New(r.Error)
}
return fmt.Errorf("error %d", r.Code)
} else if r.Result.IsEmpty() {
return errors.New("no result")
}
return nil
}
// HasResult checks if there is at least one result in the response data.
func (r *ApiResponseOllama) HasResult() bool {
if r == nil {
return false
}
return !r.Result.IsEmpty()
}
// NewApiRequestOllama returns a new Ollama API request with the specified images as payload.
func NewApiRequestOllama(images Files, fileScheme scheme.Type) (*ApiRequest, error) {
imagesData := make(Files, len(images))

View file

@ -11,6 +11,8 @@ import (
"github.com/sirupsen/logrus"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/ai/vision/schema"
"github.com/photoprism/photoprism/internal/api/download"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/fs"
@ -58,6 +60,11 @@ type ApiRequestOptions struct {
UseMmap bool `yaml:"UseMmap,omitempty" json:"use_mmap,omitempty"`
UseMlock bool `yaml:"UseMlock,omitempty" json:"use_mlock,omitempty"`
NumThread int `yaml:"NumThread,omitempty" json:"num_thread,omitempty"`
MaxOutputTokens int `yaml:"MaxOutputTokens,omitempty" json:"max_output_tokens,omitempty"`
Detail string `yaml:"Detail,omitempty" json:"detail,omitempty"`
ForceJson bool `yaml:"ForceJson,omitempty" json:"force_json,omitempty"`
SchemaVersion string `yaml:"SchemaVersion,omitempty" json:"schema_version,omitempty"`
CombineOutputs string `yaml:"CombineOutputs,omitempty" json:"combine_outputs,omitempty"`
}
// ApiRequestContext represents a context parameter returned from a previous request.
@ -77,6 +84,7 @@ type ApiRequest struct {
Context *ApiRequestContext `form:"context" yaml:"Context,omitempty" json:"context,omitempty"`
Stream bool `form:"stream" yaml:"Stream,omitempty" json:"stream"`
Images Files `form:"images" yaml:"Images,omitempty" json:"images,omitempty"`
Schema json.RawMessage `form:"schema" yaml:"Schema,omitempty" json:"schema,omitempty"`
ResponseFormat ApiFormat `form:"-" yaml:"-" json:"-"`
}
@ -195,6 +203,14 @@ func (r *ApiRequest) GetResponseFormat() ApiFormat {
// JSON returns the request data as JSON-encoded bytes.
func (r *ApiRequest) JSON() ([]byte, error) {
if r == nil {
return nil, errors.New("api request is nil")
}
if r.ResponseFormat == ApiFormatOpenAI {
return r.openAIJSON()
}
return json.Marshal(*r)
}
@ -229,6 +245,8 @@ func (r *ApiRequest) sanitizedForLog() ApiRequest {
sanitized.Url = sanitizeLogPayload(r.Url)
sanitized.Schema = r.Schema
return sanitized
}
@ -287,3 +305,134 @@ func isLikelyBase64(value string) bool {
return true
}
// openAIJSON converts the request data into an OpenAI Responses API payload.
func (r *ApiRequest) openAIJSON() ([]byte, error) {
detail := openai.DefaultDetail
if opts := r.Options; opts != nil && strings.TrimSpace(opts.Detail) != "" {
detail = strings.TrimSpace(opts.Detail)
}
messages := make([]openai.InputMessage, 0, 2)
if system := strings.TrimSpace(r.System); system != "" {
messages = append(messages, openai.InputMessage{
Role: "system",
Type: "message",
Content: []openai.ContentItem{
{
Type: openai.ContentTypeText,
Text: system,
},
},
})
}
userContent := make([]openai.ContentItem, 0, len(r.Images)+1)
if prompt := strings.TrimSpace(r.Prompt); prompt != "" {
userContent = append(userContent, openai.ContentItem{
Type: openai.ContentTypeText,
Text: prompt,
})
}
for _, img := range r.Images {
if img == "" {
continue
}
userContent = append(userContent, openai.ContentItem{
Type: openai.ContentTypeImage,
ImageURL: img,
Detail: detail,
})
}
if len(userContent) > 0 {
messages = append(messages, openai.InputMessage{
Role: "user",
Type: "message",
Content: userContent,
})
}
if len(messages) == 0 {
return nil, errors.New("openai request requires at least one message")
}
payload := openai.HTTPRequest{
Model: strings.TrimSpace(r.Model),
Input: messages,
}
if payload.Model == "" {
payload.Model = openai.DefaultModel
}
if strings.HasPrefix(strings.ToLower(payload.Model), "gpt-5") {
payload.Reasoning = &openai.Reasoning{Effort: "low"}
}
if opts := r.Options; opts != nil {
if opts.MaxOutputTokens > 0 {
payload.MaxOutputTokens = opts.MaxOutputTokens
}
if opts.Temperature > 0 {
payload.Temperature = opts.Temperature
}
if opts.TopP > 0 {
payload.TopP = opts.TopP
}
if opts.PresencePenalty != 0 {
payload.PresencePenalty = opts.PresencePenalty
}
if opts.FrequencyPenalty != 0 {
payload.FrequencyPenalty = opts.FrequencyPenalty
}
}
if format := buildOpenAIResponseFormat(r); format != nil {
payload.Text = &openai.TextOptions{
Format: format,
}
}
return json.Marshal(payload)
}
// buildOpenAIResponseFormat determines which response_format to send to OpenAI.
func buildOpenAIResponseFormat(r *ApiRequest) *openai.ResponseFormat {
if r == nil {
return nil
}
opts := r.Options
hasSchema := len(r.Schema) > 0
if !hasSchema && (opts == nil || !opts.ForceJson) {
return nil
}
result := &openai.ResponseFormat{}
if hasSchema {
result.Type = openai.ResponseFormatJSONSchema
result.Schema = r.Schema
if opts != nil && strings.TrimSpace(opts.SchemaVersion) != "" {
result.Name = strings.TrimSpace(opts.SchemaVersion)
} else {
result.Name = schema.JsonSchemaName(r.Schema, openai.DefaultSchemaVersion)
}
} else {
result.Type = openai.ResponseFormatJSONObject
}
return result
}

View file

@ -53,7 +53,11 @@ func captionInternal(images Files, mediaSrc media.Src) (result *CaptionResult, m
apiRequest.System = model.GetSystemPrompt()
apiRequest.Prompt = model.GetPrompt()
apiRequest.Options = model.GetOptions()
if apiRequest.Options == nil {
apiRequest.Options = model.GetOptions()
}
apiRequest.WriteLog()
if apiResponse, err = PerformApiRequest(apiRequest, uri, method, model.EndpointKey()); err != nil {

View file

@ -58,14 +58,15 @@ func init() {
RegisterEngineAlias(EngineVision, EngineInfo{
RequestFormat: ApiFormatVision,
ResponseFormat: ApiFormatVision,
FileScheme: string(scheme.Data),
FileScheme: scheme.Data,
DefaultResolution: DefaultResolution,
})
RegisterEngineAlias(openai.EngineName, EngineInfo{
Uri: "https://api.openai.com/v1/responses",
RequestFormat: ApiFormatOpenAI,
ResponseFormat: ApiFormatOpenAI,
FileScheme: string(scheme.Data),
FileScheme: scheme.Data,
DefaultResolution: openai.DefaultResolution,
})
}
@ -79,6 +80,7 @@ func RegisterEngine(format ApiFormat, engine Engine) {
// EngineInfo describes metadata that can be associated with an engine alias.
type EngineInfo struct {
Uri string
RequestFormat ApiFormat
ResponseFormat ApiFormat
FileScheme string

View file

@ -28,7 +28,7 @@ func init() {
RegisterEngineAlias(ollama.EngineName, EngineInfo{
RequestFormat: ApiFormatOllama,
ResponseFormat: ApiFormatOllama,
FileScheme: string(scheme.Base64),
FileScheme: scheme.Base64,
DefaultResolution: ollama.DefaultResolution,
})
@ -72,7 +72,7 @@ func (ollamaDefaults) SchemaTemplate(model *Model) string {
switch model.Type {
case ModelTypeLabels:
return ollama.LabelsSchema(model.PromptContains("nsfw"))
return ollama.SchemaLabels(model.PromptContains("nsfw"))
}
return ""
@ -134,64 +134,99 @@ func (ollamaParser) Parse(ctx context.Context, req *ApiRequest, raw []byte, stat
return nil, err
}
result := &ApiResponse{
response := &ApiResponse{
Id: req.GetId(),
Code: status,
Model: &Model{Name: ollamaResp.Model},
Result: ApiResult{
Labels: append([]LabelResult{}, ollamaResp.Result.Labels...),
Caption: func() *CaptionResult {
if ollamaResp.Result.Caption != nil {
copyCaption := *ollamaResp.Result.Caption
return &copyCaption
}
return nil
}(),
Labels: convertOllamaLabels(ollamaResp.Result.Labels),
Caption: convertOllamaCaption(ollamaResp.Result.Caption),
},
}
parsedLabels := len(result.Result.Labels) > 0
parsedLabels := len(response.Result.Labels) > 0
if !parsedLabels && strings.TrimSpace(ollamaResp.Response) != "" && req.Format == FormatJSON {
if labels, parseErr := parseOllamaLabels(ollamaResp.Response); parseErr != nil {
log.Debugf("vision: %s (parse ollama labels)", clean.Error(parseErr))
// Qwen3-VL models stream their JSON payload in the "Thinking" field.
fallbackJSON := strings.TrimSpace(ollamaResp.Response)
if fallbackJSON == "" {
fallbackJSON = strings.TrimSpace(ollamaResp.Thinking)
}
if !parsedLabels && fallbackJSON != "" && (req.Format == FormatJSON || strings.HasPrefix(fallbackJSON, "{")) {
if labels, parseErr := parseOllamaLabels(fallbackJSON); parseErr != nil {
log.Warnf("vision: %s (parse ollama labels)", clean.Error(parseErr))
} else if len(labels) > 0 {
result.Result.Labels = append(result.Result.Labels, labels...)
response.Result.Labels = append(response.Result.Labels, labels...)
parsedLabels = true
}
}
if parsedLabels {
filtered := result.Result.Labels[:0]
for i := range result.Result.Labels {
if result.Result.Labels[i].Confidence <= 0 {
result.Result.Labels[i].Confidence = ollama.LabelConfidenceDefault
filtered := response.Result.Labels[:0]
for i := range response.Result.Labels {
if response.Result.Labels[i].Confidence <= 0 {
response.Result.Labels[i].Confidence = ollama.LabelConfidenceDefault
}
if result.Result.Labels[i].Topicality <= 0 {
result.Result.Labels[i].Topicality = result.Result.Labels[i].Confidence
if response.Result.Labels[i].Topicality <= 0 {
response.Result.Labels[i].Topicality = response.Result.Labels[i].Confidence
}
// Apply thresholds and canonicalize the name.
normalizeLabelResult(&result.Result.Labels[i])
normalizeLabelResult(&response.Result.Labels[i])
if result.Result.Labels[i].Name == "" {
if response.Result.Labels[i].Name == "" {
continue
}
if result.Result.Labels[i].Source == "" {
result.Result.Labels[i].Source = entity.SrcOllama
if response.Result.Labels[i].Source == "" {
response.Result.Labels[i].Source = entity.SrcOllama
}
filtered = append(filtered, result.Result.Labels[i])
filtered = append(filtered, response.Result.Labels[i])
}
result.Result.Labels = filtered
response.Result.Labels = filtered
} else if caption := strings.TrimSpace(ollamaResp.Response); caption != "" {
result.Result.Caption = &CaptionResult{
response.Result.Caption = &CaptionResult{
Text: caption,
Source: entity.SrcOllama,
}
}
return result, nil
return response, nil
}
func convertOllamaLabels(payload []ollama.LabelPayload) []LabelResult {
if len(payload) == 0 {
return nil
}
labels := make([]LabelResult, len(payload))
for i := range payload {
labels[i] = LabelResult{
Name: payload[i].Name,
Source: payload[i].Source,
Priority: payload[i].Priority,
Confidence: payload[i].Confidence,
Topicality: payload[i].Topicality,
Categories: payload[i].Categories,
NSFW: payload[i].NSFW,
NSFWConfidence: payload[i].NSFWConfidence,
}
}
return labels
}
func convertOllamaCaption(payload *ollama.CaptionPayload) *CaptionResult {
if payload == nil {
return nil
}
return &CaptionResult{
Text: payload.Text,
Source: payload.Source,
Confidence: payload.Confidence,
}
}

View file

@ -10,9 +10,9 @@ import (
func TestOllamaDefaultConfidenceApplied(t *testing.T) {
req := &ApiRequest{Format: FormatJSON}
payload := ApiResponseOllama{
Result: ApiResult{
Labels: []LabelResult{{Name: "forest path", Confidence: 0, Topicality: 0}},
payload := ollama.Response{
Result: ollama.ResultPayload{
Labels: []ollama.LabelPayload{{Name: "forest path", Confidence: 0, Topicality: 0}},
},
}
raw, err := json.Marshal(payload)
@ -37,3 +37,46 @@ func TestOllamaDefaultConfidenceApplied(t *testing.T) {
t.Fatalf("expected topicality to default to confidence, got %.2f", resp.Result.Labels[0].Topicality)
}
}
func TestOllamaParserFallbacks(t *testing.T) {
t.Run("ThinkingFieldJSON", func(t *testing.T) {
req := &ApiRequest{Format: FormatJSON}
payload := ollama.Response{
Thinking: `{"labels":[{"name":"cat","confidence":0.9,"topicality":0.8}]}`,
}
raw, err := json.Marshal(payload)
if err != nil {
t.Fatalf("marshal: %v", err)
}
parser := ollamaParser{}
resp, err := parser.Parse(context.Background(), req, raw, 200)
if err != nil {
t.Fatalf("parse failed: %v", err)
}
if len(resp.Result.Labels) != 1 || resp.Result.Labels[0].Name != "Cat" {
t.Fatalf("expected cat label, got %+v", resp.Result.Labels)
}
})
t.Run("JsonPrefixedResponse", func(t *testing.T) {
req := &ApiRequest{} // no explicit format
payload := ollama.Response{
Response: `{"labels":[{"name":"cat","confidence":0.91,"topicality":0.81}]}`,
}
raw, err := json.Marshal(payload)
if err != nil {
t.Fatalf("marshal: %v", err)
}
parser := ollamaParser{}
resp, err := parser.Parse(context.Background(), req, raw, 200)
if err != nil {
t.Fatalf("parse failed: %v", err)
}
if len(resp.Result.Labels) != 1 || resp.Result.Labels[0].Name != "Cat" {
t.Fatalf("expected cat label, got %+v", resp.Result.Labels)
}
})
}

View file

@ -1,18 +1,342 @@
package vision
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/entity"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/http/scheme"
)
// init registers the OpenAI engine alias so models can set Engine: "openai"
// and inherit sensible defaults (request/response formats, file scheme, and
// preferred thumbnail resolution).
// openaiDefaults provides canned prompts, schema templates, and options for OpenAI engines.
type openaiDefaults struct{}
// openaiBuilder prepares ApiRequest objects for OpenAI's Responses API.
type openaiBuilder struct{}
// openaiParser converts Responses API payloads into ApiResponse instances.
type openaiParser struct{}
func init() {
RegisterEngineAlias(openai.EngineName, EngineInfo{
RequestFormat: ApiFormatOpenAI,
ResponseFormat: ApiFormatOpenAI,
FileScheme: string(scheme.Base64),
DefaultResolution: openai.DefaultResolution,
RegisterEngine(ApiFormatOpenAI, Engine{
Builder: openaiBuilder{},
Parser: openaiParser{},
Defaults: openaiDefaults{},
})
}
// SystemPrompt returns the default OpenAI system prompt for the specified model type.
func (openaiDefaults) SystemPrompt(model *Model) string {
if model == nil {
return ""
}
switch model.Type {
case ModelTypeCaption:
return openai.CaptionSystem
case ModelTypeLabels:
return openai.LabelSystem
default:
return ""
}
}
// UserPrompt returns the default OpenAI user prompt for the specified model type.
func (openaiDefaults) UserPrompt(model *Model) string {
if model == nil {
return ""
}
switch model.Type {
case ModelTypeCaption:
return openai.CaptionPrompt
case ModelTypeLabels:
if DetectNSFWLabels {
return openai.LabelPromptNSFW
}
return openai.LabelPromptDefault
default:
return ""
}
}
// SchemaTemplate returns the JSON schema template for the model, if applicable.
func (openaiDefaults) SchemaTemplate(model *Model) string {
if model == nil {
return ""
}
switch model.Type {
case ModelTypeLabels:
return string(openai.SchemaLabels(model.PromptContains("nsfw")))
default:
return ""
}
}
// Options returns default OpenAI request options for the model.
func (openaiDefaults) Options(model *Model) *ApiRequestOptions {
if model == nil {
return nil
}
switch model.Type {
case ModelTypeCaption:
/*
Options:
Detail: low
MaxOutputTokens: 512
Temperature: 0.1
TopP: 0.9
(Sampling values are zeroed for GPT-5 models in openaiBuilder.Build.)
*/
return &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: openai.CaptionMaxTokens,
Temperature: openai.DefaultTemperature,
TopP: openai.DefaultTopP,
}
case ModelTypeLabels:
/*
Options:
Detail: low
MaxOutputTokens: 1024
Temperature: 0.1
ForceJson: true
SchemaVersion: "photoprism_vision_labels_v1"
(Sampling values are zeroed for GPT-5 models in openaiBuilder.Build.)
*/
return &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: openai.LabelsMaxTokens,
Temperature: openai.DefaultTemperature,
TopP: openai.DefaultTopP,
ForceJson: true,
}
default:
return nil
}
}
// Build constructs an OpenAI request payload using base64-encoded thumbnails.
func (openaiBuilder) Build(ctx context.Context, model *Model, files Files) (*ApiRequest, error) {
if model == nil {
return nil, ErrInvalidModel
}
dataReq, err := NewApiRequestImages(files, scheme.Data)
if err != nil {
return nil, err
}
req := &ApiRequest{
Id: dataReq.Id,
Images: append(Files(nil), dataReq.Images...),
ResponseFormat: ApiFormatOpenAI,
}
if opts := model.GetOptions(); opts != nil {
req.Options = cloneOptions(opts)
if model.Type == ModelTypeCaption {
// Captions default to plain text responses; structured JSON is optional.
req.Options.ForceJson = false
if req.Options.MaxOutputTokens < openai.CaptionMaxTokens {
req.Options.MaxOutputTokens = openai.CaptionMaxTokens
}
} else if model.Type == ModelTypeLabels {
if req.Options.MaxOutputTokens < openai.LabelsMaxTokens {
req.Options.MaxOutputTokens = openai.LabelsMaxTokens
}
}
if strings.HasPrefix(strings.ToLower(strings.TrimSpace(model.Name)), "gpt-5") {
req.Options.Temperature = 0
req.Options.TopP = 0
}
}
if schema := strings.TrimSpace(model.SchemaTemplate()); schema != "" {
if raw, parseErr := parseOpenAISchema(schema); parseErr != nil {
log.Warnf("vision: failed to parse OpenAI schema template (%s)", clean.Error(parseErr))
} else {
req.Schema = raw
}
}
return req, nil
}
// Parse converts an OpenAI Responses API payload into the internal ApiResponse representation.
func (openaiParser) Parse(ctx context.Context, req *ApiRequest, raw []byte, status int) (*ApiResponse, error) {
if status >= 300 {
if msg := openai.ParseErrorMessage(raw); msg != "" {
return nil, fmt.Errorf("openai: %s", msg)
}
return nil, fmt.Errorf("openai: status %d", status)
}
var resp openai.Response
if err := json.Unmarshal(raw, &resp); err != nil {
return nil, err
}
if resp.Error != nil && resp.Error.Message != "" {
return nil, errors.New(resp.Error.Message)
}
result := ApiResult{}
if jsonPayload := resp.FirstJSON(); len(jsonPayload) > 0 {
if err := populateOpenAIJSONResult(&result, jsonPayload); err != nil {
log.Debugf("vision: %s (parse openai json payload)", clean.Error(err))
}
}
if result.Caption == nil {
if text := resp.FirstText(); text != "" {
trimmed := strings.TrimSpace(text)
var parsedJSON bool
if len(trimmed) > 0 && (trimmed[0] == '{' || trimmed[0] == '[') {
if err := populateOpenAIJSONResult(&result, json.RawMessage(trimmed)); err != nil {
log.Debugf("vision: %s (parse openai json text payload)", clean.Error(err))
} else {
parsedJSON = true
}
}
if !parsedJSON && trimmed != "" {
result.Caption = &CaptionResult{
Text: trimmed,
Source: entity.SrcOpenAI,
}
}
}
}
var responseID string
if req != nil {
responseID = req.GetId()
}
modelName := strings.TrimSpace(resp.Model)
if modelName == "" && req != nil {
modelName = strings.TrimSpace(req.Model)
}
return &ApiResponse{
Id: responseID,
Code: status,
Model: &Model{Name: modelName},
Result: result,
}, nil
}
// parseOpenAISchema validates the provided JSON schema and returns it as a raw message.
func parseOpenAISchema(schema string) (json.RawMessage, error) {
var raw json.RawMessage
if err := json.Unmarshal([]byte(schema), &raw); err != nil {
return nil, err
}
return normalizeOpenAISchema(raw)
}
// normalizeOpenAISchema upgrades legacy label schema definitions so they comply with
// OpenAI's json_schema format requirements.
func normalizeOpenAISchema(raw json.RawMessage) (json.RawMessage, error) {
if len(raw) == 0 {
return raw, nil
}
var doc map[string]any
if err := json.Unmarshal(raw, &doc); err != nil {
// Fallback to the original payload if it isn't a JSON object.
return raw, nil
}
if t, ok := doc["type"]; ok {
if typeStr, ok := t.(string); ok && strings.TrimSpace(typeStr) != "" {
return raw, nil
}
}
if _, ok := doc["properties"]; ok {
return raw, nil
}
labels, ok := doc["labels"]
if !ok {
return raw, nil
}
nsfw := false
if items, ok := labels.([]any); ok && len(items) > 0 {
if first, ok := items[0].(map[string]any); ok {
if _, hasNSFW := first["nsfw"]; hasNSFW {
nsfw = true
}
if _, hasNSFWConfidence := first["nsfw_confidence"]; hasNSFWConfidence {
nsfw = true
}
}
}
return openai.SchemaLabels(nsfw), nil
}
// populateOpenAIJSONResult unmarshals a structured OpenAI response into ApiResult fields.
func populateOpenAIJSONResult(result *ApiResult, payload json.RawMessage) error {
if result == nil || len(payload) == 0 {
return nil
}
var envelope struct {
Caption *struct {
Text string `json:"text"`
Confidence float32 `json:"confidence"`
} `json:"caption"`
Labels []LabelResult `json:"labels"`
}
if err := json.Unmarshal(payload, &envelope); err != nil {
return err
}
if envelope.Caption != nil {
text := strings.TrimSpace(envelope.Caption.Text)
if text != "" {
result.Caption = &CaptionResult{
Text: text,
Confidence: envelope.Caption.Confidence,
Source: entity.SrcOpenAI,
}
}
}
if len(envelope.Labels) > 0 {
filtered := envelope.Labels[:0]
for i := range envelope.Labels {
if envelope.Labels[i].Source == "" {
envelope.Labels[i].Source = entity.SrcOpenAI
}
normalizeLabelResult(&envelope.Labels[i])
if envelope.Labels[i].Name == "" {
continue
}
filtered = append(filtered, envelope.Labels[i])
}
result.Labels = append(result.Labels, filtered...)
}
return nil
}

View file

@ -0,0 +1,337 @@
package vision
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/ai/vision/schema"
"github.com/photoprism/photoprism/internal/entity"
)
func TestOpenAIBuilderBuild(t *testing.T) {
model := &Model{
Type: ModelTypeLabels,
Name: openai.DefaultModel,
Engine: openai.EngineName,
}
model.ApplyEngineDefaults()
request, err := openaiBuilder{}.Build(context.Background(), model, Files{examplesPath + "/chameleon_lime.jpg"})
require.NoError(t, err)
require.NotNil(t, request)
assert.Equal(t, ApiFormatOpenAI, request.ResponseFormat)
assert.NotEmpty(t, request.Images)
assert.NotNil(t, request.Options)
assert.Equal(t, openai.DefaultDetail, request.Options.Detail)
assert.True(t, request.Options.ForceJson)
assert.GreaterOrEqual(t, request.Options.MaxOutputTokens, openai.LabelsMaxTokens)
}
func TestOpenAIBuilderBuildCaptionDisablesForceJSON(t *testing.T) {
model := &Model{
Type: ModelTypeCaption,
Name: openai.DefaultModel,
Engine: openai.EngineName,
Options: &ApiRequestOptions{ForceJson: true},
}
model.ApplyEngineDefaults()
request, err := openaiBuilder{}.Build(context.Background(), model, Files{examplesPath + "/chameleon_lime.jpg"})
require.NoError(t, err)
require.NotNil(t, request)
require.NotNil(t, request.Options)
assert.False(t, request.Options.ForceJson)
assert.GreaterOrEqual(t, request.Options.MaxOutputTokens, openai.CaptionMaxTokens)
}
func TestApiRequestJSONForOpenAI(t *testing.T) {
req := &ApiRequest{
Model: "gpt-5-mini",
System: "system",
Prompt: "describe the scene",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: 128,
Temperature: 0.2,
TopP: 0.8,
ForceJson: true,
},
Schema: json.RawMessage(`{"type":"object","properties":{"caption":{"type":"object"}}}`),
}
payload, err := req.JSON()
require.NoError(t, err)
var decoded struct {
Model string `json:"model"`
Input []struct {
Role string `json:"role"`
Content []struct {
Type string `json:"type"`
} `json:"content"`
} `json:"input"`
Text struct {
Format struct {
Type string `json:"type"`
Name string `json:"name"`
Schema json.RawMessage `json:"schema"`
Strict bool `json:"strict"`
} `json:"format"`
} `json:"text"`
Reasoning struct {
Effort string `json:"effort"`
} `json:"reasoning"`
MaxOutputTokens int `json:"max_output_tokens"`
}
require.NoError(t, json.Unmarshal(payload, &decoded))
assert.Equal(t, "gpt-5-mini", decoded.Model)
require.Len(t, decoded.Input, 2)
assert.Equal(t, "system", decoded.Input[0].Role)
assert.Equal(t, openai.ResponseFormatJSONSchema, decoded.Text.Format.Type)
assert.Equal(t, schema.JsonSchemaName(decoded.Text.Format.Schema, openai.DefaultSchemaVersion), decoded.Text.Format.Name)
assert.False(t, decoded.Text.Format.Strict)
assert.NotNil(t, decoded.Text.Format.Schema)
assert.Equal(t, "low", decoded.Reasoning.Effort)
assert.Equal(t, 128, decoded.MaxOutputTokens)
}
func TestApiRequestJSONForOpenAIDefaultSchemaName(t *testing.T) {
req := &ApiRequest{
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: 64,
ForceJson: true,
},
Schema: json.RawMessage(`{"type":"object"}`),
}
payload, err := req.JSON()
require.NoError(t, err)
var decoded struct {
Text struct {
Format struct {
Name string `json:"name"`
} `json:"format"`
} `json:"text"`
}
require.NoError(t, json.Unmarshal(payload, &decoded))
assert.Equal(t, schema.JsonSchemaName(req.Schema, openai.DefaultSchemaVersion), decoded.Text.Format.Name)
}
func TestOpenAIParserParsesJSONFromTextPayload(t *testing.T) {
respPayload := `{
"id": "resp_123",
"model": "gpt-5-mini",
"output": [{
"role": "assistant",
"content": [{
"type": "output_text",
"text": "{\"labels\":[{\"name\":\"deer\",\"confidence\":0.98,\"topicality\":0.99}]}"
}]
}]
}`
req := &ApiRequest{
Id: "test",
Model: "gpt-5-mini",
ResponseFormat: ApiFormatOpenAI,
}
resp, err := openaiParser{}.Parse(context.Background(), req, []byte(respPayload), http.StatusOK)
require.NoError(t, err)
require.NotNil(t, resp)
require.Len(t, resp.Result.Labels, 1)
assert.Equal(t, "Deer", resp.Result.Labels[0].Name)
assert.Nil(t, resp.Result.Caption)
}
func TestParseOpenAISchemaLegacyUpgrade(t *testing.T) {
legacy := `{
"labels": [{
"name": "",
"confidence": 0,
"topicality": 0
}]
}`
raw, err := parseOpenAISchema(legacy)
require.NoError(t, err)
var decoded map[string]any
require.NoError(t, json.Unmarshal(raw, &decoded))
assert.Equal(t, "object", decoded["type"])
props, ok := decoded["properties"].(map[string]any)
require.True(t, ok)
labels, ok := props["labels"].(map[string]any)
require.True(t, ok)
assert.Equal(t, "array", labels["type"])
}
func TestParseOpenAISchemaLegacyUpgradeNSFW(t *testing.T) {
legacy := `{
"labels": [{
"name": "",
"confidence": 0,
"topicality": 0,
"nsfw": false,
"nsfw_confidence": 0
}]
}`
raw, err := parseOpenAISchema(legacy)
require.NoError(t, err)
var decoded map[string]any
require.NoError(t, json.Unmarshal(raw, &decoded))
props := decoded["properties"].(map[string]any)
labels := props["labels"].(map[string]any)
items := labels["items"].(map[string]any)
_, hasNSFW := items["properties"].(map[string]any)["nsfw"]
assert.True(t, hasNSFW)
}
func TestPerformApiRequestOpenAISuccess(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var reqPayload struct {
Model string `json:"model"`
}
assert.NoError(t, json.NewDecoder(r.Body).Decode(&reqPayload))
assert.Equal(t, "gpt-5-mini", reqPayload.Model)
response := map[string]any{
"id": "resp_123",
"model": "gpt-5-mini",
"output": []any{
map[string]any{
"role": "assistant",
"content": []any{
map[string]any{
"type": "output_json",
"json": map[string]any{
"caption": map[string]any{
"text": "A cat rests on a windowsill.",
"confidence": 0.91,
},
"labels": []map[string]any{
{
"name": "cat",
"confidence": 0.92,
"topicality": 0.88,
},
},
},
},
},
},
},
}
assert.NoError(t, json.NewEncoder(w).Encode(response))
}))
defer server.Close()
req := &ApiRequest{
Id: "test",
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
},
Schema: json.RawMessage(`{"type":"object"}`),
}
resp, err := PerformApiRequest(req, server.URL, http.MethodPost, "secret")
require.NoError(t, err)
require.NotNil(t, resp)
require.NotNil(t, resp.Result.Caption)
assert.Equal(t, entity.SrcOpenAI, resp.Result.Caption.Source)
assert.Equal(t, "A cat rests on a windowsill.", resp.Result.Caption.Text)
require.Len(t, resp.Result.Labels, 1)
assert.Equal(t, entity.SrcOpenAI, resp.Result.Labels[0].Source)
assert.Equal(t, "Cat", resp.Result.Labels[0].Name)
}
func TestPerformApiRequestOpenAITextFallback(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
response := map[string]any{
"id": "resp_456",
"model": "gpt-5-mini",
"output": []any{
map[string]any{
"role": "assistant",
"content": []any{
map[string]any{
"type": "output_text",
"text": "Two hikers reach the summit at sunset.",
},
},
},
},
}
assert.NoError(t, json.NewEncoder(w).Encode(response))
}))
defer server.Close()
req := &ApiRequest{
Id: "fallback",
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
},
Schema: nil,
}
resp, err := PerformApiRequest(req, server.URL, http.MethodPost, "")
require.NoError(t, err)
require.NotNil(t, resp.Result.Caption)
assert.Equal(t, "Two hikers reach the summit at sunset.", resp.Result.Caption.Text)
assert.Equal(t, entity.SrcOpenAI, resp.Result.Caption.Source)
}
func TestPerformApiRequestOpenAIError(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusBadRequest)
_ = json.NewEncoder(w).Encode(map[string]any{
"error": map[string]any{
"message": "Invalid image payload",
},
})
}))
defer server.Close()
req := &ApiRequest{
Id: "error",
Model: "gpt-5-mini",
ResponseFormat: ApiFormatOpenAI,
Schema: nil,
Images: []string{"data:image/jpeg;base64,AA=="},
}
_, err := PerformApiRequest(req, server.URL, http.MethodPost, "")
require.Error(t, err)
assert.Contains(t, err.Error(), "Invalid image payload")
}

View file

@ -96,8 +96,10 @@ func labelsInternal(images Files, mediaSrc media.Src, labelSrc entity.Src) (resu
apiRequest.Prompt = prompt
}
if options := model.GetOptions(); options != nil {
apiRequest.Options = options
if apiRequest.Options == nil {
if options := model.GetOptions(); options != nil {
apiRequest.Options = options
}
}
apiRequest.WriteLog()

View file

@ -154,9 +154,11 @@ func (m *Model) EndpointKey() (key string) {
if key = m.Service.EndpointKey(); key != "" {
return key
} else {
return ServiceKey
}
ensureEnv()
return strings.TrimSpace(os.ExpandEnv(ServiceKey))
}
// EndpointFileScheme returns the endpoint API request file scheme type. Nil
@ -348,6 +350,26 @@ func mergeOptionDefaults(target, defaults *ApiRequestOptions) {
if len(target.Stop) == 0 && len(defaults.Stop) > 0 {
target.Stop = append([]string(nil), defaults.Stop...)
}
if target.MaxOutputTokens <= 0 && defaults.MaxOutputTokens > 0 {
target.MaxOutputTokens = defaults.MaxOutputTokens
}
if strings.TrimSpace(target.Detail) == "" && strings.TrimSpace(defaults.Detail) != "" {
target.Detail = strings.TrimSpace(defaults.Detail)
}
if !target.ForceJson && defaults.ForceJson {
target.ForceJson = true
}
if target.SchemaVersion == "" && defaults.SchemaVersion != "" {
target.SchemaVersion = defaults.SchemaVersion
}
if target.CombineOutputs == "" && defaults.CombineOutputs != "" {
target.CombineOutputs = defaults.CombineOutputs
}
}
func normalizeOptions(opts *ApiRequestOptions) {
@ -422,6 +444,10 @@ func (m *Model) ApplyEngineDefaults() {
}
if info, ok := EngineInfoFor(engine); ok {
if m.Service.Uri == "" {
m.Service.Uri = info.Uri
}
if m.Service.RequestFormat == "" {
m.Service.RequestFormat = info.RequestFormat
}
@ -439,6 +465,10 @@ func (m *Model) ApplyEngineDefaults() {
}
}
if engine == openai.EngineName && strings.TrimSpace(m.Service.Key) == "" {
m.Service.Key = "${OPENAI_API_KEY}"
}
m.Engine = engine
}
@ -490,7 +520,7 @@ func (m *Model) SchemaTemplate() string {
}
if m.schema == "" {
m.schema = visionschema.Labels(m.PromptContains("nsfw"))
m.schema = visionschema.LabelsJson(m.PromptContains("nsfw"))
}
}
})

View file

@ -1,13 +1,17 @@
package vision
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/photoprism/photoprism/internal/ai/tensorflow"
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/entity"
"github.com/photoprism/photoprism/pkg/http/scheme"
)
func TestModelGetOptionsDefaultsOllamaLabels(t *testing.T) {
@ -108,6 +112,85 @@ func TestModelApplyEngineDefaultsSetsResolution(t *testing.T) {
}
}
func TestModelApplyEngineDefaultsSetsServiceDefaults(t *testing.T) {
t.Run("OpenAIEngine", func(t *testing.T) {
model := &Model{
Type: ModelTypeCaption,
Engine: openai.EngineName,
}
model.ApplyEngineDefaults()
assert.Equal(t, "https://api.openai.com/v1/responses", model.Service.Uri)
assert.Equal(t, ApiFormatOpenAI, model.Service.RequestFormat)
assert.Equal(t, ApiFormatOpenAI, model.Service.ResponseFormat)
assert.Equal(t, scheme.Data, model.Service.FileScheme)
})
t.Run("PreserveExistingService", func(t *testing.T) {
model := &Model{
Type: ModelTypeCaption,
Engine: openai.EngineName,
Service: Service{
Uri: "https://custom.example",
FileScheme: scheme.Base64,
RequestFormat: ApiFormatOpenAI,
},
}
model.ApplyEngineDefaults()
assert.Equal(t, "https://custom.example", model.Service.Uri)
assert.Equal(t, scheme.Base64, model.Service.FileScheme)
})
}
func TestModelEndpointKeyOpenAIFallbacks(t *testing.T) {
t.Run("EnvFile", func(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "openai.key")
if err := os.WriteFile(path, []byte("from-file\n"), 0o600); err != nil {
t.Fatalf("write key file: %v", err)
}
t.Setenv("OPENAI_API_KEY", "")
t.Setenv("OPENAI_API_KEY_FILE", path)
model := &Model{Type: ModelTypeCaption, Engine: openai.EngineName}
model.ApplyEngineDefaults()
if got := model.EndpointKey(); got != "from-file" {
t.Fatalf("expected file key, got %q", got)
}
})
t.Run("CustomPlaceholder", func(t *testing.T) {
t.Setenv("OPENAI_API_KEY", "env-secret")
model := &Model{Type: ModelTypeCaption, Engine: openai.EngineName}
model.ApplyEngineDefaults()
if got := model.EndpointKey(); got != "env-secret" {
t.Fatalf("expected env secret, got %q", got)
}
model.Service.Key = "${CUSTOM_KEY}"
t.Setenv("CUSTOM_KEY", "custom-secret")
if got := model.EndpointKey(); got != "custom-secret" {
t.Fatalf("expected custom secret, got %q", got)
}
})
t.Run("GlobalFallback", func(t *testing.T) {
prev := ServiceKey
ServiceKey = "${GLOBAL_KEY}"
defer func() { ServiceKey = prev }()
t.Setenv("GLOBAL_KEY", "global-secret")
model := &Model{}
if got := model.EndpointKey(); got != "global-secret" {
t.Fatalf("expected global secret, got %q", got)
}
})
}
func TestModelGetSource(t *testing.T) {
t.Run("NilModel", func(t *testing.T) {
var model *Model
@ -115,21 +198,18 @@ func TestModelGetSource(t *testing.T) {
t.Fatalf("expected SrcAuto for nil model, got %s", src)
}
})
t.Run("EngineAlias", func(t *testing.T) {
model := &Model{Engine: ollama.EngineName}
if src := model.GetSource(); src != entity.SrcOllama {
t.Fatalf("expected SrcOllama, got %s", src)
}
})
t.Run("RequestFormat", func(t *testing.T) {
model := &Model{Service: Service{RequestFormat: ApiFormatOpenAI}}
if src := model.GetSource(); src != entity.SrcOpenAI {
t.Fatalf("expected SrcOpenAI, got %s", src)
}
})
t.Run("DefaultImage", func(t *testing.T) {
model := &Model{}
if src := model.GetSource(); src != entity.SrcImage {

View file

@ -0,0 +1,152 @@
## PhotoPrism — Ollama Engine Integration
**Last Updated:** November 14, 2025
### Overview
This package provides PhotoPrism's native adapter for Ollama-compatible multimodal models. It lets Caption, Labels, and future Generate workflows call locally hosted models without changing worker logic, reusing the shared API client (`internal/ai/vision/api_client.go`) and result types (`LabelResult`, `CaptionResult`). Requests stay inside your infrastructure, rely on base64 thumbnails, and honor the same ACL, timeout, and logging hooks as the default TensorFlow engines.
#### Context & Constraints
- Engine defaults live in `internal/ai/vision/ollama` and are applied whenever a model sets `Engine: ollama`. Aliases map to `ApiFormatOllama`, `scheme.Base64`, and a default 720px thumbnail.
- Responses may arrive as newline-delimited JSON chunks. `decodeOllamaResponse` keeps the most recent chunk, while `parseOllamaLabels` replays plain JSON strings found in `response`; a simplified sketch of the chunk handling follows this list.
- Structured JSON is optional for captions but enforced for labels when `Format: json` (default for label models targeting the Ollama engine).
- The adapter never overwrites TensorFlow defaults. If an Ollama call fails, downstream code still has Nasnet, NSFW, and Face models available.
- Workers assume a single-image payload per request. Run `photoprism vision run` to validate multi-image prompts before changing that invariant.
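
The shared client already performs this reduction, but the following hedged sketch illustrates the idea of keeping the most recent decodable chunk. It is a simplified illustration, not the actual `decodeOllamaResponse` implementation, and only assumes the `ollama.Response` type defined in this package.

```go
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"

	"github.com/photoprism/photoprism/internal/ai/vision/ollama"
)

// lastChunk keeps the most recent chunk that decodes cleanly, skipping
// malformed lines instead of failing the whole response.
func lastChunk(raw []byte) (ollama.Response, error) {
	var last ollama.Response
	found := false

	scanner := bufio.NewScanner(bytes.NewReader(raw))
	for scanner.Scan() {
		line := bytes.TrimSpace(scanner.Bytes())
		if len(line) == 0 {
			continue
		}

		var chunk ollama.Response
		if err := json.Unmarshal(line, &chunk); err != nil {
			continue
		}

		last, found = chunk, true
	}

	if err := scanner.Err(); err != nil {
		return last, err
	}

	if !found {
		return last, fmt.Errorf("no decodable chunks in response")
	}

	return last, nil
}

func main() {
	raw := []byte(`{"model":"gemma3:latest","response":"partial","done":false}
{"model":"gemma3:latest","response":"{\"labels\":[]}","done":true}`)

	if chunk, err := lastChunk(raw); err != nil {
		fmt.Println("error:", err)
	} else {
		fmt.Println(chunk.Model, chunk.Done)
	}
}
```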
#### Goals
- Let operators opt into local, private LLMs for captions and labels via `vision.yml`.
- Provide safe defaults (prompts, schema, sampling) so most deployments only need to specify `Name`, `Engine`, and `Service.Uri`.
- Surface reproducible logs, metrics, and CLI commands that make it easy to compare Ollama output against TensorFlow/OpenAI engines.
#### Non-Goals
- Managing Ollama itself (model downloads, GPU scheduling, or authentication). Use the Compose profiles provided in the repository.
- Adding new HTTP endpoints or bypassing the existing `photoprism vision` CLI.
- Replacing TensorFlow workers—Ollama engines are additive and opt-in.
### Architecture & Request Flow
1. **Model Selection:** `Config.Model(ModelType)` returns the top-most enabled entry. When `Engine: ollama`, `ApplyEngineDefaults()` fills in the request/response format, base64 file scheme, and a 720px resolution unless overridden.
2. **Request Build:** `ollamaBuilder.Build` wraps thumbnails with `NewApiRequestOllama`, which encodes them as base64 strings. `Model.Model()` resolves the exact Ollama tag (`gemma3:4b`, `qwen2.5vl:7b`, etc.).
3. **Transport:** `PerformApiRequest` uses a single HTTP POST (default timeout 10 min). Authentication is optional; provide `Service.Key` if you proxy through an API gateway.
4. **Parsing:** `ollamaParser.Parse` converts payloads into `ApiResponse`. It normalizes confidences (`LabelConfidenceDefault = 0.5` when missing), copies NSFW scores, and canonicalizes label names via `normalizeLabelResult`; the sketch after this list shows the confidence defaults.
5. **Persistence:** `entity.SrcOllama` is stamped on labels/captions so UI badges and audits reflect the new source.
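
The confidence handling in step 4 can be illustrated with a small hedged sketch. It only uses the `ollama.LabelPayload` type and the `LabelConfidenceDefault` constant; the real parser additionally canonicalizes names via `normalizeLabelResult` and stamps `entity.SrcOllama` as the source.

```go
package main

import (
	"fmt"

	"github.com/photoprism/photoprism/internal/ai/vision/ollama"
)

// applyConfidenceDefaults mirrors the fallback behavior described in step 4:
// missing confidences get the package default, and topicality mirrors the
// confidence when the model omits it.
func applyConfidenceDefaults(labels []ollama.LabelPayload) []ollama.LabelPayload {
	for i := range labels {
		if labels[i].Confidence <= 0 {
			labels[i].Confidence = ollama.LabelConfidenceDefault
		}
		if labels[i].Topicality <= 0 {
			labels[i].Topicality = labels[i].Confidence
		}
	}
	return labels
}

func main() {
	labels := applyConfidenceDefaults([]ollama.LabelPayload{{Name: "forest path"}})
	fmt.Printf("%+v\n", labels[0])
}
```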
### Prompt, Schema, & Options Guidance
- **System Prompts**
- Labels: `LabelSystem` enforces single-word nouns. Set `System` to override; assign `LabelSystemSimple` when you need descriptive phrases.
- Captions: no system prompt by default; rely on user prompt or set one explicitly for stylistic needs.
- **User Prompts**
- Captions use `CaptionPrompt`, which requests one sentence in active voice.
- Labels default to `LabelPromptDefault`; when `DetectNSFWLabels` is true, the adapter swaps in `LabelPromptNSFW`.
- For stricter noun enforcement, set `Prompt` to `LabelPromptStrict`.
- **Schemas**
- Labels rely on `schema.LabelsJson(nsfw)` (simple JSON template). Setting `Format: json` auto-attaches a reminder (`model.SchemaInstructions()`).
- Override via `Schema` (inline YAML) or `SchemaFile`. `PHOTOPRISM_VISION_LABEL_SCHEMA_FILE` always wins if present.
- **Options**
- Labels: default `Temperature` equals `DefaultTemperature` (0.1 unless configured), `TopP=0.9`, `Stop=["\n\n"]`.
- Captions: only `Temperature` is set; other parameters inherit global defaults.
- Custom `Options` merge with engine defaults. Leave `ForceJson=true` for labels so PhotoPrism can reject malformed payloads early.
### Supported Ollama Vision Models
| Model (Ollama Tag) | Size & Footprint | Strengths | JSON & Language Notes | When To Use |
|-------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `gemma3:4b / 12b / 27b` | 4B/12B/27B parameters, ~3.3GB → 17GB downloads, 128K context | Multimodal text+image reasoning with SigLIP encoder, handles OCR/long documents, supports tool/function calling | Emits structured JSON reliably; >140 languages with strong default English output | High-quality captions + multilingual labels when you have ≥12GB VRAM (4B works on 8GB with Q4_K_M) |
| `qwen2.5vl:7b` | 8.29B params (Q4_K_M) ≈6GB download, 125K context | Excellent charts, GUI grounding, DocVQA, multi-image reasoning, agentic tool use | JSON mode tuned for schema compliance; supports 20+ languages with strong Chinese/English parity | Label extraction for mixed-language archives or UI/diagram analysis |
| `qwen3-vl:2b / 4b / 8b`  | Dense 2B/4B/8B tiers (~3GB, ~3.5GB, ~6GB downloads) with native 256K context extendable to 1M; fits single 12–24GB GPUs or high-end CPUs (2B) | Spatial + video reasoning upgrades (Interleaved-MRoPE, DeepStack), 32-language OCR, GUI/agent control, long-document ingest | Emits JSON reliably when prompts specify schema; multilingual captions/labels with Thinking variants boosting STEM reasoning | General-purpose captions/labels when you need long-context doc/video support without cloud APIs; 2B for CPU/edge, 4B as balanced default, 8B when accuracy outweighs latency |
| `llama3.2-vision:11b` | 11B params, ~7.8GB download, requires ≥8GB VRAM; 90B variant needs ≥64GB | Strong general reasoning, captioning, OCR, supported by Meta ecosystem tooling | Vision tasks officially supported in English; text-only tasks cover eight major languages | Keep captions consistent with Meta-compatible prompts or when teams already standardize on Llama 3.x |
| `minicpm-v:8b-2.6` | 8B params, ~5.5GB download, 32K context | Optimized for edge GPUs, high OCR accuracy, multi-image/video support, low token count (≈640 tokens for 1.8MP) | Multilingual (EN/ZH/DE/FR/IT/KR). Emits concise JSON but may need stricter stopping sequences | Memory-constrained deployments that still require NSFW/OCR-aware label output |
> Tip: pull models inside the dev container with `docker compose --profile ollama up -d` and then `docker compose exec ollama ollama pull gemma3:4b`. Keep the profile stopped when you do not need extra GPU/CPU load.
> Qwen3-VL models stream their JSON payload via the `thinking` field. PhotoPrism v2025.11+ captures this automatically; if you run older builds, upgrade before enabling these models or responses will appear empty.
### Configuration
#### Environment Variables
- `PHOTOPRISM_VISION_LABEL_SCHEMA_FILE` — Absolute path to a JSON snippet that overrides the default label schema (applies to every Ollama label model).
- `PHOTOPRISM_VISION_YAML` — Custom `vision.yml` path. Keep it synced in Git if you automate deployments.
- `OLLAMA_HOST`, `OLLAMA_MODELS`, `OLLAMA_MAX_QUEUE`, `OLLAMA_NUM_PARALLEL`, etc. — Provided in `compose*.yaml` to tune the Ollama daemon. Adjust `OLLAMA_KEEP_ALIVE` if you want models to stay loaded between worker batches.
- `PHOTOPRISM_LOG_LEVEL=trace` — Enables verbose request/response previews (truncated to avoid leaking images). Use temporarily when debugging parsing issues.
#### `vision.yml` Example
```yaml
Models:
- Type: labels
Name: qwen2.5vl:7b
Engine: ollama
Run: newly-indexed
Resolution: 720
Format: json
Options:
Temperature: 0.05
Stop: ["\n\n"]
ForceJson: true
Service:
Uri: http://ollama:11434/api/generate
RequestFormat: ollama
ResponseFormat: ollama
FileScheme: base64
- Type: caption
Name: gemma3:4b
Engine: ollama
Disabled: false
Options:
Temperature: 0.2
Service:
Uri: http://ollama:11434/api/generate
```
Guidelines:
- Place new entries after the default TensorFlow models so they take precedence while Nasnet/NSFW remain as fallbacks.
- Always specify the exact Ollama tag (`model:version`) so upgrades are deliberate.
- Keep option flags before positional arguments in CLI snippets (`photoprism vision run -m labels --count 1`).
- If you proxy requests (e.g., through Traefik), set `Service.Key` to `Bearer <token>` and configure the proxy to inject/validate it.
### Operational Checklist
- **Scheduling** — Use `Run: newly-indexed` for incremental runs, `Run: manual` for ad-hoc CLI calls, or `Run: on-schedule` when paired with the scheduler. Leave `Run: auto` if you want the worker to decide based on other model states.
- **Timeouts & Retries** — Default timeout is 10 minutes (`ServiceTimeout`). Ollama streaming responses complete faster in practice; if you need stricter SLAs, wrap `photoprism vision run` in a job runner and retry failed batches manually.
- **Fallbacks** — Keep Nasnet configured even when Ollama labels are primary. `labels.go` stops at the first successful engine, so duplicates are avoided.
- **Security** — When exposing Ollama beyond localhost, terminate TLS at Traefik and enable API keys. Never return full JSON payloads in logs; rely on trace mode only for debugging and sanitize before sharing.
- **Model Storage** — Bind-mount `./storage/services/ollama:/root/.ollama` (see Compose) so pulled models survive container restarts. Run `docker compose exec ollama ollama list` during deployments to verify availability.
### Observability & Testing
- **CLI Smoke Tests**
- Captions: `photoprism vision run -m caption --count 5 --force`.
- Labels: `photoprism vision run -m labels --count 5 --force`.
- After each run, check `photoprism vision ls` for `source=ollama`.
- **Unit Tests**
- `go test ./internal/ai/vision/ollama ./internal/ai/vision -run Ollama -count=1` covers transport parsing and model defaults.
- Add fixtures under `internal/ai/vision/testdata` when capturing new response shapes; keep files small and anonymized. A minimal test sketch follows below.
- **Logging**
- Set `PHOTOPRISM_LOG_LEVEL=debug` to watch summary lines (“processed labels/caption via ollama”).
- Use `log.Trace` sparingly; it prints truncated JSON blobs for troubleshooting.
- **Metrics**
- `/api/v1/metrics` exposes counts per label source; scrape after a batch to compare throughput with TensorFlow/OpenAI runs.
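
For new fixtures, a parser test can mirror the existing pattern in `engine_ollama_test.go`. The sketch below is a hedged starting point: it assumes placement in the internal `vision` package (so the unexported `ollamaParser` and the `ApiRequest` type are visible) and only checks that one label is returned; adapt the assertions to the captured payload.

```go
package vision

import (
	"context"
	"encoding/json"
	"testing"

	"github.com/photoprism/photoprism/internal/ai/vision/ollama"
)

func TestOllamaParserFixture(t *testing.T) {
	payload := ollama.Response{
		Model:    "gemma3:latest",
		Response: `{"labels":[{"name":"lighthouse","confidence":0.83,"topicality":0.79}]}`,
	}

	raw, err := json.Marshal(payload)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}

	// Parse the captured payload the same way the worker does.
	resp, err := ollamaParser{}.Parse(context.Background(), &ApiRequest{Format: FormatJSON}, raw, 200)
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}

	if len(resp.Result.Labels) != 1 {
		t.Fatalf("expected one label, got %+v", resp.Result.Labels)
	}
}
```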
### Code Map
- `internal/ai/vision/ollama/*.go` — Engine defaults, schema helpers, transport structs.
- `internal/ai/vision/engine_ollama.go` — Builder/parser glue plus label/caption normalization.
- `internal/ai/vision/api_ollama.go` — Base64 payload builder.
- `internal/ai/vision/api_client.go` — Streaming decoder shared among engines.
- `internal/ai/vision/models.go` — Default caption model definition (`gemma3`).
- `compose*.yaml` — Ollama service profile, Traefik labels, and persistent volume wiring.
- `frontend/src/common/util.js` — Maps `src="ollama"` to the correct badge; keep it updated when adding new source strings.
### Next Steps
- [ ] Add formal schema validation (JSON Schema or JTD) so malformed label responses fail fast before normalization.
- [ ] Support multiple thumbnails per request once core workflows confirm the API contract (requires worker + UI changes).
- [ ] Emit per-model latency and success metrics from the vision worker to simplify tuning when several Ollama engines run side-by-side.
- [ ] Mirror any loader changes into PhotoPrism Plus/Pro templates to keep splash + browser checks consistent after enabling external engines.

View file

@ -1,7 +1,5 @@
package ollama
import "github.com/photoprism/photoprism/internal/ai/vision/schema"
const (
// CaptionPrompt instructs Ollama caption models to emit a single, active-voice sentence.
CaptionPrompt = "Create a caption with exactly one sentence in the active voice that describes the main visual content. Begin with the main subject and clear action. Avoid text formatting, meta-language, and filler words."
@ -22,12 +20,3 @@ const (
// DefaultResolution is the default thumbnail size submitted to Ollama models.
DefaultResolution = 720
)
// LabelsSchema returns the canonical label schema string consumed by Ollama models.
func LabelsSchema(nsfw bool) string {
if nsfw {
return schema.LabelsNSFW
} else {
return schema.LabelsDefault
}
}

View file

@ -0,0 +1,14 @@
package ollama
import (
"github.com/photoprism/photoprism/internal/ai/vision/schema"
)
// SchemaLabels returns the canonical label schema string consumed by Ollama models.
//
// Related documentation and references:
// - https://www.alibabacloud.com/help/en/model-studio/json-mode
// - https://www.json.org/json-en.html
func SchemaLabels(nsfw bool) string {
return schema.LabelsJson(nsfw)
}

View file

@ -0,0 +1,80 @@
package ollama
import (
"errors"
"fmt"
"time"
)
// Response encapsulates the subset of the Ollama generate API response we care about.
type Response struct {
ID string `yaml:"Id,omitempty" json:"id,omitempty"`
Code int `yaml:"Code,omitempty" json:"code,omitempty"`
Error string `yaml:"Error,omitempty" json:"error,omitempty"`
Model string `yaml:"Model,omitempty" json:"model,omitempty"`
CreatedAt time.Time `yaml:"CreatedAt,omitempty" json:"created_at,omitempty"`
Response string `yaml:"Response,omitempty" json:"response,omitempty"`
Thinking string `yaml:"Thinking,omitempty" json:"thinking,omitempty"`
Done bool `yaml:"Done,omitempty" json:"done,omitempty"`
Context []int `yaml:"Context,omitempty" json:"context,omitempty"`
TotalDuration int64 `yaml:"TotalDuration,omitempty" json:"total_duration,omitempty"`
LoadDuration int `yaml:"LoadDuration,omitempty" json:"load_duration,omitempty"`
PromptEvalCount int `yaml:"PromptEvalCount,omitempty" json:"prompt_eval_count,omitempty"`
PromptEvalDuration int `yaml:"PromptEvalDuration,omitempty" json:"prompt_eval_duration,omitempty"`
EvalCount int `yaml:"EvalCount,omitempty" json:"eval_count,omitempty"`
EvalDuration int64 `yaml:"EvalDuration,omitempty" json:"eval_duration,omitempty"`
Result ResultPayload `yaml:"Result,omitempty" json:"result,omitempty"`
}
// Err returns an error if the request has failed.
func (r *Response) Err() error {
if r == nil {
return errors.New("response is nil")
}
if r.Code >= 400 {
if r.Error != "" {
return errors.New(r.Error)
}
return fmt.Errorf("error %d", r.Code)
} else if len(r.Result.Labels) == 0 && r.Result.Caption == nil {
return errors.New("no result")
}
return nil
}
// HasResult checks if there is at least one result in the response data.
func (r *Response) HasResult() bool {
if r == nil {
return false
}
return len(r.Result.Labels) > 0 || r.Result.Caption != nil
}
// ResultPayload mirrors the structure returned by Ollama for result data.
type ResultPayload struct {
Labels []LabelPayload `json:"labels"`
Caption *CaptionPayload `json:"caption,omitempty"`
}
// LabelPayload represents a single label object emitted by the Ollama adapter.
type LabelPayload struct {
Name string `json:"name"`
Source string `json:"source,omitempty"`
Priority int `json:"priority,omitempty"`
Confidence float32 `json:"confidence,omitempty"`
Topicality float32 `json:"topicality,omitempty"`
Categories []string `json:"categories,omitempty"`
NSFW bool `json:"nsfw,omitempty"`
NSFWConfidence float32 `json:"nsfw_confidence,omitempty"`
}
// CaptionPayload represents the caption object emitted by the Ollama adapter.
type CaptionPayload struct {
Text string `json:"text"`
Source string `json:"source,omitempty"`
Confidence float32 `json:"confidence,omitempty"`
}

View file

@ -0,0 +1,90 @@
package ollama
import (
"testing"
"time"
)
func TestResponseErr(t *testing.T) {
t.Run("NilResponse", func(t *testing.T) {
if err := (*Response)(nil).Err(); err == nil || err.Error() != "response is nil" {
t.Fatalf("expected nil-response error, got %v", err)
}
})
t.Run("HTTPErrorWithMessage", func(t *testing.T) {
resp := &Response{Code: 429, Error: "too many requests"}
if err := resp.Err(); err == nil || err.Error() != "too many requests" {
t.Fatalf("expected message error, got %v", err)
}
})
t.Run("HTTPErrorWithoutMessage", func(t *testing.T) {
resp := &Response{Code: 500}
if err := resp.Err(); err == nil || err.Error() != "error 500" {
t.Fatalf("expected formatted error, got %v", err)
}
})
t.Run("NoResult", func(t *testing.T) {
resp := &Response{Code: 200}
if err := resp.Err(); err == nil || err.Error() != "no result" {
t.Fatalf("expected no-result error, got %v", err)
}
})
t.Run("HasLabels", func(t *testing.T) {
resp := &Response{
Code: 200,
Result: ResultPayload{Labels: []LabelPayload{{Name: "sky"}}},
Model: "qwen",
}
if err := resp.Err(); err != nil {
t.Fatalf("unexpected error: %v", err)
}
})
t.Run("HasCaption", func(t *testing.T) {
resp := &Response{
Code: 200,
Result: ResultPayload{Caption: &CaptionPayload{Text: "Caption"}},
}
if err := resp.Err(); err != nil {
t.Fatalf("unexpected error: %v", err)
}
})
}
func TestResponseHasResult(t *testing.T) {
if (*Response)(nil).HasResult() {
t.Fatal("nil response should not have result")
}
resp := &Response{}
if resp.HasResult() {
t.Fatal("expected false when result payload is empty")
}
resp.Result.Labels = []LabelPayload{{Name: "sun"}}
if !resp.HasResult() {
t.Fatal("expected true when labels present")
}
resp.Result.Labels = nil
resp.Result.Caption = &CaptionPayload{Text: "Sky", Confidence: 0.9}
if !resp.HasResult() {
t.Fatal("expected true when caption present")
}
}
func TestResponseJSONTagsAreOptional(t *testing.T) {
// Guard against accidental breaking changes to essential fields
resp := Response{
ID: "test",
Model: "ollama",
CreatedAt: time.Now(),
}
if resp.ID == "" || resp.Model == "" {
t.Fatalf("response fields should persist, got %+v", resp)
}
}

View file

@ -0,0 +1,128 @@
## PhotoPrism — OpenAI API Integration
**Last Updated:** November 14, 2025
### Overview
This package contains PhotoPrism's adapter for the OpenAI Responses API. It enables existing caption and label workflows (`GenerateCaption`, `GenerateLabels`, and the `photoprism vision run` CLI) to call OpenAI models alongside TensorFlow and Ollama without changing worker or API code. The implementation focuses on predictable results, structured outputs, and clear observability so operators can opt in gradually.
#### Context & Constraints
- OpenAI requests flow through the existing vision client (`internal/ai/vision/api_client.go`) and must honour PhotoPrism's timeout, logging, and ACL rules.
- Structured outputs are preferred but the adapter must gracefully handle free-form text; `output_text` responses are parsed both as JSON and as plain captions.
- Costs should remain predictable: requests are limited to a single 720px thumbnail (`detail=low`) with capped token budgets (512 caption, 1024 labels).
- Secrets are supplied per model (`Service.Key`) with fallbacks to `OPENAI_API_KEY` / `_FILE`. Logs must redact sensitive data.
#### Goals
- Provide drop-in OpenAI support for captions and labels using `vision.yml`.
- Keep configuration ergonomic by auto-populating prompts, schema names, token limits, and sampling defaults.
- Expose enough logging and tests so operators can compare OpenAI output with existing engines before enabling it broadly.
#### Non-Goals
- Introducing a new `generate` model type or combined caption/label endpoint (reserved for a later phase).
- Replacing the default TensorFlow models; they remain active as fallbacks.
- Managing OpenAI billing or quota dashboards beyond surfacing token counts in logs and metrics.
### Prompt, Model, & Schema Guidance
- **Models:** The adapter targets GPT-5 vision tiers (e.g. `gpt-5-nano`, `gpt-5-mini`). These models support image inputs, structured outputs, and deterministic settings. Set `Name` to the exact provider identifier so defaults are applied correctly. Caption models share the same configuration surface and run through the same adapter.
- **Prompts:** Defaults live in `defaults.go`. Captions use a single-sentence instruction; labels use `LabelPromptDefault` (or `LabelPromptNSFW` when PhotoPrism requests NSFW metadata). Custom prompts should retain schema reminders so structured outputs stay valid.
- **Schemas:** Labels use the JSON schema returned by `schema.LabelsJsonSchema(nsfw)`; the response format name is derived via `schema.JsonSchemaName` (e.g. `photoprism_vision_labels_v1`). Captions omit schemas unless operators explicitly request a structured format.
- **When to keep defaults:** For most deployments, leaving `System`, `Prompt`, `Schema`, and `Options` unset yields stable output with minimal configuration. Override them only when domain-specific language or custom scoring is necessary, and add regression tests alongside.
Budget-conscious operators can experiment with lighter prompts or lower-resolution thumbnails, but should keep token limits and determinism settings intact to avoid unexpected bills and UI churn.
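
Building on the schema guidance above, the short sketch below shows one way to derive the default label schema and its response-format name. It is a hedged illustration that only relies on the `openai.SchemaLabels` and `schema.JsonSchemaName` helpers used by this adapter.

```go
package main

import (
	"fmt"

	"github.com/photoprism/photoprism/internal/ai/vision/openai"
	"github.com/photoprism/photoprism/internal/ai/vision/schema"
)

func main() {
	// Request the NSFW-aware schema variant; pass false for the default set.
	labelsSchema := openai.SchemaLabels(true)

	// The response-format name falls back to openai.DefaultSchemaVersion
	// when the schema does not provide one of its own.
	name := schema.JsonSchemaName(labelsSchema, openai.DefaultSchemaVersion)

	fmt.Println(name)
}
```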
#### Performance & Cost Estimates
- **Token budgets:** Captions request up to 512 output tokens; labels request up to 1024. Input tokens are typically ≤700 for a single 720px thumbnail plus prompts.
- **Latency:** GPT-5 nano/mini vision calls typically complete in 3–8s, depending on OpenAI region. Including reasoning metadata (`reasoning.effort=low`) has negligible impact but improves traceability.
- **Costs:** Consult OpenAI's pricing for the selected model. Multiply input/output tokens by the published rate; a worked example with placeholder rates follows below. PhotoPrism currently sends one image per request to keep costs linear with photo count.
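
As a hedged illustration of that arithmetic, the sketch below estimates a per-photo cost from the token budgets above; the per-million-token rates are placeholders, not OpenAI's published pricing.

```go
package main

import "fmt"

func main() {
	const (
		inputTokens  = 700.0  // Typical budget for one 720px thumbnail plus prompts.
		outputTokens = 1024.0 // Label requests cap output at 1024 tokens.

		// Placeholder per-million-token rates; substitute the published
		// rates for the model you configure in vision.yml.
		inputRatePerMTok  = 0.25
		outputRatePerMTok = 2.00
	)

	perPhoto := inputTokens/1e6*inputRatePerMTok + outputTokens/1e6*outputRatePerMTok
	fmt.Printf("estimated cost per photo: $%.6f (x 10,000 photos = $%.2f)\n", perPhoto, perPhoto*10000)
}
```

Adjust the token constants if you change `MaxOutputTokens` or prompt length in `vision.yml`.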
### Configuration
#### Environment Variables
- `OPENAI_API_KEY` / `OPENAI_API_KEY_FILE` — fallback credentials when a model's `Service.Key` is unset.
- Existing `PHOTOPRISM_VISION_*` variables remain authoritative (see the [Getting Started Guide](https://docs.photoprism.app/getting-started/config-options/#computer-vision) for full lists).
#### `vision.yml` Examples
```yaml
Models:
- Type: caption
Name: gpt-5-nano
Engine: openai
Disabled: false # opt in manually
Resolution: 720 # optional; default is 720
Options:
Detail: low # optional; defaults to low
MaxOutputTokens: 512
Service:
Uri: https://api.openai.com/v1/responses
FileScheme: data
Key: ${OPENAI_API_KEY}
- Type: labels
Name: gpt-5-mini
Engine: openai
Disabled: false
Resolution: 720
Options:
Detail: low
MaxOutputTokens: 1024
ForceJson: true # redundant but explicit
Service:
Uri: https://api.openai.com/v1/responses
FileScheme: data
Key: ${OPENAI_API_KEY}
```
Keep TensorFlow entries in place so PhotoPrism falls back when the external service is unavailable.
#### Defaults
- File scheme: `data:` URLs (base64) for all OpenAI models.
- Resolution: 720px thumbnails (`vision.Thumb(ModelTypeCaption|Labels)`).
- Options: `MaxOutputTokens` raised to 512 (caption) / 1024 (labels); `ForceJson=false` for captions, `true` for labels; `reasoning.effort="low"`.
- Sampling: `Temperature` and `TopP` set to `0` for `gpt-5*` models; inherited values (0.1/0.9) remain for other engines. `openaiBuilder.Build` performs this override while preserving the struct defaults for non-OpenAI adapters.
- Schema naming: Automatically derived via `schema.JsonSchemaName`, so operators may omit `SchemaVersion`.
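For example, the derived name for the default label schema can be reproduced with the `schema` helpers added in this change (a small sketch, not part of the adapter itself):

```go
package main

import (
	"fmt"

	"github.com/photoprism/photoprism/internal/ai/vision/schema"
)

func main() {
	// The label schema contains the "labels" key and no explicit version is
	// given, so the derived name is "photoprism_vision_labels_v1".
	fmt.Println(schema.JsonSchemaName(schema.LabelsJsonSchema(false), ""))
}
```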
### Documentation
- Label Generation: <https://docs.photoprism.app/developer-guide/vision/label-generation/>
- Caption Generation: <https://docs.photoprism.app/developer-guide/vision/caption-generation/>
- Vision CLI Commands: <https://docs.photoprism.app/developer-guide/vision/cli/>
### Implementation Details
#### Core Concepts
- **Structured outputs:** PhotoPrism leverages OpenAI's structured output capability as documented at <https://platform.openai.com/docs/guides/structured-outputs>. When a JSON schema is supplied, the adapter emits `text.format` with `type: "json_schema"` and a schema name derived from the content. The parser then prefers `output_json`, but also attempts to decode `output_text` payloads that contain JSON objects (see the parsing sketch after this list).
- **Deterministic sampling:** GPT-5 models are run with `temperature=0` and `top_p=0` to minimise variance, while still allowing developers to override values in `vision.yml` if needed.
- **Reasoning metadata:** Requests include `reasoning.effort="low"` so OpenAI returns structured reasoning usage counters, helping operators track token consumption.
- **Worker summaries:** The vision worker now logs either “updated …” or “processed … (no metadata changes detected)”, making reruns easy to audit.
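The following sketch shows the parsing preference described above, using the `Response` helpers from this package; `decodeLabels` is an illustrative wrapper, not an exported PhotoPrism function:

```go
package example

import (
	"encoding/json"
	"fmt"
	"strings"

	"github.com/photoprism/photoprism/internal/ai/vision/openai"
)

// decodeLabels prefers a structured output_json payload and falls back to
// JSON embedded in an output_text part, mirroring the adapter's parsing order.
func decodeLabels(resp *openai.Response) (json.RawMessage, error) {
	if payload := resp.FirstJSON(); len(payload) > 0 {
		return payload, nil
	}
	if text := strings.TrimSpace(resp.FirstText()); strings.HasPrefix(text, "{") {
		return json.RawMessage(text), nil
	}
	return nil, fmt.Errorf("openai: response contains no JSON payload")
}
```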
#### Rate Limiting
OpenAI calls respect the existing `limiter.Auth` configuration used by the vision service. Failed requests surface standard HTTP errors and are not automatically retried; operators should ensure they have adequate account limits and consider external rate limiting when sharing credentials.
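If you do put an external limiter in front of shared credentials, a minimal sketch using `golang.org/x/time/rate` could look like this; `callVision` is a placeholder for whatever issues the actual request:

```go
package example

import (
	"context"

	"golang.org/x/time/rate"
)

// visionLimiter allows at most 2 requests per second with a burst of 4;
// tune both values to match your OpenAI account limits.
var visionLimiter = rate.NewLimiter(rate.Limit(2), 4)

// callWithLimit blocks until the limiter grants a slot, then runs callVision.
func callWithLimit(ctx context.Context, callVision func(context.Context) error) error {
	if err := visionLimiter.Wait(ctx); err != nil {
		return err
	}
	return callVision(ctx)
}
```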
#### Testing & Validation
1. Unit tests: `go test ./internal/ai/vision/openai ./internal/ai/vision -run OpenAI -count=1`. Fixtures under `internal/ai/vision/openai/testdata/` replay real Responses payloads (captions and labels).
2. CLI smoke test: `photoprism vision run -m labels --count 1 --force` with trace logging enabled to inspect sanitised Responses.
3. Compare worker summaries and label sources (`openai`) in the UI or via `photoprism vision ls`.
#### Code Map
- **Adapter & defaults:** `internal/ai/vision/openai` (defaults, schema helpers, transport, tests).
- **Request/response plumbing:** `internal/ai/vision/api_request.go`, `api_client.go`, `engine_openai.go`, `engine_openai_test.go`.
- **Workers & CLI:** `internal/workers/vision.go`, `internal/commands/vision_run.go`.
- **Shared utilities:** `internal/ai/vision/schema`, `pkg/clean`, `pkg/media`.
#### Next Steps
- [ ] Introduce the future `generate` model type that combines captions, labels, and optional markers.
- [ ] Evaluate additional OpenAI models as pricing and capabilities evolve.
- [ ] Expose token usage metrics (input/output/reasoning) via Prometheus once the schema stabilises.

View file

@ -1,6 +1,29 @@
package openai
import "github.com/photoprism/photoprism/internal/ai/vision/schema"
const (
// CaptionSystem defines the default system prompt for caption models.
CaptionSystem = "You are a PhotoPrism vision model. Return concise, user-friendly captions that describe the main subjects accurately."
// CaptionPrompt instructs caption models to respond with a single sentence.
CaptionPrompt = "Provide exactly one sentence describing the key subject and action in the image. Avoid filler words and technical jargon."
// LabelSystem defines the system prompt for label generation.
LabelSystem = "You are a PhotoPrism vision model. Emit JSON that matches the provided schema and keep label names short, singular nouns."
// LabelPromptDefault requests general-purpose labels.
LabelPromptDefault = "Analyze the image and return label objects with name, confidence (0-1), and topicality (0-1)."
// LabelPromptNSFW requests labels including NSFW metadata when required.
LabelPromptNSFW = "Analyze the image and return label objects with name, confidence (0-1), topicality (0-1), nsfw (true when sensitive), and nsfw_confidence (0-1)."
// DefaultDetail specifies the preferred thumbnail detail level for Responses API calls.
DefaultDetail = "low"
// CaptionMaxTokens suggests the output budget for caption responses.
CaptionMaxTokens = 512
// LabelsMaxTokens suggests the output budget for label responses.
LabelsMaxTokens = 1024
// DefaultTemperature configures deterministic replies.
DefaultTemperature = 0.1
// DefaultTopP limits nucleus sampling.
DefaultTopP = 0.9
// DefaultSchemaVersion is used when callers do not specify an explicit schema version.
DefaultSchemaVersion = "v1"
)
var (
// DefaultModel is the model used by default when accessing the OpenAI API.
@ -8,8 +31,3 @@ var (
// DefaultResolution is the default thumbnail size submitted to the OpenAI API.
DefaultResolution = 720
)
// LabelsSchema returns the canonical label schema string consumed by OpenAI models.
func LabelsSchema() string {
return schema.LabelsDefault
}

View file

@ -0,0 +1,16 @@
package openai
import (
"encoding/json"
"github.com/photoprism/photoprism/internal/ai/vision/schema"
)
// SchemaLabels returns the canonical labels JSON Schema string consumed by OpenAI models.
//
// Related documentation and references:
// - https://platform.openai.com/docs/guides/structured-outputs
// - https://json-schema.org/learn/miscellaneous-examples
func SchemaLabels(nsfw bool) json.RawMessage {
return schema.LabelsJsonSchema(nsfw)
}

View file

@ -0,0 +1,73 @@
{
"id": "resp_0d356718505119f3006916e5d8730881a0b91de2aa700f6196",
"object": "response",
"created_at": 1763108312,
"status": "completed",
"background": false,
"billing": {
"payer": "developer"
},
"error": null,
"incomplete_details": null,
"instructions": null,
"max_output_tokens": 512,
"max_tool_calls": null,
"model": "gpt-5-nano-2025-08-07",
"output": [
{
"id": "rs_0d356718505119f3006916e5d8efd481a0a4f9cc1823cc6c83",
"type": "reasoning",
"summary": []
},
{
"id": "msg_0d356718505119f3006916e5d9433881a0bc79197d2cfc2027",
"type": "message",
"status": "completed",
"content": [
{
"type": "output_text",
"annotations": [],
"logprobs": [],
"text": "A bee gathers nectar from the vibrant red poppy\u2019s center."
}
],
"role": "assistant"
}
],
"parallel_tool_calls": true,
"previous_response_id": null,
"prompt_cache_key": null,
"prompt_cache_retention": null,
"reasoning": {
"effort": "low",
"summary": null
},
"safety_identifier": null,
"service_tier": "default",
"store": true,
"temperature": 1.0,
"text": {
"format": {
"type": "text"
},
"verbosity": "medium"
},
"tool_choice": "auto",
"tools": [],
"top_logprobs": 0,
"top_p": 1.0,
"truncation": "disabled",
"usage": {
"input_tokens": 576,
"input_tokens_details": {
"cached_tokens": 0
},
"output_tokens": 19,
"output_tokens_details": {
"reasoning_tokens": 0
},
"total_tokens": 595
},
"user": null,
"metadata": {}
}

View file

@ -0,0 +1,114 @@
{
"id": "resp_0fa91dfb69b7d644006916ea0b72ac819f84ff3152a38dfcdb",
"object": "response",
"created_at": 1763109387,
"status": "completed",
"background": false,
"billing": {
"payer": "developer"
},
"error": null,
"incomplete_details": null,
"instructions": null,
"max_output_tokens": 1024,
"max_tool_calls": null,
"model": "gpt-5-mini-2025-08-07",
"output": [
{
"id": "rs_0fa91dfb69b7d644006916ea0c3450819f8a13396bf377f474",
"type": "reasoning",
"summary": []
},
{
"id": "msg_0fa91dfb69b7d644006916ea0d2dfc819faf52b11334fc10a4",
"type": "message",
"status": "completed",
"content": [
{
"type": "output_text",
"annotations": [],
"logprobs": [],
"text": "{\"labels\":[{\"name\":\"flower\",\"confidence\":0.99,\"topicality\":0.99},{\"name\":\"bee\",\"confidence\":0.95,\"topicality\":0.95},{\"name\":\"petal\",\"confidence\":0.92,\"topicality\":0.88},{\"name\":\"pollen\",\"confidence\":0.85,\"topicality\":0.8},{\"name\":\"insect\",\"confidence\":0.9,\"topicality\":0.85},{\"name\":\"red\",\"confidence\":0.88,\"topicality\":0.6},{\"name\":\"close-up\",\"confidence\":0.86,\"topicality\":0.7},{\"name\":\"nature\",\"confidence\":0.8,\"topicality\":0.5}]}"
}
],
"role": "assistant"
}
],
"parallel_tool_calls": true,
"previous_response_id": null,
"prompt_cache_key": null,
"prompt_cache_retention": null,
"reasoning": {
"effort": "low",
"summary": null
},
"safety_identifier": null,
"service_tier": "default",
"store": true,
"temperature": 1.0,
"text": {
"format": {
"type": "json_schema",
"description": null,
"name": "photoprism_vision_labels_v1",
"schema": {
"type": "object",
"properties": {
"labels": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"topicality": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": [
"name",
"confidence",
"topicality"
],
"additionalProperties": false
},
"default": []
}
},
"required": [
"labels"
],
"additionalProperties": false
},
"strict": true
},
"verbosity": "medium"
},
"tool_choice": "auto",
"tools": [],
"top_logprobs": 0,
"top_p": 1.0,
"truncation": "disabled",
"usage": {
"input_tokens": 724,
"input_tokens_details": {
"cached_tokens": 0
},
"output_tokens": 169,
"output_tokens_details": {
"reasoning_tokens": 0
},
"total_tokens": 893
},
"user": null,
"metadata": {}
}

View file

@ -0,0 +1,142 @@
package openai
import (
"encoding/json"
"strings"
)
const (
// ContentTypeText identifies text input segments for the Responses API.
ContentTypeText = "input_text"
// ContentTypeImage identifies image input segments for the Responses API.
ContentTypeImage = "input_image"
// ResponseFormatJSONSchema requests JSON constrained by a schema.
ResponseFormatJSONSchema = "json_schema"
// ResponseFormatJSONObject requests a free-form JSON object.
ResponseFormatJSONObject = "json_object"
)
// HTTPRequest represents the payload expected by OpenAI's Responses API.
type HTTPRequest struct {
Model string `json:"model"`
Input []InputMessage `json:"input"`
Text *TextOptions `json:"text,omitempty"`
Reasoning *Reasoning `json:"reasoning,omitempty"`
MaxOutputTokens int `json:"max_output_tokens,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
}
// TextOptions carries formatting preferences for textual responses.
type TextOptions struct {
Format *ResponseFormat `json:"format,omitempty"`
}
// Reasoning configures the effort level for reasoning models.
type Reasoning struct {
Effort string `json:"effort,omitempty"`
}
// InputMessage captures a single system or user message in the request.
type InputMessage struct {
Role string `json:"role"`
Type string `json:"type,omitempty"`
Content []ContentItem `json:"content"`
}
// ContentItem represents a text or image entry within a message.
type ContentItem struct {
Type string `json:"type"`
Text string `json:"text,omitempty"`
ImageURL string `json:"image_url,omitempty"`
Detail string `json:"detail,omitempty"`
}
// ResponseFormat describes how OpenAI should format its response.
type ResponseFormat struct {
Type string `json:"type"`
Name string `json:"name,omitempty"`
Schema json.RawMessage `json:"schema,omitempty"`
Description string `json:"description,omitempty"`
Strict bool `json:"strict,omitempty"`
}
// Response mirrors the subset of the Responses API response we need.
type Response struct {
ID string `json:"id"`
Model string `json:"model"`
Output []ResponseOutput `json:"output"`
Error *struct {
Message string `json:"message"`
Type string `json:"type"`
} `json:"error,omitempty"`
}
// ResponseOutput captures assistant messages within the response.
type ResponseOutput struct {
Role string `json:"role"`
Content []ResponseContent `json:"content"`
}
// ResponseContent contains individual message parts (JSON or text).
type ResponseContent struct {
Type string `json:"type"`
Text string `json:"text,omitempty"`
JSON json.RawMessage `json:"json,omitempty"`
}
// FirstJSON returns the first JSON payload contained in the response.
func (r *Response) FirstJSON() json.RawMessage {
if r == nil {
return nil
}
for i := range r.Output {
for j := range r.Output[i].Content {
if len(r.Output[i].Content[j].JSON) > 0 {
return r.Output[i].Content[j].JSON
}
}
}
return nil
}
// FirstText returns the first textual payload contained in the response.
func (r *Response) FirstText() string {
if r == nil {
return ""
}
for i := range r.Output {
for j := range r.Output[i].Content {
if text := strings.TrimSpace(r.Output[i].Content[j].Text); text != "" {
return text
}
}
}
return ""
}
// ParseErrorMessage extracts a human readable error message from a Responses API payload.
func ParseErrorMessage(raw []byte) string {
var errResp struct {
Error *struct {
Message string `json:"message"`
} `json:"error"`
}
if err := json.Unmarshal(raw, &errResp); err != nil {
return ""
}
if errResp.Error != nil {
return strings.TrimSpace(errResp.Error.Message)
}
return ""
}

View file

@ -0,0 +1,120 @@
package openai
import (
"encoding/json"
"os"
"path/filepath"
"testing"
)
func loadTestResponse(t *testing.T, name string) *Response {
t.Helper()
filePath := filepath.Join("testdata", name)
data, err := os.ReadFile(filePath)
if err != nil {
t.Fatalf("failed to read %s: %v", filePath, err)
}
var resp Response
if err := json.Unmarshal(data, &resp); err != nil {
t.Fatalf("failed to unmarshal %s: %v", filePath, err)
}
return &resp
}
func TestParseErrorMessage(t *testing.T) {
t.Run("returns message when present", func(t *testing.T) {
raw := []byte(`{"error":{"message":"Invalid schema"}}`)
msg := ParseErrorMessage(raw)
if msg != "Invalid schema" {
t.Fatalf("expected message, got %q", msg)
}
})
t.Run("returns empty string when error is missing", func(t *testing.T) {
raw := []byte(`{"output":[]}`)
if msg := ParseErrorMessage(raw); msg != "" {
t.Fatalf("expected empty message, got %q", msg)
}
})
}
func TestResponseFirstTextCaption(t *testing.T) {
resp := loadTestResponse(t, "caption-response.json")
if jsonPayload := resp.FirstJSON(); len(jsonPayload) != 0 {
t.Fatalf("expected no JSON payload, got: %s", jsonPayload)
}
text := resp.FirstText()
expected := "A bee gathers nectar from the vibrant red poppys center."
if text != expected {
t.Fatalf("unexpected caption text: %q", text)
}
}
func TestResponseFirstTextLabels(t *testing.T) {
resp := loadTestResponse(t, "labels-response.json")
if jsonPayload := resp.FirstJSON(); len(jsonPayload) != 0 {
t.Fatalf("expected no JSON payload, got: %s", jsonPayload)
}
text := resp.FirstText()
if len(text) == 0 {
t.Fatal("expected structured JSON string in text payload")
}
if text[0] != '{' {
t.Fatalf("expected JSON object in text payload, got %q", text)
}
}
func TestResponseFirstJSONFromStructuredPayload(t *testing.T) {
resp := &Response{
ID: "resp_structured",
Model: "gpt-5-mini",
Output: []ResponseOutput{
{
Role: "assistant",
Content: []ResponseContent{
{
Type: "output_json",
JSON: json.RawMessage(`{"labels":[{"name":"sunset"}]}`),
},
},
},
},
}
jsonPayload := resp.FirstJSON()
if len(jsonPayload) == 0 {
t.Fatal("expected JSON payload, got empty result")
}
var decoded struct {
Labels []map[string]string `json:"labels"`
}
if err := json.Unmarshal(jsonPayload, &decoded); err != nil {
t.Fatalf("failed to decode JSON payload: %v", err)
}
if len(decoded.Labels) != 1 || decoded.Labels[0]["name"] != "sunset" {
t.Fatalf("unexpected JSON payload: %+v", decoded.Labels)
}
}
func TestSchemaLabelsReturnsValidJSON(t *testing.T) {
raw := SchemaLabels(false)
var decoded map[string]any
if err := json.Unmarshal(raw, &decoded); err != nil {
t.Fatalf("schema should be valid JSON: %v", err)
}
if decoded["type"] != "object" {
t.Fatalf("expected type object, got %v", decoded["type"])
}
}

View file

@ -0,0 +1,52 @@
## PhotoPrism — Vision Schema Reference
**Last Updated:** November 14, 2025
### Overview
This package contains the canonical label response specifications used by PhotoPrism's external vision engines. It exposes two helpers:
- `LabelsJsonSchema(nsfw bool)` — returns a JSON **Schema** document tailored for OpenAI Responses requests, enabling strict validation of structured outputs.
- `LabelsJson(nsfw bool)` — returns a literal JSON **sample** that Ollama-style models can mirror when they only support prompt-enforced structures.
Both helpers build on the same field set (`name`, `confidence`, `topicality`, and optional NSFW flags) so downstream parsing logic (`LabelResult`) can remain engine-agnostic.
### Schema Types & Differences
| Helper | Target Engine | Format | Validation Style | When To Use |
|---------------------------|--------------------------|--------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
| `LabelsJsonSchema(false)` | OpenAI (standard labels) | JSON Schema Draft | Strong: OpenAI enforces field types/ranges server-side before returning a response. | When calling GPT-5 vision models via `ApiFormatOpenAI` to ensure PhotoPrism receives well-formed label arrays. |
| `LabelsJsonSchema(true)` | OpenAI (labels + NSFW) | JSON Schema Draft with additional boolean/float fields | Strong: same enforcement plus required NSFW fields. | When `DetectNSFWLabels` or NSFW-specific prompts are active and the model must emit `nsfw` + `nsfw_confidence`. |
| `LabelsJson(false)` | Ollama (standard labels) | Plain JSON example | Soft: model is nudged to mimic the structure through prompt instructions. | When running self-hosted Ollama models that support “JSON mode” but do not consume JSON Schema definitions. |
| `LabelsJson(true)` | Ollama (labels + NSFW) | Plain JSON example with NSFW keys | Soft: prompts describe the required keys; the adapter validates after parsing. | When Ollama prompts mention NSFW scoring or PhotoPrism sets `DetectNSFWLabels=true`. |
**Key technical distinction:** OpenAI's Responses API accepts a JSON Schema (see `LabelsJsonSchema*`) and guarantees compliance by rejecting invalid responses, while Ollama currently relies on prompt-directed output. For Ollama integrations we provide a representative JSON document (`LabelsJson*`) that models can imitate; PhotoPrism then normalizes and validates the results in Go.
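To see the difference in practice, both helpers can be printed side by side (a short sketch using the import path from this package):

```go
package main

import (
	"fmt"

	"github.com/photoprism/photoprism/internal/ai/vision/schema"
)

func main() {
	// JSON Schema document sent to OpenAI for server-side validation.
	fmt.Println(string(schema.LabelsJsonSchema(false)))

	// Plain JSON sample that Ollama-style prompts ask the model to mirror.
	fmt.Println(schema.LabelsJson(true))
}
```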
### Field Definitions
- `name` — single-word noun describing the subject (string, required).
- `confidence` — normalized score between `0` and `1` (float, required).
- `topicality` — relative relevance score between `0` and `1` (float, required; defaults to `confidence` if omitted after parsing).
- `nsfw` — boolean flag indicating sensitive content (required only in NSFW variants).
- `nsfw_confidence` — normalized probability for the NSFW assessment (required only in NSFW variants).
OpenAI schemas enforce these ranges/types, while Ollama prompts remind the model to emit matching keys. After parsing, PhotoPrism applies `LabelConfidenceDefault` and `normalizeLabelResult` to fill gaps and enforce naming rules.
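For illustration, a decoding target for these fields might look as follows; the struct names are hypothetical, and the actual `LabelResult` type in `internal/ai/vision` may differ in detail:

```go
package example

// labelEntry mirrors the JSON keys defined above; the nsfw fields are only
// present when the NSFW schema or sample variant is used.
type labelEntry struct {
	Name           string  `json:"name"`
	Confidence     float64 `json:"confidence"`
	Topicality     float64 `json:"topicality"`
	NSFW           bool    `json:"nsfw,omitempty"`
	NSFWConfidence float64 `json:"nsfw_confidence,omitempty"`
}

// labelsPayload matches the required top-level "labels" array.
type labelsPayload struct {
	Labels []labelEntry `json:"labels"`
}
```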
### Usage Guidance
1. **OpenAI models** (`Engine: openai`, `RequestFormat: openai`):
- Leave `Schema` unset in `vision.yml`; the engine defaults call `LabelsJsonSchema(model.PromptContains("nsfw"))`.
- Optionally override the schema via `Schema`/`SchemaFile` if you extend fields, but keep required keys so `LabelResult` parsing succeeds.
2. **Ollama models** (`Engine: ollama`, `RequestFormat: ollama`):
- Rely on the built-in samples from `LabelsJson` or include them directly in prompts via `model.SchemaInstructions()`.
- Because enforcement happens after the response arrives, keep `Format: json` (default) and `Options.ForceJson=true` for label models to make parsing stricter.
3. **Custom engines**:
- Reuse these helpers to stay compatible with PhotoPrism's label DTOs.
- When adding new fields, update both schema/sample versions so OpenAI and Ollama adapters remain aligned.
### References
- JSON Schema primer: https://json-schema.org/learn/miscellaneous-examples
- OpenAI structured outputs: https://platform.openai.com/docs/guides/structured-outputs
- JSON mode background (Ollama-style prompts): https://www.alibabacloud.com/help/en/model-studio/json-mode
- JSON syntax refresher: https://www.json.org/json-en.html

View file

@ -1,16 +1,115 @@
package schema
// LabelsDefault provides the minimal JSON schema for label responses used across engines.
const (
LabelsDefault = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0 }]\n}"
LabelsNSFW = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0,\n \"nsfw\": false,\n \"nsfw_confidence\": 0\n }]\n}"
import (
"encoding/json"
)
// Labels returns the canonical label schema string.
func Labels(nsfw bool) string {
// LabelsJsonSchemaDefault provides the minimal JSON schema for label responses used across engines.
const (
LabelsJsonSchemaDefault = `{
"type": "object",
"properties": {
"labels": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"topicality": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": ["name", "confidence", "topicality"],
"additionalProperties": false
},
"default": []
}
},
"required": ["labels"],
"additionalProperties": false
}`
LabelsJsonDefault = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0 }]\n}"
LabelsJsonSchemaNSFW = `{
"type": "object",
"properties": {
"labels": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"topicality": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"nsfw": {
"type": "boolean"
},
"nsfw_confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": [
"name",
"confidence",
"topicality",
"nsfw",
"nsfw_confidence"
],
"additionalProperties": false
},
"default": []
}
},
"required": ["labels"],
"additionalProperties": false
}`
LabelsJsonNSFW = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0,\n \"nsfw\": false,\n \"nsfw_confidence\": 0\n }]\n}"
)
// LabelsJsonSchema returns the canonical label JSON Schema string for OpenAI API endpoints.
//
// Related documentation and references:
// - https://platform.openai.com/docs/guides/structured-outputs
// - https://json-schema.org/learn/miscellaneous-examples
func LabelsJsonSchema(nsfw bool) json.RawMessage {
if nsfw {
return LabelsNSFW
return json.RawMessage(LabelsJsonSchemaNSFW)
} else {
return LabelsDefault
return json.RawMessage(LabelsJsonSchemaDefault)
}
}
// LabelsJson returns the canonical label JSON string for Ollama vision models.
//
// Related documentation and references:
// - https://www.alibabacloud.com/help/en/model-studio/json-mode
// - https://www.json.org/json-en.html
func LabelsJson(nsfw bool) string {
if nsfw {
return LabelsJsonNSFW
} else {
return LabelsJsonDefault
}
}

View file

@ -0,0 +1,36 @@
package schema
import (
"bytes"
"encoding/json"
"fmt"
"github.com/photoprism/photoprism/pkg/clean"
)
const (
NamePrefix = "photoprism_vision"
)
// JsonSchemaName returns the schema version string to be used for API requests.
func JsonSchemaName(schema json.RawMessage, version string) string {
var schemaName string
switch {
case bytes.Contains(schema, []byte("labels")):
schemaName = "labels"
case bytes.Contains(schema, []byte("labels")):
schemaName = "caption"
default:
schemaName = "schema"
}
version = clean.TypeLowerUnderscore(version)
if version == "" {
version = "v1"
}
return fmt.Sprintf("%s_%s_%s", NamePrefix, schemaName, version)
}

View file

@ -0,0 +1,23 @@
package schema
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
)
func TestJsonSchemaName(t *testing.T) {
t.Run("Default", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_schema_v1", JsonSchemaName(nil, ""))
})
t.Run("Labels", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_labels_v1", JsonSchemaName(json.RawMessage(LabelsJsonSchemaDefault), ""))
})
t.Run("LabelsV1", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_labels_v2", JsonSchemaName([]byte("labels"), "v2"))
})
t.Run("LabelsJsonSchema", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_labels_v1", JsonSchemaName(LabelsJsonSchema(false), "v1"))
})
}

View file

@ -1,5 +1,5 @@
/*
Package schema defines canonical JSON schema templates shared by PhotoPrism's AI vision engines.
Package schema defines canonical JSON and JSON Schema templates shared by PhotoPrism's AI vision engines.
Copyright (c) 2018 - 2025 PhotoPrism UG. All rights reserved.

Some files were not shown because too many files have changed in this diff.