Merge remote-tracking branch 'origin/develop' into PostgreSQL

Author: Keith Martin, 2025-11-15 18:33:45 +10:00
Commit: dbf4fd51f9
116 changed files with 26388 additions and 24020 deletions

View file

@ -1,6 +1,6 @@
# PhotoPrism® Repository Guidelines
**Last Updated:** November 11, 2025
**Last Updated:** November 14, 2025
## Purpose
@ -17,6 +17,7 @@ Learn more: https://agents.md/
- REST API: https://docs.photoprism.dev/ (Swagger), https://docs.photoprism.app/developer-guide/api/ (Docs)
- Code Maps: [`CODEMAP.md`](CODEMAP.md) (Backend/Go), [`frontend/CODEMAP.md`](frontend/CODEMAP.md) (Frontend/JS)
- Face Detection & Embeddings Notes: [`internal/ai/face/README.md`](internal/ai/face/README.md)
- Vision Engine Guides: [`internal/ai/vision/openai/README.md`](internal/ai/vision/openai/README.md), [`internal/ai/vision/ollama/README.md`](internal/ai/vision/ollama/README.md)
> Quick Tip: to inspect GitHub issue details without leaving the terminal, run `curl -s https://api.github.com/repos/photoprism/photoprism/issues/<id>`.
@ -224,6 +225,8 @@ Note: Across our public documentation, official images, and in production, the c
- Dialogs must follow the shared focus pattern documented in `frontend/src/common/README.md`.
- Always expose `ref="dialog"` on `<v-dialog>` overlays, call `$view.enter/leave` in `@after-enter` / `@after-leave`, and avoid positive `tabindex` values (see the wiring sketch after this list).
- Persistent dialogs (those with the `persistent` prop) must handle Escape via `@keydown.esc.exact` so Vuetify's default rejection animation is suppressed; keep other shortcuts on `@keyup` so inner inputs can cancel them first.
- Global shortcuts run through `onShortCut(ev)` in `common/view.js`; it only forwards Escape and `ctrl`/`meta` combinations, so do not rely on it for arbitrary keys.
- When a dialog opens nested menus (for example, combobox suggestion lists), ensure they work with the global trap; see the README for troubleshooting tips.
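A minimal wiring sketch of this pattern (illustrative only; the emitted event and method names are assumptions, not taken from a specific dialog):

```js
// Options-API sketch for a dialog following the shared focus pattern.
// Assumed template: <v-dialog ref="dialog" persistent @keydown.esc.exact="close"
//   @after-enter="afterEnter" @after-leave="afterLeave"> wrapping <v-card ref="content" tabindex="-1">.
export default {
  emits: ["close"],
  methods: {
    afterEnter() {
      this.$view.enter(this); // register as the active view so global shortcuts reach this dialog
    },
    afterLeave() {
      this.$view.leave(this); // release focus ownership when the overlay closes
    },
    close(ev) {
      if (ev?.defaultPrevented) return; // inner inputs may veto the shortcut first
      this.$emit("close");
    },
  },
};
```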
## Safety & Data

View file

@ -1,6 +1,6 @@
PhotoPrism — Backend CODEMAP
**Last Updated:** November 2, 2025
**Last Updated:** November 14, 2025
Purpose
- Give agents and contributors a fast, reliable map of where things live and how they fit together, so you can add features, fix bugs, and write tests without spelunking.
@ -35,6 +35,7 @@ High-Level Package Map (Go)
- `internal/config` — configuration, flags/env/options, client config, DB init/migrate
- `internal/entity` — GORM v1 models, queries, search helpers, migrations
- `internal/photoprism` — core domain logic (indexing, import, faces, thumbnails, cleanup)
- `internal/ai/vision` — multi-engine computer vision pipeline (models, adapters, schema). Adapter docs: [`internal/ai/vision/openai/README.md`](internal/ai/vision/openai/README.md) and [`internal/ai/vision/ollama/README.md`](internal/ai/vision/ollama/README.md).
- `internal/workers` — background schedulers (index, vision, sync, meta, backup)
- `internal/auth` — ACL, sessions, OIDC
- `internal/service` — cluster/portal, maps, hub, webdav

View file

@ -1,5 +1,5 @@
# Ubuntu 25.10 (Questing Quokka)
FROM photoprism/develop:251018-questing
FROM photoprism/develop:251113-questing
# Harden npm usage by default (applies to npm ci / install in dev container)
ENV NPM_CONFIG_IGNORE_SCRIPTS=true

NOTICE (26 changed lines)
View file

@ -9,7 +9,7 @@ The following 3rd-party software packages may be used by or distributed with
PhotoPrism. Any information relevant to third-party vendors listed below are
collected using common, reasonable means.
Date generated: 2025-11-10
Date generated: 2025-11-12
================================================================================
@ -2443,8 +2443,8 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
--------------------------------------------------------------------------------
Package: github.com/golang/geo
Version: v0.0.0-20251110120158-2d428c1fd7a2
License: Apache-2.0 (https://github.com/golang/geo/blob/2d428c1fd7a2/LICENSE)
Version: v0.0.0-20251111181513-e7f3a1a58fb3
License: Apache-2.0 (https://github.com/golang/geo/blob/e7f3a1a58fb3/LICENSE)
Apache License
@ -8188,8 +8188,8 @@ License: Apache-2.0 (https://github.com/go4org/go4/blob/214862532bf5/LICENSE)
--------------------------------------------------------------------------------
Package: golang.org/x/crypto
Version: v0.43.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/crypto/+/v0.43.0:LICENSE)
Version: v0.44.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/crypto/+/v0.44.0:LICENSE)
Copyright 2009 The Go Authors.
@ -8222,8 +8222,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
Package: golang.org/x/image
Version: v0.32.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/image/+/v0.32.0:LICENSE)
Version: v0.33.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/image/+/v0.33.0:LICENSE)
Copyright 2009 The Go Authors.
@ -8256,8 +8256,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
Package: golang.org/x/mod/semver
Version: v0.29.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/mod/+/v0.29.0:LICENSE)
Version: v0.30.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/mod/+/v0.30.0:LICENSE)
Copyright 2009 The Go Authors.
@ -8290,8 +8290,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
Package: golang.org/x/net
Version: v0.46.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/net/+/v0.46.0:LICENSE)
Version: v0.47.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/net/+/v0.47.0:LICENSE)
Copyright 2009 The Go Authors.
@ -8426,8 +8426,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
Package: golang.org/x/text
Version: v0.30.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/text/+/v0.30.0:LICENSE)
Version: v0.31.0
License: BSD-3-Clause (https://cs.opensource.google/go/x/text/+/v0.31.0:LICENSE)
Copyright 2009 The Go Authors.

View file

@ -3,8 +3,8 @@ msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2025-10-17 17:32+0000\n"
"PO-Revision-Date: 2025-10-22 08:25+0000\n"
"Last-Translator: DeepL <noreply-mt-deepl@weblate.org>\n"
"PO-Revision-Date: 2025-11-12 07:40+0000\n"
"Last-Translator: dtsolakis <dtsola@eranet.gr>\n"
"Language-Team: none\n"
"Language: el\n"
"MIME-Version: 1.0\n"
@ -23,11 +23,11 @@ msgstr "Αυτό δεν είναι εφικτό"
#: messages.go:106
msgid "Changes could not be saved"
msgstr "Οι αλλαγές δεν μπόρεσαν να αποθηκευτούν"
msgstr "Οι αλλαγές δεν ήταν δυνατό να αποθηκευτούν"
#: messages.go:107
msgid "Could not be deleted"
msgstr "Δεν μπόρεσε να διαγραφεί"
msgstr "Δεν ήταν εφικτή η διαγραφή"
#: messages.go:108
#, c-format
@ -48,7 +48,7 @@ msgstr "Πολύ μεγάλο αρχείο"
#: messages.go:112
msgid "Unsupported"
msgstr "Ανυποστήρικτος"
msgstr "Δεν υποστηρίζεται"
#: messages.go:113
msgid "Unsupported type"
@ -56,11 +56,11 @@ msgstr "Μη υποστηριζόμενος τύπος"
#: messages.go:114
msgid "Unsupported format"
msgstr "Μη υποστηριζόμενη μορφή"
msgstr "Μη υποστηριζόμενος μορφότυπος"
#: messages.go:115
msgid "Originals folder is empty"
msgstr "Ο φάκελος Πρωτότυπα είναι άδειος"
msgstr "Ο φάκελος πρωτότυπων είναι άδειος"
#: messages.go:116
msgid "Selection not found"
@ -84,19 +84,19 @@ msgstr "Η ετικέτα δεν βρέθηκε"
#: messages.go:121
msgid "Album not found"
msgstr "Η Συλλογή δεν βρέθηκε"
msgstr "Το άλμπουμ δεν βρέθηκε"
#: messages.go:122
msgid "Subject not found"
msgstr "Το Θέμα δεν βρέθηκε"
msgstr "Το θέμα δεν βρέθηκε"
#: messages.go:123
msgid "Person not found"
msgstr "Το Άτομο δεν βρέθηκε"
msgstr "Το άτομο δεν βρέθηκε"
#: messages.go:124
msgid "Face not found"
msgstr "Το Πρόσωπο δεν βρέθηκε"
msgstr "Το πρόσωπο δεν βρέθηκε"
#: messages.go:125
msgid "Not available in public mode"
@ -104,7 +104,7 @@ msgstr "Μη διαθέσιμο κατά τη δημόσια λειτουργί
#: messages.go:126
msgid "Not available in read-only mode"
msgstr "μη διαθέσιμο στην κατάσταση \"μόνο για ανάγνωση\""
msgstr "Μη διαθέσιμο στην κατάσταση \"μόνο ανάγνωση\""
#: messages.go:127
msgid "Please log in to your account"
@ -112,7 +112,7 @@ msgstr "Παρακαλούμε συνδεθείτε και δοκιμάστε ξ
#: messages.go:128
msgid "Permission denied"
msgstr "Το Άτομο διαγράφηκε"
msgstr "Δέν δόθηκε άδεια"
#: messages.go:129
msgid "Payment required"
@ -120,31 +120,31 @@ msgstr "Απαιτείται πληρωμή"
#: messages.go:130
msgid "Upload might be offensive"
msgstr "Η φόρτωση μπορεί να είναι προσβλητική"
msgstr "Το ανέβασμα μπορεί να είναι προσβλητικό"
#: messages.go:131
msgid "Upload failed"
msgstr "Αποτυχία αποστολής"
msgstr "Αποτυχία ανεβάσματος"
#: messages.go:132
msgid "No items selected"
msgstr "Δεν έχουν επιλεγεί αντικείμενα"
msgstr "Δεν έχουν επιλεγεί στοιχεία"
#: messages.go:133
msgid "Failed creating file, please check permissions"
msgstr "Απέτυχε η δημιουργία αρχείου, παρακαλούμε ελέγξτε τα δικαιώματα"
msgstr "Απέτυχε η δημιουργία αρχείου, ελέγξτε τα δικαιώματα"
#: messages.go:134
msgid "Failed creating folder, please check permissions"
msgstr "Απέτυχε η δημιουργία φακέλου, παρακαλούμε ελέγξτε τα δικαιώματα"
msgstr "Απέτυχε η δημιουργία φακέλου, ελέγξτε τα δικαιώματα"
#: messages.go:135
msgid "Could not connect, please try again"
msgstr "Δεν ήταν δυνατή η σύνδεση, παρακαλώ δοκιμάστε ξανά"
msgstr "Δεν ήταν δυνατή η σύνδεση, δοκιμάστε ξανά"
#: messages.go:136
msgid "Enter verification code"
msgstr "βάλτε κωδικό επιβεβαίωσης"
msgstr "Εισάγετε τον κωδικό επαλήθευσης"
#: messages.go:137
msgid "Invalid verification code, please try again"
@ -152,11 +152,11 @@ msgstr "Μη έγκυρος κωδικός επαλήθευσης, δοκιμά
#: messages.go:138
msgid "Invalid password, please try again"
msgstr "Μη έγκυρος κωδικός πρόσβασης, παρακαλώ δοκιμάστε ξανά"
msgstr "Μη έγκυρος κωδικός πρόσβασης, δοκιμάστε ξανά"
#: messages.go:139
msgid "Feature disabled"
msgstr "Λειτουργία απενεργοποιημένη"
msgstr "Απενεργοποιημένη δυνατότητα"
#: messages.go:140
msgid "No labels selected"
@ -164,7 +164,7 @@ msgstr "Δεν έχουν επιλεγεί ετικέτες"
#: messages.go:141
msgid "No albums selected"
msgstr "Δεν έχουν επιλεγεί συλλογές"
msgstr "Δεν έχουν επιλεγεί άλμπουμ"
#: messages.go:142
msgid "No files available for download"
@ -188,7 +188,7 @@ msgstr "Μη έγκυρο όνομα"
#: messages.go:147
msgid "Busy, please try again later"
msgstr "Απασχολημένος, προσπαθήστε ξανά αργότερα"
msgstr "Το σύστημα είναι απασχολημένο, προσπαθήστε ξανά αργότερα"
#: messages.go:148
#, c-format
@ -197,7 +197,7 @@ msgstr "Το διάστημα αφύπνισης είναι %s, αλλά πρέ
#: messages.go:149
msgid "Your account could not be connected"
msgstr "Ο λογαριασμός σας δεν μπόρεσε να συνδεθεί"
msgstr "Ο λογαριασμός σας δεν ήταν δυνατό να συνδεθεί"
#: messages.go:150
msgid "Too many requests"
@ -205,11 +205,11 @@ msgstr "Πάρα πολλά αιτήματα"
#: messages.go:151
msgid "Insufficient storage"
msgstr "Ανεπαρκής αποθήκευση"
msgstr "Ανεπαρκής χώρος"
#: messages.go:152
msgid "Quota exceeded"
msgstr "Υπέρβαση ποσόστωσης"
msgstr "Υπέρβαση ορίου"
#: messages.go:155
msgid "Changes successfully saved"
@ -217,20 +217,20 @@ msgstr "Οι αλλαγές αποθηκεύτηκαν επιτυχώς"
#: messages.go:156
msgid "Album created"
msgstr "Η Συλλογή δημιουργήθηκε"
msgstr "Το άλμπουμ δημιουργήθηκε"
#: messages.go:157
msgid "Album saved"
msgstr "Η Συλλογή αποθηκεύθηκε"
msgstr "Το άλμπουμ αποθηκεύθηκε"
#: messages.go:158
#, c-format
msgid "Album %s deleted"
msgstr "Η Συλλογή %s διαγράφηκε"
msgstr "Το άλμπουμ %s διαγράφηκε"
#: messages.go:159
msgid "Album contents cloned"
msgstr "Τα περιεχόμενα της Συλλογής αντιγράφηκαν"
msgstr "Τα περιεχόμενα του άλμπουμ αντιγράφηκαν"
#: messages.go:160
msgid "File removed from stack"
@ -267,15 +267,15 @@ msgstr "%d καταχωρήσεις αφαιρέθηκαν από %s"
#: messages.go:167
msgid "Account created"
msgstr "Ο Λογαριασμός δημιουργήθηκε"
msgstr "Ο λογαριασμός δημιουργήθηκε"
#: messages.go:168
msgid "Account saved"
msgstr "Ο Λογαριασμός αποθηκεύθηκε"
msgstr "Ο λογαριασμός αποθηκεύθηκε"
#: messages.go:169
msgid "Account deleted"
msgstr "Ο Λογαριασμός διαγράφηκε"
msgstr "Ο λογαριασμός διαγράφηκε"
#: messages.go:170
msgid "Settings saved"
@ -297,7 +297,7 @@ msgstr "Η εισαγωγή ακυρώθηκε"
#: messages.go:174
#, c-format
msgid "Indexing completed in %d s"
msgstr "Η δημιουργία ευρετηρίου σε %d s"
msgstr "Η ευρετηρίαση ολοκληρώθηκε σε %d s"
#: messages.go:175
msgid "Indexing originals..."
@ -329,27 +329,27 @@ msgstr "Αντιγραφή αρχείων από %s"
#: messages.go:181
msgid "Labels deleted"
msgstr "Οι Ετικέτες διαγράφηκαν"
msgstr "Οι ετικέτες διαγράφηκαν"
#: messages.go:182
msgid "Label saved"
msgstr "Η Ετικέτα αποθηκεύτηκε"
msgstr "Η ετικέτα αποθηκεύτηκε"
#: messages.go:183
msgid "Subject saved"
msgstr "Το Θέμα αποθηκεύθηκε"
msgstr "Το θέμα αποθηκεύθηκε"
#: messages.go:184
msgid "Subject deleted"
msgstr "Το Θέμα διαγράφηκε"
msgstr "Το θέμα διαγράφηκε"
#: messages.go:185
msgid "Person saved"
msgstr "Το Άτομο αποθηκεύθηκε"
msgstr "Το άτομο αποθηκεύθηκε"
#: messages.go:186
msgid "Person deleted"
msgstr "Το Άτομο διαγράφηκε"
msgstr "Το άτομο διαγράφηκε"
#: messages.go:187
msgid "File uploaded"
@ -358,15 +358,15 @@ msgstr "Το αρχείο διαγράφηκε"
#: messages.go:188
#, c-format
msgid "%d files uploaded in %d s"
msgstr "%d αρχεία μεταφορτώθηκαν σε %d s"
msgstr "%d αρχεία ανεβάστηκαν σε %d s"
#: messages.go:189
msgid "Processing upload..."
msgstr "Επεξεργασία μεταφόρτωσης..."
msgstr "Επεξεργασία ανεβάσματος..."
#: messages.go:190
msgid "Upload has been processed"
msgstr "Η φόρτωση έχει ολοκληρωθεί"
msgstr "Το ανέβασμα έχει ολοκληρωθεί"
#: messages.go:191
msgid "Selection approved"
@ -382,16 +382,16 @@ msgstr "Η επιλογή αποκαταστάθηκε"
#: messages.go:194
msgid "Selection marked as private"
msgstr "Η επιλογή χαρακτηρίστηκε ως ιδιωτική"
msgstr "Η επιλογή μαρκαρίστηκε ως ιδιωτική"
#: messages.go:195
msgid "Albums deleted"
msgstr "Οι Συλλογές διαγράφηκαν"
msgstr "Διαγραμμένα άλμπουμ"
#: messages.go:196
#, c-format
msgid "Zip created in %d s"
msgstr "Το αρχείο συμπίεσης δημιουργήθηκε σε %d s"
msgstr "Το αρχείο zip δημιουργήθηκε σε %d s"
#: messages.go:197
msgid "Permanently deleted"
@ -404,11 +404,11 @@ msgstr "%s έχει αποκατασταθεί"
#: messages.go:199
msgid "Successfully verified"
msgstr "Επαληθεύτηκε με επιτυχία"
msgstr "Επιτυχής επαλήθευση"
#: messages.go:200
msgid "Successfully activated"
msgstr "Ενεργοποιήθηκε με επιτυχία"
msgstr "Επιτυχής ενεργοποίηση"
#~ msgid "Storage is full"
#~ msgstr "Ο αποθηκευτικός χώρος είναι γεμάτος"

View file

@ -410,7 +410,8 @@ services:
## Login with "user / photoprism" and "admin / photoprism".
keycloak:
image: quay.io/keycloak/keycloak:25.0
stop_grace_period: 30s
stop_grace_period: 20s
profiles: [ "all", "auth", "keycloak" ]
command: "start-dev" # development mode, do not use this in production!
links:
- "traefik:localssl.dev"

View file

@ -1,6 +1,6 @@
PhotoPrism — Frontend CODEMAP
**Last Updated:** October 13, 2025
**Last Updated:** November 12, 2025
Purpose
- Help agents and contributors navigate the Vue 3 + Vuetify 3 app quickly and make safe changes.
@ -107,6 +107,10 @@ Common HowTos
- Compute `key` from route + filter params and cap eager loads with `Rest.restoreCap(Model.batchSize())` (defaults to 10× the batch size); a sketch follows this list.
- Check `$view.wasBackwardNavigation()` when deciding whether to reuse stored state; `src/app.js` wires the router guards that keep the history direction in sync so no globals like `window.backwardsNavigationDetected` are needed.
- Handle dialog shortcuts
- Persistent dialogs (`persistent` prop) must listen for Escape on `@keydown.esc.exact` to override Vuetify's rejection animation; keep Enter and other actions on `@keyup` so child inputs can intercept them first.
- Global shortcuts go through `onShortCut(ev)` in `common/view.js`. It only forwards Escape and `ctrl`/`meta` combinations, so do not depend on it for plain character keys.
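A minimal sketch of the restore flow (the key scheme, `hasStoredState()`, `restoreState()`, and `search()` are illustrative assumptions; `Rest.restoreCap()`, `Model.batchSize()`, and `$view.wasBackwardNavigation()` are the helpers referenced above):

```js
// Sketch only: decide whether to reuse stored results when re-entering a result page.
const key = `${this.$route.name}:${JSON.stringify(this.filter)}`; // illustrative key scheme
const cap = Rest.restoreCap(Model.batchSize()); // eager restores are capped (default 10× batch size)

if (this.$view.wasBackwardNavigation() && this.hasStoredState(key)) {
  this.restoreState(key, cap); // illustrative: reuse at most `cap` previously loaded items
} else {
  this.search(); // forward navigation: run a fresh query instead
}
```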
Conventions & Safety
- Avoid `v-html`; use `v-sanitize` or `$util.sanitizeHtml()` (build enforces this)
- Keep big components lazy if needed; split views logically under `src/page`

View file

@ -20,7 +20,7 @@
"@mdi/font": "^7.4.47",
"@testing-library/jest-dom": "^6.9.1",
"@testing-library/react": "^16.3.0",
"@vitejs/plugin-react": "^5.1.0",
"@vitejs/plugin-react": "^5.1.1",
"@vitejs/plugin-vue": "^6.0.1",
"@vitest/browser": "^3.2.4",
"@vitest/coverage-v8": "^3.2.4",
@ -34,7 +34,7 @@
"babel-loader": "^10.0.0",
"babel-plugin-istanbul": "^7.0.1",
"babel-plugin-polyfill-corejs3": "^0.13.0",
"browserslist": "^4.27.0",
"browserslist": "^4.28.0",
"cheerio": "1.0.0-rc.12",
"core-js": "^3.46.0",
"cross-env": "^7.0.3",
@ -79,7 +79,7 @@
"regenerator-runtime": "^0.14.1",
"resolve-url-loader": "^5.0.0",
"sanitize-html": "^2.17.0",
"sass": "^1.93.3",
"sass": "^1.94.0",
"sass-loader": "^16.0.6",
"sockette": "^2.0.6",
"style-loader": "^4.0.0",
@ -98,7 +98,7 @@
"vue-sanitize-directive": "^0.2.1",
"vue-style-loader": "^4.1.3",
"vue3-gettext": "^2.4.0",
"vuetify": "^3.10.9",
"vuetify": "^3.10.10",
"webpack": "^5.102.1",
"webpack-bundle-analyzer": "^4.10.2",
"webpack-cli": "^6.0.1",
@ -4301,9 +4301,9 @@
"license": "MIT"
},
"node_modules/@rolldown/pluginutils": {
"version": "1.0.0-beta.43",
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.43.tgz",
"integrity": "sha512-5Uxg7fQUCmfhax7FJke2+8B6cqgeUJUD9o2uXIKXhD+mG0mL6NObmVoi9wXEU1tY89mZKgAYA6fTbftx3q2ZPQ==",
"version": "1.0.0-beta.47",
"resolved": "https://registry.npmjs.org/@rolldown/pluginutils/-/pluginutils-1.0.0-beta.47.tgz",
"integrity": "sha512-8QagwMH3kNCuzD8EWL8R2YPW5e4OrHNSAHRFDdmFqEwEaD/KcNKjVoumo+gP2vW5eKB2UPbM6vTYiGZX0ixLnw==",
"license": "MIT"
},
"node_modules/@rollup/plugin-node-resolve": {
@ -4941,9 +4941,9 @@
"license": "MIT"
},
"node_modules/@types/node": {
"version": "24.10.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.0.tgz",
"integrity": "sha512-qzQZRBqkFsYyaSWXuEHc2WR9c0a0CXwiE5FWUvn7ZM+vdy1uZLfCunD38UzhuB7YN/J11ndbDBcTmOdxJo9Q7A==",
"version": "24.10.1",
"resolved": "https://registry.npmjs.org/@types/node/-/node-24.10.1.tgz",
"integrity": "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ==",
"license": "MIT",
"dependencies": {
"undici-types": "~7.16.0"
@ -4998,15 +4998,15 @@
"license": "MIT"
},
"node_modules/@vitejs/plugin-react": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-5.1.0.tgz",
"integrity": "sha512-4LuWrg7EKWgQaMJfnN+wcmbAW+VSsCmqGohftWjuct47bv8uE4n/nPpq4XjJPsxgq00GGG5J8dvBczp8uxScew==",
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/@vitejs/plugin-react/-/plugin-react-5.1.1.tgz",
"integrity": "sha512-WQfkSw0QbQ5aJ2CHYw23ZGkqnRwqKHD/KYsMeTkZzPT4Jcf0DcBxBtwMJxnu6E7oxw5+JC6ZAiePgh28uJ1HBA==",
"license": "MIT",
"dependencies": {
"@babel/core": "^7.28.4",
"@babel/core": "^7.28.5",
"@babel/plugin-transform-react-jsx-self": "^7.27.1",
"@babel/plugin-transform-react-jsx-source": "^7.27.1",
"@rolldown/pluginutils": "1.0.0-beta.43",
"@rolldown/pluginutils": "1.0.0-beta.47",
"@types/babel__core": "^7.20.5",
"react-refresh": "^0.18.0"
},
@ -6157,9 +6157,9 @@
}
},
"node_modules/autoprefixer": {
"version": "10.4.21",
"resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.21.tgz",
"integrity": "sha512-O+A6LWV5LDHSJD3LjHYoNi4VLsj/Whi7k6zG12xTYaU4cQ8oxQGckXNX8cRHK5yOZ/ppVHe0ZBXGzSV9jXdVbQ==",
"version": "10.4.22",
"resolved": "https://registry.npmjs.org/autoprefixer/-/autoprefixer-10.4.22.tgz",
"integrity": "sha512-ARe0v/t9gO28Bznv6GgqARmVqcWOV3mfgUPn9becPHMiD3o9BwlRgaeccZnwTpZ7Zwqrm+c1sUSsMxIzQzc8Xg==",
"funding": [
{
"type": "opencollective",
@ -6176,9 +6176,9 @@
],
"license": "MIT",
"dependencies": {
"browserslist": "^4.24.4",
"caniuse-lite": "^1.0.30001702",
"fraction.js": "^4.3.7",
"browserslist": "^4.27.0",
"caniuse-lite": "^1.0.30001754",
"fraction.js": "^5.3.4",
"normalize-range": "^0.1.2",
"picocolors": "^1.1.1",
"postcss-value-parser": "^4.2.0"
@ -6328,9 +6328,9 @@
"license": "MIT"
},
"node_modules/baseline-browser-mapping": {
"version": "2.8.25",
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.25.tgz",
"integrity": "sha512-2NovHVesVF5TXefsGX1yzx1xgr7+m9JQenvz6FQY3qd+YXkKkYiv+vTCc7OriP9mcDZpTC5mAOYN4ocd29+erA==",
"version": "2.8.27",
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.27.tgz",
"integrity": "sha512-2CXFpkjVnY2FT+B6GrSYxzYf65BJWEqz5tIRHCvNsZZ2F3CmsCB37h8SpYgKG7y9C4YAeTipIPWG7EmFmhAeXA==",
"license": "Apache-2.0",
"bin": {
"baseline-browser-mapping": "dist/cli.js"
@ -6387,9 +6387,9 @@
}
},
"node_modules/browserslist": {
"version": "4.27.0",
"resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.27.0.tgz",
"integrity": "sha512-AXVQwdhot1eqLihwasPElhX2tAZiBjWdJ9i/Zcj2S6QYIjkx62OKSfnobkriB81C3l4w0rVy3Nt4jaTBltYEpw==",
"version": "4.28.0",
"resolved": "https://registry.npmjs.org/browserslist/-/browserslist-4.28.0.tgz",
"integrity": "sha512-tbydkR/CxfMwelN0vwdP/pLkDwyAASZ+VfWm4EOwlB6SWhx1sYnWLqo8N5j0rAzPfzfRaxt0mM/4wPU/Su84RQ==",
"funding": [
{
"type": "opencollective",
@ -6407,10 +6407,10 @@
"license": "MIT",
"peer": true,
"dependencies": {
"baseline-browser-mapping": "^2.8.19",
"caniuse-lite": "^1.0.30001751",
"electron-to-chromium": "^1.5.238",
"node-releases": "^2.0.26",
"baseline-browser-mapping": "^2.8.25",
"caniuse-lite": "^1.0.30001754",
"electron-to-chromium": "^1.5.249",
"node-releases": "^2.0.27",
"update-browserslist-db": "^1.1.4"
},
"bin": {
@ -7690,9 +7690,9 @@
}
},
"node_modules/electron-to-chromium": {
"version": "1.5.249",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.249.tgz",
"integrity": "sha512-5vcfL3BBe++qZ5kuFhD/p8WOM1N9m3nwvJPULJx+4xf2usSlZFJ0qoNYO2fOX4hi3ocuDcmDobtA+5SFr4OmBg==",
"version": "1.5.250",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.250.tgz",
"integrity": "sha512-/5UMj9IiGDMOFBnN4i7/Ry5onJrAGSbOGo3s9FEKmwobGq6xw832ccET0CE3CkkMBZ8GJSlUIesZofpyurqDXw==",
"license": "ISC"
},
"node_modules/emmet": {
@ -8895,15 +8895,15 @@
}
},
"node_modules/fraction.js": {
"version": "4.3.7",
"resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-4.3.7.tgz",
"integrity": "sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==",
"version": "5.3.4",
"resolved": "https://registry.npmjs.org/fraction.js/-/fraction.js-5.3.4.tgz",
"integrity": "sha512-1X1NTtiJphryn/uLQz3whtY6jK3fTqoE3ohKs0tT+Ujr1W59oopxmoEh7Lu5p6vBaPbgoM0bzveAW4Qi5RyWDQ==",
"license": "MIT",
"engines": {
"node": "*"
},
"funding": {
"type": "patreon",
"type": "github",
"url": "https://github.com/sponsors/rawify"
}
},
@ -10487,9 +10487,9 @@
"license": "MIT"
},
"node_modules/js-yaml": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
"version": "4.1.1",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.1.tgz",
"integrity": "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==",
"license": "MIT",
"dependencies": {
"argparse": "^2.0.1"
@ -14039,9 +14039,9 @@
}
},
"node_modules/sass": {
"version": "1.93.3",
"resolved": "https://registry.npmjs.org/sass/-/sass-1.93.3.tgz",
"integrity": "sha512-elOcIZRTM76dvxNAjqYrucTSI0teAF/L2Lv0s6f6b7FOwcwIuA357bIE871580AjHJuSvLIRUosgV+lIWx6Rgg==",
"version": "1.94.0",
"resolved": "https://registry.npmjs.org/sass/-/sass-1.94.0.tgz",
"integrity": "sha512-Dqh7SiYcaFtdv5Wvku6QgS5IGPm281L+ZtVD1U2FJa7Q0EFRlq8Z3sjYtz6gYObsYThUOz9ArwFqPZx+1azILQ==",
"license": "MIT",
"peer": true,
"dependencies": {
@ -16338,9 +16338,9 @@
}
},
"node_modules/vuetify": {
"version": "3.10.9",
"resolved": "https://registry.npmjs.org/vuetify/-/vuetify-3.10.9.tgz",
"integrity": "sha512-hVeBkkSJhbhbKOIfMQTmTJ1R1S+ZHp7k3r0wWwA28eUe3ulD/uY6nrF/AZ3ZIh+WRHiDqipWmG1RrrZGUtmZOg==",
"version": "3.10.10",
"resolved": "https://registry.npmjs.org/vuetify/-/vuetify-3.10.10.tgz",
"integrity": "sha512-4RRQrJCaiWRalciBVpIKuZmPlfGUGwJalXuca8nHVNTDKJq4LHYNLcIKEbfdyP/6VBiWG4jZPJDTmC0dpXu+sA==",
"license": "MIT",
"peer": true,
"funding": {
@ -16790,9 +16790,9 @@
}
},
"node_modules/webpack-plugin-vuetify/node_modules/yocto-queue": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.1.tgz",
"integrity": "sha512-AyeEbWOu/TAXdxlV9wmGcR0+yh2j3vYPGOECcIj2S7MkrLyC7ne+oye2BKTItt0ii2PHk4cDy+95+LshzbXnGg==",
"version": "1.2.2",
"resolved": "https://registry.npmjs.org/yocto-queue/-/yocto-queue-1.2.2.tgz",
"integrity": "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==",
"license": "MIT",
"engines": {
"node": ">=12.20"

View file

@ -44,7 +44,7 @@
"@mdi/font": "^7.4.47",
"@testing-library/jest-dom": "^6.9.1",
"@testing-library/react": "^16.3.0",
"@vitejs/plugin-react": "^5.1.0",
"@vitejs/plugin-react": "^5.1.1",
"@vitejs/plugin-vue": "^6.0.1",
"@vitest/browser": "^3.2.4",
"@vitest/coverage-v8": "^3.2.4",
@ -58,7 +58,7 @@
"babel-loader": "^10.0.0",
"babel-plugin-istanbul": "^7.0.1",
"babel-plugin-polyfill-corejs3": "^0.13.0",
"browserslist": "^4.27.0",
"browserslist": "^4.28.0",
"cheerio": "1.0.0-rc.12",
"core-js": "^3.46.0",
"cross-env": "^7.0.3",
@ -103,7 +103,7 @@
"regenerator-runtime": "^0.14.1",
"resolve-url-loader": "^5.0.0",
"sanitize-html": "^2.17.0",
"sass": "^1.93.3",
"sass": "^1.94.0",
"sass-loader": "^16.0.6",
"sockette": "^2.0.6",
"style-loader": "^4.0.0",
@ -122,7 +122,7 @@
"vue-sanitize-directive": "^0.2.1",
"vue-style-loader": "^4.1.3",
"vue3-gettext": "^2.4.0",
"vuetify": "^3.10.9",
"vuetify": "^3.10.10",
"webpack": "^5.102.1",
"webpack-bundle-analyzer": "^4.10.2",
"webpack-cli": "^6.0.1",

View file

@ -1,10 +1,10 @@
# View Helper Guidelines
**Last Updated:** November 11, 2025
**Last Updated:** November 12, 2025
## Focus Management
PhotoPrism maintains predictable keyboard focus across pages and dialogs by using a shared view helper:
PhotoPrism uses a shared view helper to maintain predictable focus across pages and dialogs:
- [`frontend/src/common/view.js`](https://github.com/photoprism/photoprism/blob/develop/frontend/src/common/view.js)
@ -64,10 +64,12 @@ Vuetify dialogs are teleported to the overlay container, so consistent refs and
```vue
<v-card-actions class="action-buttons">
<v-btn variant="flat" color="button" class="action-cancel" @click.stop="close">
<v-btn variant="flat" color="button"
class="action-cancel" @click.stop="close">
{{ $gettext(`Cancel`) }}
</v-btn>
<v-btn variant="flat" color="highlight" class="action-confirm" @click.stop="confirm">
<v-btn variant="flat" color="highlight"
class="action-confirm" @click.stop="confirm">
{{ $gettext(`Delete`) }}
</v-btn>
</v-card-actions>
@ -79,6 +81,72 @@ Vuetify dialogs are teleported to the overlay container, so consistent refs and
Only add local `@focusout` handlers if a dialog needs custom behaviour. If you do, always call `ev.preventDefault()` when you redirect focus so you do not fight the global handler.
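A hypothetical local handler that follows this rule (the guard is an assumption):

```js
// Sketch only: custom focus redirection inside a dialog, per the guideline above.
onFocusOut(ev) {
  if (!this.keepFocusInside(ev.relatedTarget)) {
    return; // let the global handler manage focus as usual
  }
  ev.preventDefault(); // mark the event as handled so the global handler does not also act
  const el = this.$refs.content?.$el ?? this.$refs.content;
  el?.focus?.(); // redirect focus back into the dialog body
}
```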
### Keyboard Event Handling
Dialogs and page shells often react to keyboard shortcuts (Escape to close, Enter to confirm, etc.). To keep those handlers compatible with text inputs and other interactive children:
- Attach listeners to the focusable container that the view helper manages: the page wrapper with `tabindex="-1"` or the dialog root (`<v-dialog ref="dialog">`).
- Prefer `@keyup` (for example, `@keyup.enter.exact="confirm"`) so elements inside the container receive `keydown` events first and can call `event.stopPropagation()` when they need to keep the key (such as pressing Enter inside a form field).
- **Persistent dialogs (`persistent` attribute)** must handle the Escape key with `@keydown.esc.exact="close"`. Vuetify's built-in Escape handler plays a “rejection” shake animation when the dialog refuses to close; attaching a direct keydown listener overrides the built-in handler and suppresses the animation while still allowing inner inputs to cancel the event.
- Combine modifiers like `.exact` and `.stop` intentionally. Use `.stop` only when the handler fully resolves the action; otherwise allow events to bubble to ancestor traps.
- If a component must react on `keydown`, scope the listener to the specific control instead of the container, and document why the early trigger is required.
- When emitting from reusable components, forward the native event (`close(event)`) so parents can inspect `event.defaultPrevented` or `event.key` before acting.
Note: To override Vuetify's built-in `<v-dialog>` Escape handler (and stop the “rejection” animation on persistent dialogs), attach a direct `@keydown.esc.exact="close"` listener; the global `onShortCut(ev)` hook is not sufficient on its own.
Example dialog wiring:
```vue
<v-dialog
ref="dialog"
persistent
@keydown.esc.exact="close"
@keyup.enter.exact="confirm"
>
<v-card ref="content" tabindex="-1">
<!-- dialog body -->
</v-card>
</v-dialog>
```
Example page container:
```vue
<template>
<div class="p-page p-settings" tabindex="-1" @keyup.esc.exact="maybeClose">
<!-- page content -->
</div>
</template>
```
Both snippets allow focused inputs to veto shortcuts by calling `event.stopPropagation()` or `event.preventDefault()` before the key reaches the container listener, keeping focus management predictable across the app.
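For example, a hypothetical form field could keep Enter for itself by stopping the event before it reaches the dialog listener:

```js
// Sketch only: bound via @keyup.enter="onFieldEnter" on an inner input.
onFieldEnter(ev) {
  ev.stopPropagation(); // the dialog's @keyup.enter.exact="confirm" listener never fires
  this.applyValue();    // illustrative local action instead of confirming the dialog
}
```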
#### Global Shortcut Forwarding
`common/view.js` registers a single `keydown` listener that forwards shortcut keys to the active component:
```js
// onKeyDown forwards global shortcuts (Escape, Ctrl/⌘ combos)
// to the active component when supported.
onKeyDown(ev) {
if (!this.current || !ev || !(ev instanceof KeyboardEvent) || !ev.code) {
return;
} else if (!ev.ctrlKey && !ev.metaKey && ev.code !== "Escape") {
return;
} else if (typeof this.current?.onShortCut !== "function") {
return;
}
if (this.current.onShortCut(ev)) {
ev.preventDefault();
}
}
```
- Implement `onShortCut(ev)` on pages or dialogs when you need to react to Ctrl / ⌘ combinations or global Escape handling. The helper only forwards events where `ev.ctrlKey` or `ev.metaKey` is `true`, or the Escape key is pressed, so it cannot be repurposed for arbitrary keys (a component-side sketch follows this list).
- Persistent dialogs that must suppress Vuetify's rejection animation should still attach a direct `@keydown.esc.exact` handler; `onShortCut(ev)` alone does not override the built-in dialog behaviour.
- Return `true` from `onShortCut(ev)` after handling a shortcut to signal `preventDefault()`. Return `false` to fall back to the browser's native behaviour.
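A component-side sketch (`close()` and `copySelection()` are illustrative method names):

```js
// Sketch only: implemented on a page or dialog registered via $view.enter(this).
onShortCut(ev) {
  if (ev.code === "Escape") {
    this.close(ev);
    return true; // onKeyDown() will then call ev.preventDefault()
  } else if ((ev.ctrlKey || ev.metaKey) && ev.code === "KeyC") {
    this.copySelection(); // illustrative Ctrl/⌘+C action
    return true;
  }
  return false; // fall back to the browser's native behaviour
}
```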
### Example: Delete Confirmation Dialog
```vue
@ -90,19 +158,22 @@ Vuetify dialogs are teleported to the overlay container, so consistent refs and
max-width="350"
class="p-dialog p-file-delete-dialog"
@keydown.esc.exact="close"
@keyup.enter.exact="confirm"
@after-enter="afterEnter"
@after-leave="afterLeave"
>
<v-card ref="content" tabindex="-1">
<v-card-title class="d-flex justify-start align-center ga-3">
<v-icon icon="mdi-delete-outline" size="54" color="primary"></v-icon>
<p class="text-subtitle-1">{{ $gettext(`Are you sure you want to permanently delete this file?`) }}</p>
<p class="text-subtitle-1">{{ $gettext(`Are you sure?`) }}</p>
</v-card-title>
<v-card-actions class="action-buttons mt-1">
<v-btn variant="flat" color="button" class="action-cancel" @click.stop="close">
<v-btn variant="flat" color="button"
class="action-cancel" @click.stop="close">
{{ $gettext(`Cancel`) }}
</v-btn>
<v-btn color="highlight" variant="flat" class="action-confirm" @click.stop="confirm">
<v-btn color="highlight" variant="flat"
class="action-confirm" @click.stop="confirm">
{{ $gettext(`Delete`) }}
</v-btn>
</v-card-actions>

View file

@ -222,16 +222,6 @@ function resolveFocusTarget(root) {
if (sentinel instanceof HTMLElement) {
return sentinel;
}
if (!window.$isMobile) {
const focusable = el.querySelector(
'input:not([type="hidden"]), select, textarea, button, [tabindex]:not([tabindex="-1"])'
);
if (focusable instanceof HTMLElement) {
return focusable;
}
}
} catch {
// Ignore.
}
@ -294,17 +284,17 @@ export function findFocusElement(c) {
if (c.$refs && c.$refs instanceof Object) {
focusRefs.forEach((r) => {
if (c.$refs[r]) {
candidates.push(c.$refs[r]);
const el = getHTMLElement(c.$refs[r]);
if (el) {
candidates.push(el);
}
}
});
}
if (c.$el) {
candidates.push(c.$el);
}
if (c.$el?.parentElement) {
candidates.push(c.$el.parentElement);
const el = getHTMLElement(c);
if (el) {
candidates.push(el);
}
for (let i = 0; i < candidates.length; i++) {

View file

@ -8,7 +8,7 @@
scrim
max-width="360"
class="p-dialog p-confirm-dialog"
@keyup.esc.exact="close"
@keydown.esc.exact="close"
@keyup.enter.exact="confirm"
@after-enter="afterEnter"
@after-leave="afterLeave"

View file

@ -7,6 +7,7 @@
class="p-dialog dialog-label-edit"
color="background"
@keydown.esc.exact="close"
@keyup.enter.exact="confirm"
@after-enter="afterEnter"
@after-leave="afterLeave"
>
@ -34,7 +35,6 @@
:label="$gettext('Name')"
:disabled="disabled"
class="input-title"
@keyup.enter="confirm"
></v-text-field>
</v-col>
<v-col sm="4">

View file

@ -17,7 +17,7 @@
@keydown.space.exact="onKeyDown"
@keydown.left.exact="onKeyDown"
@keydown.right.exact="onKeyDown"
@keydown.esc.stop="close"
@keydown.esc.exact.stop="close"
@click.capture="captureDialogClick"
@pointerdown.capture="captureDialogPointerDown"
>

View file

@ -8,11 +8,11 @@
scrim
scrollable
class="p-location-dialog"
@keydown.esc="close"
@keydown.esc.exact="close"
@after-enter="afterEnter"
@after-leave="afterLeave"
>
<v-card :tile="$vuetify.display.xs">
<v-card ref="content" tabindex="-1" :tile="$vuetify.display.xs">
<v-toolbar v-if="$vuetify.display.xs" flat color="navigation" class="mb-4" density="compact">
<v-btn icon @click.stop="close">
<v-icon>mdi-close</v-icon>
@ -193,6 +193,18 @@ export default {
},
},
methods: {
afterEnter() {
this.$view.enter(this);
if (this.currentLat && this.currentLng && !(this.currentLat === 0 && this.currentLng === 0)) {
this.fetchLocationInfo(this.currentLat, this.currentLng);
}
},
afterLeave() {
this.location = null;
this.locationLoading = false;
this.resetSearchState();
this.$view.leave(this);
},
close() {
this.$emit("close");
},
@ -206,16 +218,6 @@ export default {
});
}
},
afterEnter() {
if (this.currentLat && this.currentLng && !(this.currentLat === 0 && this.currentLng === 0)) {
this.fetchLocationInfo(this.currentLat, this.currentLng);
}
},
afterLeave() {
this.location = null;
this.locationLoading = false;
this.resetSearchState();
},
onMarkerMoved(event) {
this.setPositionAndFetchInfo(event.lat, event.lng);
},

View file

@ -12,7 +12,7 @@
autocorrect="off"
autocapitalize="none"
class="input-coordinates"
@keydown.enter="applyCoordinates"
@keydown.enter.stop="applyCoordinates"
@update:model-value="onCoordinateInputChange"
@paste="pastePosition"
>

View file

@ -7,6 +7,7 @@
class="dialog-person-edit"
color="background"
@keydown.esc.exact="close"
@keyup.enter.exact="confirm"
@after-enter="afterEnter"
@after-leave="afterLeave"
>
@ -34,7 +35,6 @@
:label="$gettext('Name')"
:disabled="disabled"
class="input-title"
@keyup.enter="confirm"
></v-text-field>
</v-col>
<v-col sm="4">

View file

@ -6,6 +6,8 @@
max-width="350"
class="p-dialog p-people-merge-dialog"
@keydown.esc.exact="close"
@after-enter="afterEnter"
@after-leave="afterLeave"
>
<v-card>
<v-card-title class="d-flex justify-start align-center ga-3">
@ -42,6 +44,7 @@ export default {
default: new Subject(),
},
},
emits: ["close", "confirm"],
data() {
return {};
},
@ -58,6 +61,12 @@ export default {
},
},
methods: {
afterEnter() {
this.$view.enter(this);
},
afterLeave() {
this.$view.leave(this);
},
close() {
this.$emit("close");
},

View file

@ -1,6 +1,7 @@
<template>
<v-dialog
ref="dialog"
tabindex="-1"
:model-value="visible"
:fullscreen="$vuetify.display.smAndDown"
scrim
@ -11,7 +12,7 @@
@after-leave="afterLeave"
@keydown.left.exact="onKeyLeft"
@keydown.right.exact="onKeyRight"
@keydown.esc.stop="onClose"
@keydown.esc.exact.stop="onClose"
>
<v-card ref="content" tabindex="-1" :tile="$vuetify.display.smAndDown">
<v-toolbar flat color="navigation" :density="$vuetify.display.smAndDown ? 'compact' : 'comfortable'">
@ -195,7 +196,7 @@ export default {
},
methods: {
afterEnter() {
this.$view.enter(this);
this.$view.enter(this, this.$refs.content);
this.ready = true;
},
afterLeave() {

Diffs for 46 additional files were suppressed (too large or lines too long).

View file

@ -111,7 +111,12 @@
@close="dialog.delete = false"
@confirm="onConfirmDelete"
></p-confirm-dialog>
<v-dialog :model-value="details.visible" max-width="550" class="p-dialog">
<v-dialog
:model-value="details.visible"
max-width="550"
class="p-dialog"
@keydown.esc.exact="details.visible = false"
>
<v-card>
<v-card-title class="d-flex justify-start align-center ga-3">
<v-icon v-if="details.err.Level === 'error'" icon="mdi-alert-circle-outline" color="error"></v-icon>

View file

@ -0,0 +1,102 @@
import { mount, config as VTUConfig } from "@vue/test-utils";
import { describe, it, expect, beforeEach } from "vitest";
import { nextTick } from "vue";
import PLightbox from "component/lightbox.vue";
const mountLightbox = () =>
mount(PLightbox, {
global: {
stubs: {
"v-dialog": true,
"v-icon": true,
"v-slider": true,
"p-lightbox-menu": true,
"p-sidebar-info": true,
},
},
});
describe("PLightbox (low-mock, jsdom-friendly)", () => {
beforeEach(() => {
localStorage.removeItem("lightbox.info");
sessionStorage.removeItem("lightbox.muted");
});
it("toggleInfo updates info and localStorage when visible", async () => {
const wrapper = mountLightbox();
await wrapper.setData({ visible: true });
// Use exposed onShortCut to trigger info toggle (KeyI)
await wrapper.vm.onShortCut({ code: "KeyI" });
await nextTick();
expect(localStorage.getItem("lightbox.info")).toBe("true");
await wrapper.vm.onShortCut({ code: "KeyI" });
await nextTick();
expect(localStorage.getItem("lightbox.info")).toBe("false");
});
it("toggleMute writes sessionStorage without requiring video or exposed state", async () => {
const wrapper = mountLightbox();
expect(sessionStorage.getItem("lightbox.muted")).toBeNull();
await wrapper.vm.onShortCut({ code: "KeyM" });
expect(sessionStorage.getItem("lightbox.muted")).toBe("true");
await wrapper.vm.onShortCut({ code: "KeyM" });
expect(sessionStorage.getItem("lightbox.muted")).toBe("false");
});
it("getPadding returns expected structure for large and small screens", async () => {
const wrapper = mountLightbox();
// Large viewport
const large = wrapper.vm.$options.methods.getPadding.call(
wrapper.vm,
{ x: 1200, y: 800 },
{ width: 4000, height: 3000 }
);
expect(large).toHaveProperty("top");
expect(large).toHaveProperty("bottom");
expect(large).toHaveProperty("left");
expect(large).toHaveProperty("right");
// Small viewport (<= mobileBreakpoint) should yield zeros
const small = wrapper.vm.$options.methods.getPadding.call(
wrapper.vm,
{ x: 360, y: 640 },
{ width: 1200, height: 800 }
);
expect(small).toEqual({ top: 0, bottom: 0, left: 0, right: 0 });
});
it("KeyI is ignored when dialog is not visible", async () => {
const wrapper = mountLightbox();
expect(localStorage.getItem("lightbox.info")).toBeNull();
await wrapper.vm.onShortCut({ code: "KeyI" });
expect(localStorage.getItem("lightbox.info")).toBeNull();
});
it("getViewport falls back to window size without content ref", () => {
const wrapper = mountLightbox();
const vp = wrapper.vm.$options.methods.getViewport.call(wrapper.vm);
expect(vp.x).toBeGreaterThan(0);
expect(vp.y).toBeGreaterThan(0);
});
it("menuActions marks Download action visible when allowed", () => {
const wrapper = mountLightbox();
const ctx = {
$gettext: VTUConfig.global.mocks.$gettext,
$pgettext: VTUConfig.global.mocks.$pgettext,
// minimal state needed by menuActions visibility checks
canManageAlbums: false,
canArchive: false,
canDownload: true,
collection: null,
context: "",
model: {},
};
const actions = wrapper.vm.$options.methods.menuActions.call(ctx);
const download = actions.find((a) => a?.name === "download");
expect(download).toBeTruthy();
expect(download.visible).toBe(true);
});
});

View file

@ -26,10 +26,48 @@ const vuetify = createVuetify({
// Configure Vue Test Utils global configuration
config.global.mocks = {
$gettext: (text) => text,
$pgettext: (_ctx, text) => text,
$isRtl: false,
$config: {
feature: (_name) => true,
feature: () => true,
get: () => false,
getSettings: () => ({ features: { edit: true, favorites: true, download: true, archive: true } }),
allow: () => true,
featExperimental: () => false,
featDevelop: () => false,
values: {},
dir: () => "ltr",
},
$event: {
subscribe: () => "sub-id",
subscribeOnce: () => "sub-id-once",
unsubscribe: () => {},
publish: () => {},
},
$view: {
enter: () => {},
leave: () => {},
isActive: () => true,
},
$notify: { success: () => {}, error: () => {}, warn: () => {} },
$fullscreen: {
isSupported: () => true,
isEnabled: () => false,
request: () => Promise.resolve(),
exit: () => Promise.resolve(),
},
$clipboard: { selection: [], has: () => false, toggle: () => {} },
$util: {
hasTouch: () => false,
encodeHTML: (s) => s,
sanitizeHtml: (s) => s,
formatSeconds: (n) => String(n),
formatRemainingSeconds: () => "0",
videoFormat: () => "avc",
videoFormatUrl: () => "/v.mp4",
thumb: () => ({ src: "/t.jpg", w: 100, h: 100 }),
},
$api: { post: vi.fn(), delete: vi.fn(), get: vi.fn() },
};
config.global.plugins = [vuetify];

go.mod (12 changed lines)
View file

@ -14,7 +14,7 @@ require (
github.com/esimov/pigo v1.4.6
github.com/gin-contrib/gzip v1.2.3
github.com/gin-gonic/gin v1.11.0
github.com/golang/geo v0.0.0-20251110120158-2d428c1fd7a2
github.com/golang/geo v0.0.0-20251111181513-e7f3a1a58fb3
github.com/google/open-location-code/go v0.0.0-20250620134813-83986da0156b
github.com/gorilla/websocket v1.5.3
github.com/gosimple/slug v1.15.0
@ -38,15 +38,15 @@ require (
github.com/tidwall/gjson v1.18.0
github.com/ulule/deepcopier v0.0.0-20200430083143-45decc6639b6
go4.org v0.0.0-20230225012048-214862532bf5 // indirect
golang.org/x/crypto v0.43.0
golang.org/x/net v0.46.0
golang.org/x/crypto v0.44.0
golang.org/x/net v0.47.0
gonum.org/v1/gonum v0.16.0
gopkg.in/yaml.v2 v2.4.0
)
require (
github.com/go-xmlfmt/xmlfmt v1.1.3 // indirect
golang.org/x/image v0.32.0
golang.org/x/image v0.33.0
)
require github.com/olekukonko/tablewriter v1.1.0
@ -71,7 +71,7 @@ require (
require github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0
require golang.org/x/text v0.30.0
require golang.org/x/text v0.31.0
require (
github.com/IGLOU-EU/go-wildcard v1.0.3
@ -89,7 +89,7 @@ require (
github.com/wamuir/graft v0.10.0
github.com/yalue/onnxruntime_go v1.22.0
github.com/zitadel/oidc/v3 v3.45.0
golang.org/x/mod v0.29.0
golang.org/x/mod v0.30.0
golang.org/x/sys v0.38.0
google.golang.org/protobuf v1.36.10
gorm.io/driver/mysql v1.5.7

go.sum (24 changed lines)
View file

@ -198,8 +198,8 @@ github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGw
github.com/golang/geo v0.0.0-20190916061304-5b978397cfec/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI=
github.com/golang/geo v0.0.0-20200319012246-673a6f80352d/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI=
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551/go.mod h1:QZ0nwyI2jOfgRAoBvP+ab5aRr7c9x7lhGEJrKvBwjWI=
github.com/golang/geo v0.0.0-20251110120158-2d428c1fd7a2 h1:y32Bz5DExjF7HJwv9PIr4xM34xYm7Y0FzFtk4iGBOTo=
github.com/golang/geo v0.0.0-20251110120158-2d428c1fd7a2/go.mod h1:Mymr9kRGDc64JPr03TSZmuIBODZ3KyswLzm1xL0HFA8=
github.com/golang/geo v0.0.0-20251111181513-e7f3a1a58fb3 h1:PO47XJrekjtVhITkwfywMBMbKW2WNU49y9LmqvDzwIc=
github.com/golang/geo v0.0.0-20251111181513-e7f3a1a58fb3/go.mod h1:Mymr9kRGDc64JPr03TSZmuIBODZ3KyswLzm1xL0HFA8=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
@ -462,8 +462,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8=
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
golang.org/x/crypto v0.44.0 h1:A97SsFvM3AIwEEmTBiaxPPTYpDC47w720rdiiUvgoAU=
golang.org/x/crypto v0.44.0/go.mod h1:013i+Nw79BMiQiMsOPcVCB5ZIJbYkerPrGnOa00tvmc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@ -478,8 +478,8 @@ golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+o
golang.org/x/image v0.0.0-20200927104501-e162460cd6b5/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/image v0.0.0-20220902085622-e7cb96979f69/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY=
golang.org/x/image v0.18.0/go.mod h1:4yyo5vMFQjVjUcVk4jEQcU9MGy/rulF5WvUILseCM2E=
golang.org/x/image v0.32.0 h1:6lZQWq75h7L5IWNk0r+SCpUJ6tUVd3v4ZHnbRKLkUDQ=
golang.org/x/image v0.32.0/go.mod h1:/R37rrQmKXtO6tYXAjtDLwQgFLHmhW+V6ayXlxzP2Pc=
golang.org/x/image v0.33.0 h1:LXRZRnv1+zGd5XBUVRFmYEphyyKJjQjCRiOuAP3sZfQ=
golang.org/x/image v0.33.0/go.mod h1:DD3OsTYT9chzuzTQt+zMcOlBHgfoKQb1gry8p76Y1sc=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@ -500,8 +500,8 @@ golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk=
golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -530,8 +530,8 @@ golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@ -608,8 +608,8 @@ golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI=

View file

@ -9,6 +9,9 @@ import (
"io"
"net/http"
"github.com/sirupsen/logrus"
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/http/header"
)
@ -69,6 +72,10 @@ func PerformApiRequest(apiRequest *ApiRequest, uri, method, key string) (apiResp
return nil, parseErr
}
if log.IsLevelEnabled(logrus.TraceLevel) {
log.Tracef("vision: response %s", string(body))
}
return parsed, nil
}
@ -89,12 +96,12 @@ func PerformApiRequest(apiRequest *ApiRequest, uri, method, key string) (apiResp
return apiResponse, nil
}
func decodeOllamaResponse(data []byte) (*ApiResponseOllama, error) {
resp := &ApiResponseOllama{}
func decodeOllamaResponse(data []byte) (*ollama.Response, error) {
resp := &ollama.Response{}
dec := json.NewDecoder(bytes.NewReader(data))
for {
var chunk ApiResponseOllama
var chunk ollama.Response
if err := dec.Decode(&chunk); err != nil {
if errors.Is(err, io.EOF) {
break

View file

@ -8,6 +8,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
"github.com/photoprism/photoprism/pkg/http/scheme"
)
@ -49,7 +50,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
var req ApiRequest
assert.NoError(t, json.NewDecoder(r.Body).Decode(&req))
assert.Equal(t, FormatJSON, req.Format)
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
Model: "qwen2.5vl:latest",
Response: `{"labels":[{"name":"test","confidence":0.9,"topicality":0.8}]}`,
}))
@ -72,7 +73,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
})
t.Run("LabelsWithCodeFence", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
Model: "gemma3:latest",
Response: "```json\n{\"labels\":[{\"name\":\"lingerie\",\"confidence\":0.81,\"topicality\":0.73}]}\n```\nThe model provided additional commentary.",
}))
@ -95,7 +96,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
})
t.Run("CaptionFallback", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
Model: "qwen2.5vl:latest",
Response: "plain text",
}))

View file

@ -1,10 +1,8 @@
package vision
import (
"errors"
"fmt"
"os"
"time"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/http/scheme"
@ -12,53 +10,6 @@ import (
"github.com/photoprism/photoprism/pkg/rnd"
)
// ApiResponseOllama represents a Ollama API service response.
type ApiResponseOllama struct {
Id string `yaml:"Id,omitempty" json:"id,omitempty"`
Code int `yaml:"Code,omitempty" json:"code,omitempty"`
Error string `yaml:"Error,omitempty" json:"error,omitempty"`
Model string `yaml:"Model,omitempty" json:"model,omitempty"`
CreatedAt time.Time `yaml:"CreatedAt,omitempty" json:"created_at,omitempty"`
Response string `yaml:"Response,omitempty" json:"response,omitempty"`
Done bool `yaml:"Done,omitempty" json:"done,omitempty"`
Context []int `yaml:"Context,omitempty" json:"context,omitempty"`
TotalDuration int64 `yaml:"TotalDuration,omitempty" json:"total_duration,omitempty"`
LoadDuration int `yaml:"LoadDuration,omitempty" json:"load_duration,omitempty"`
PromptEvalCount int `yaml:"PromptEvalCount,omitempty" json:"prompt_eval_count,omitempty"`
PromptEvalDuration int `yaml:"PromptEvalDuration,omitempty" json:"prompt_eval_duration,omitempty"`
EvalCount int `yaml:"EvalCount,omitempty" json:"eval_count,omitempty"`
EvalDuration int64 `yaml:"EvalDuration,omitempty" json:"eval_duration,omitempty"`
Result ApiResult `yaml:"Result,omitempty" json:"result,omitempty"`
}
// Err returns an error if the request has failed.
func (r *ApiResponseOllama) Err() error {
if r == nil {
return errors.New("response is nil")
}
if r.Code >= 400 {
if r.Error != "" {
return errors.New(r.Error)
}
return fmt.Errorf("error %d", r.Code)
} else if r.Result.IsEmpty() {
return errors.New("no result")
}
return nil
}
// HasResult checks if there is at least one result in the response data.
func (r *ApiResponseOllama) HasResult() bool {
if r == nil {
return false
}
return !r.Result.IsEmpty()
}
// NewApiRequestOllama returns a new Ollama API request with the specified images as payload.
func NewApiRequestOllama(images Files, fileScheme scheme.Type) (*ApiRequest, error) {
imagesData := make(Files, len(images))

View file

@ -11,6 +11,8 @@ import (
"github.com/sirupsen/logrus"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/ai/vision/schema"
"github.com/photoprism/photoprism/internal/api/download"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/fs"
@ -58,6 +60,11 @@ type ApiRequestOptions struct {
UseMmap bool `yaml:"UseMmap,omitempty" json:"use_mmap,omitempty"`
UseMlock bool `yaml:"UseMlock,omitempty" json:"use_mlock,omitempty"`
NumThread int `yaml:"NumThread,omitempty" json:"num_thread,omitempty"`
MaxOutputTokens int `yaml:"MaxOutputTokens,omitempty" json:"max_output_tokens,omitempty"`
Detail string `yaml:"Detail,omitempty" json:"detail,omitempty"`
ForceJson bool `yaml:"ForceJson,omitempty" json:"force_json,omitempty"`
SchemaVersion string `yaml:"SchemaVersion,omitempty" json:"schema_version,omitempty"`
CombineOutputs string `yaml:"CombineOutputs,omitempty" json:"combine_outputs,omitempty"`
}
// ApiRequestContext represents a context parameter returned from a previous request.
@ -77,6 +84,7 @@ type ApiRequest struct {
Context *ApiRequestContext `form:"context" yaml:"Context,omitempty" json:"context,omitempty"`
Stream bool `form:"stream" yaml:"Stream,omitempty" json:"stream"`
Images Files `form:"images" yaml:"Images,omitempty" json:"images,omitempty"`
Schema json.RawMessage `form:"schema" yaml:"Schema,omitempty" json:"schema,omitempty"`
ResponseFormat ApiFormat `form:"-" yaml:"-" json:"-"`
}
@ -195,6 +203,14 @@ func (r *ApiRequest) GetResponseFormat() ApiFormat {
// JSON returns the request data as JSON-encoded bytes.
func (r *ApiRequest) JSON() ([]byte, error) {
if r == nil {
return nil, errors.New("api request is nil")
}
if r.ResponseFormat == ApiFormatOpenAI {
return r.openAIJSON()
}
return json.Marshal(*r)
}
@ -229,6 +245,8 @@ func (r *ApiRequest) sanitizedForLog() ApiRequest {
sanitized.Url = sanitizeLogPayload(r.Url)
sanitized.Schema = r.Schema
return sanitized
}
@ -287,3 +305,134 @@ func isLikelyBase64(value string) bool {
return true
}
// openAIJSON converts the request data into an OpenAI Responses API payload.
func (r *ApiRequest) openAIJSON() ([]byte, error) {
detail := openai.DefaultDetail
if opts := r.Options; opts != nil && strings.TrimSpace(opts.Detail) != "" {
detail = strings.TrimSpace(opts.Detail)
}
messages := make([]openai.InputMessage, 0, 2)
if system := strings.TrimSpace(r.System); system != "" {
messages = append(messages, openai.InputMessage{
Role: "system",
Type: "message",
Content: []openai.ContentItem{
{
Type: openai.ContentTypeText,
Text: system,
},
},
})
}
userContent := make([]openai.ContentItem, 0, len(r.Images)+1)
if prompt := strings.TrimSpace(r.Prompt); prompt != "" {
userContent = append(userContent, openai.ContentItem{
Type: openai.ContentTypeText,
Text: prompt,
})
}
for _, img := range r.Images {
if img == "" {
continue
}
userContent = append(userContent, openai.ContentItem{
Type: openai.ContentTypeImage,
ImageURL: img,
Detail: detail,
})
}
if len(userContent) > 0 {
messages = append(messages, openai.InputMessage{
Role: "user",
Type: "message",
Content: userContent,
})
}
if len(messages) == 0 {
return nil, errors.New("openai request requires at least one message")
}
payload := openai.HTTPRequest{
Model: strings.TrimSpace(r.Model),
Input: messages,
}
if payload.Model == "" {
payload.Model = openai.DefaultModel
}
if strings.HasPrefix(strings.ToLower(payload.Model), "gpt-5") {
payload.Reasoning = &openai.Reasoning{Effort: "low"}
}
if opts := r.Options; opts != nil {
if opts.MaxOutputTokens > 0 {
payload.MaxOutputTokens = opts.MaxOutputTokens
}
if opts.Temperature > 0 {
payload.Temperature = opts.Temperature
}
if opts.TopP > 0 {
payload.TopP = opts.TopP
}
if opts.PresencePenalty != 0 {
payload.PresencePenalty = opts.PresencePenalty
}
if opts.FrequencyPenalty != 0 {
payload.FrequencyPenalty = opts.FrequencyPenalty
}
}
if format := buildOpenAIResponseFormat(r); format != nil {
payload.Text = &openai.TextOptions{
Format: format,
}
}
return json.Marshal(payload)
}
// buildOpenAIResponseFormat determines which response_format to send to OpenAI.
func buildOpenAIResponseFormat(r *ApiRequest) *openai.ResponseFormat {
if r == nil {
return nil
}
opts := r.Options
hasSchema := len(r.Schema) > 0
if !hasSchema && (opts == nil || !opts.ForceJson) {
return nil
}
result := &openai.ResponseFormat{}
if hasSchema {
result.Type = openai.ResponseFormatJSONSchema
result.Schema = r.Schema
if opts != nil && strings.TrimSpace(opts.SchemaVersion) != "" {
result.Name = strings.TrimSpace(opts.SchemaVersion)
} else {
result.Name = schema.JsonSchemaName(r.Schema, openai.DefaultSchemaVersion)
}
} else {
result.Type = openai.ResponseFormatJSONObject
}
return result
}

View file

@ -53,7 +53,11 @@ func captionInternal(images Files, mediaSrc media.Src) (result *CaptionResult, m
apiRequest.System = model.GetSystemPrompt()
apiRequest.Prompt = model.GetPrompt()
apiRequest.Options = model.GetOptions()
if apiRequest.Options == nil {
apiRequest.Options = model.GetOptions()
}
apiRequest.WriteLog()
if apiResponse, err = PerformApiRequest(apiRequest, uri, method, model.EndpointKey()); err != nil {

View file

@ -58,14 +58,15 @@ func init() {
RegisterEngineAlias(EngineVision, EngineInfo{
RequestFormat: ApiFormatVision,
ResponseFormat: ApiFormatVision,
FileScheme: string(scheme.Data),
FileScheme: scheme.Data,
DefaultResolution: DefaultResolution,
})
RegisterEngineAlias(openai.EngineName, EngineInfo{
Uri: "https://api.openai.com/v1/responses",
RequestFormat: ApiFormatOpenAI,
ResponseFormat: ApiFormatOpenAI,
FileScheme: string(scheme.Data),
FileScheme: scheme.Data,
DefaultResolution: openai.DefaultResolution,
})
}
@ -79,6 +80,7 @@ func RegisterEngine(format ApiFormat, engine Engine) {
// EngineInfo describes metadata that can be associated with an engine alias.
type EngineInfo struct {
Uri string
RequestFormat ApiFormat
ResponseFormat ApiFormat
FileScheme string

View file

@ -28,7 +28,7 @@ func init() {
RegisterEngineAlias(ollama.EngineName, EngineInfo{
RequestFormat: ApiFormatOllama,
ResponseFormat: ApiFormatOllama,
FileScheme: string(scheme.Base64),
FileScheme: scheme.Base64,
DefaultResolution: ollama.DefaultResolution,
})
@ -72,7 +72,7 @@ func (ollamaDefaults) SchemaTemplate(model *Model) string {
switch model.Type {
case ModelTypeLabels:
return ollama.LabelsSchema(model.PromptContains("nsfw"))
return ollama.SchemaLabels(model.PromptContains("nsfw"))
}
return ""
@ -134,64 +134,99 @@ func (ollamaParser) Parse(ctx context.Context, req *ApiRequest, raw []byte, stat
return nil, err
}
result := &ApiResponse{
response := &ApiResponse{
Id: req.GetId(),
Code: status,
Model: &Model{Name: ollamaResp.Model},
Result: ApiResult{
Labels: append([]LabelResult{}, ollamaResp.Result.Labels...),
Caption: func() *CaptionResult {
if ollamaResp.Result.Caption != nil {
copyCaption := *ollamaResp.Result.Caption
return &copyCaption
}
return nil
}(),
Labels: convertOllamaLabels(ollamaResp.Result.Labels),
Caption: convertOllamaCaption(ollamaResp.Result.Caption),
},
}
parsedLabels := len(result.Result.Labels) > 0
parsedLabels := len(response.Result.Labels) > 0
if !parsedLabels && strings.TrimSpace(ollamaResp.Response) != "" && req.Format == FormatJSON {
if labels, parseErr := parseOllamaLabels(ollamaResp.Response); parseErr != nil {
log.Debugf("vision: %s (parse ollama labels)", clean.Error(parseErr))
// Qwen3-VL models stream their JSON payload in the "Thinking" field.
fallbackJSON := strings.TrimSpace(ollamaResp.Response)
if fallbackJSON == "" {
fallbackJSON = strings.TrimSpace(ollamaResp.Thinking)
}
if !parsedLabels && fallbackJSON != "" && (req.Format == FormatJSON || strings.HasPrefix(fallbackJSON, "{")) {
if labels, parseErr := parseOllamaLabels(fallbackJSON); parseErr != nil {
log.Warnf("vision: %s (parse ollama labels)", clean.Error(parseErr))
} else if len(labels) > 0 {
result.Result.Labels = append(result.Result.Labels, labels...)
response.Result.Labels = append(response.Result.Labels, labels...)
parsedLabels = true
}
}
if parsedLabels {
filtered := result.Result.Labels[:0]
for i := range result.Result.Labels {
if result.Result.Labels[i].Confidence <= 0 {
result.Result.Labels[i].Confidence = ollama.LabelConfidenceDefault
filtered := response.Result.Labels[:0]
for i := range response.Result.Labels {
if response.Result.Labels[i].Confidence <= 0 {
response.Result.Labels[i].Confidence = ollama.LabelConfidenceDefault
}
if result.Result.Labels[i].Topicality <= 0 {
result.Result.Labels[i].Topicality = result.Result.Labels[i].Confidence
if response.Result.Labels[i].Topicality <= 0 {
response.Result.Labels[i].Topicality = response.Result.Labels[i].Confidence
}
// Apply thresholds and canonicalize the name.
normalizeLabelResult(&result.Result.Labels[i])
normalizeLabelResult(&response.Result.Labels[i])
if result.Result.Labels[i].Name == "" {
if response.Result.Labels[i].Name == "" {
continue
}
if result.Result.Labels[i].Source == "" {
result.Result.Labels[i].Source = entity.SrcOllama
if response.Result.Labels[i].Source == "" {
response.Result.Labels[i].Source = entity.SrcOllama
}
filtered = append(filtered, result.Result.Labels[i])
filtered = append(filtered, response.Result.Labels[i])
}
result.Result.Labels = filtered
response.Result.Labels = filtered
} else if caption := strings.TrimSpace(ollamaResp.Response); caption != "" {
result.Result.Caption = &CaptionResult{
response.Result.Caption = &CaptionResult{
Text: caption,
Source: entity.SrcOllama,
}
}
return result, nil
return response, nil
}
func convertOllamaLabels(payload []ollama.LabelPayload) []LabelResult {
if len(payload) == 0 {
return nil
}
labels := make([]LabelResult, len(payload))
for i := range payload {
labels[i] = LabelResult{
Name: payload[i].Name,
Source: payload[i].Source,
Priority: payload[i].Priority,
Confidence: payload[i].Confidence,
Topicality: payload[i].Topicality,
Categories: payload[i].Categories,
NSFW: payload[i].NSFW,
NSFWConfidence: payload[i].NSFWConfidence,
}
}
return labels
}
func convertOllamaCaption(payload *ollama.CaptionPayload) *CaptionResult {
if payload == nil {
return nil
}
return &CaptionResult{
Text: payload.Text,
Source: payload.Source,
Confidence: payload.Confidence,
}
}

View file

@ -10,9 +10,9 @@ import (
func TestOllamaDefaultConfidenceApplied(t *testing.T) {
req := &ApiRequest{Format: FormatJSON}
payload := ApiResponseOllama{
Result: ApiResult{
Labels: []LabelResult{{Name: "forest path", Confidence: 0, Topicality: 0}},
payload := ollama.Response{
Result: ollama.ResultPayload{
Labels: []ollama.LabelPayload{{Name: "forest path", Confidence: 0, Topicality: 0}},
},
}
raw, err := json.Marshal(payload)
@ -37,3 +37,46 @@ func TestOllamaDefaultConfidenceApplied(t *testing.T) {
t.Fatalf("expected topicality to default to confidence, got %.2f", resp.Result.Labels[0].Topicality)
}
}
func TestOllamaParserFallbacks(t *testing.T) {
t.Run("ThinkingFieldJSON", func(t *testing.T) {
req := &ApiRequest{Format: FormatJSON}
payload := ollama.Response{
Thinking: `{"labels":[{"name":"cat","confidence":0.9,"topicality":0.8}]}`,
}
raw, err := json.Marshal(payload)
if err != nil {
t.Fatalf("marshal: %v", err)
}
parser := ollamaParser{}
resp, err := parser.Parse(context.Background(), req, raw, 200)
if err != nil {
t.Fatalf("parse failed: %v", err)
}
if len(resp.Result.Labels) != 1 || resp.Result.Labels[0].Name != "Cat" {
t.Fatalf("expected cat label, got %+v", resp.Result.Labels)
}
})
t.Run("JsonPrefixedResponse", func(t *testing.T) {
req := &ApiRequest{} // no explicit format
payload := ollama.Response{
Response: `{"labels":[{"name":"cat","confidence":0.91,"topicality":0.81}]}`,
}
raw, err := json.Marshal(payload)
if err != nil {
t.Fatalf("marshal: %v", err)
}
parser := ollamaParser{}
resp, err := parser.Parse(context.Background(), req, raw, 200)
if err != nil {
t.Fatalf("parse failed: %v", err)
}
if len(resp.Result.Labels) != 1 || resp.Result.Labels[0].Name != "Cat" {
t.Fatalf("expected cat label, got %+v", resp.Result.Labels)
}
})
}

View file

@ -1,18 +1,342 @@
package vision
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/entity"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/http/scheme"
)
// init registers the OpenAI engine alias so models can set Engine: "openai"
// and inherit sensible defaults (request/response formats, file scheme, and
// preferred thumbnail resolution).
// openaiDefaults provides canned prompts, schema templates, and options for OpenAI engines.
type openaiDefaults struct{}
// openaiBuilder prepares ApiRequest objects for OpenAI's Responses API.
type openaiBuilder struct{}
// openaiParser converts Responses API payloads into ApiResponse instances.
type openaiParser struct{}
func init() {
RegisterEngineAlias(openai.EngineName, EngineInfo{
RequestFormat: ApiFormatOpenAI,
ResponseFormat: ApiFormatOpenAI,
FileScheme: string(scheme.Base64),
DefaultResolution: openai.DefaultResolution,
RegisterEngine(ApiFormatOpenAI, Engine{
Builder: openaiBuilder{},
Parser: openaiParser{},
Defaults: openaiDefaults{},
})
}
// SystemPrompt returns the default OpenAI system prompt for the specified model type.
func (openaiDefaults) SystemPrompt(model *Model) string {
if model == nil {
return ""
}
switch model.Type {
case ModelTypeCaption:
return openai.CaptionSystem
case ModelTypeLabels:
return openai.LabelSystem
default:
return ""
}
}
// UserPrompt returns the default OpenAI user prompt for the specified model type.
func (openaiDefaults) UserPrompt(model *Model) string {
if model == nil {
return ""
}
switch model.Type {
case ModelTypeCaption:
return openai.CaptionPrompt
case ModelTypeLabels:
if DetectNSFWLabels {
return openai.LabelPromptNSFW
}
return openai.LabelPromptDefault
default:
return ""
}
}
// SchemaTemplate returns the JSON schema template for the model, if applicable.
func (openaiDefaults) SchemaTemplate(model *Model) string {
if model == nil {
return ""
}
switch model.Type {
case ModelTypeLabels:
return string(openai.SchemaLabels(model.PromptContains("nsfw")))
default:
return ""
}
}
// Options returns default OpenAI request options for the model.
func (openaiDefaults) Options(model *Model) *ApiRequestOptions {
if model == nil {
return nil
}
switch model.Type {
case ModelTypeCaption:
/*
Options:
Detail: low
MaxOutputTokens: 512
Temperature: 0.1
TopP: 0.9
(Sampling values are zeroed for GPT-5 models in openaiBuilder.Build.)
*/
return &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: openai.CaptionMaxTokens,
Temperature: openai.DefaultTemperature,
TopP: openai.DefaultTopP,
}
case ModelTypeLabels:
/*
Options:
Detail: low
MaxOutputTokens: 1024
Temperature: 0.1
ForceJson: true
SchemaVersion: "photoprism_vision_labels_v1"
(Sampling values are zeroed for GPT-5 models in openaiBuilder.Build.)
*/
return &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: openai.LabelsMaxTokens,
Temperature: openai.DefaultTemperature,
TopP: openai.DefaultTopP,
ForceJson: true,
}
default:
return nil
}
}
// Build constructs an OpenAI request payload using base64-encoded thumbnails.
func (openaiBuilder) Build(ctx context.Context, model *Model, files Files) (*ApiRequest, error) {
if model == nil {
return nil, ErrInvalidModel
}
dataReq, err := NewApiRequestImages(files, scheme.Data)
if err != nil {
return nil, err
}
req := &ApiRequest{
Id: dataReq.Id,
Images: append(Files(nil), dataReq.Images...),
ResponseFormat: ApiFormatOpenAI,
}
if opts := model.GetOptions(); opts != nil {
req.Options = cloneOptions(opts)
if model.Type == ModelTypeCaption {
// Captions default to plain text responses; structured JSON is optional.
req.Options.ForceJson = false
if req.Options.MaxOutputTokens < openai.CaptionMaxTokens {
req.Options.MaxOutputTokens = openai.CaptionMaxTokens
}
} else if model.Type == ModelTypeLabels {
if req.Options.MaxOutputTokens < openai.LabelsMaxTokens {
req.Options.MaxOutputTokens = openai.LabelsMaxTokens
}
}
if strings.HasPrefix(strings.ToLower(strings.TrimSpace(model.Name)), "gpt-5") {
req.Options.Temperature = 0
req.Options.TopP = 0
}
}
if schema := strings.TrimSpace(model.SchemaTemplate()); schema != "" {
if raw, parseErr := parseOpenAISchema(schema); parseErr != nil {
log.Warnf("vision: failed to parse OpenAI schema template (%s)", clean.Error(parseErr))
} else {
req.Schema = raw
}
}
return req, nil
}
// Parse converts an OpenAI Responses API payload into the internal ApiResponse representation.
func (openaiParser) Parse(ctx context.Context, req *ApiRequest, raw []byte, status int) (*ApiResponse, error) {
if status >= 300 {
if msg := openai.ParseErrorMessage(raw); msg != "" {
return nil, fmt.Errorf("openai: %s", msg)
}
return nil, fmt.Errorf("openai: status %d", status)
}
var resp openai.Response
if err := json.Unmarshal(raw, &resp); err != nil {
return nil, err
}
if resp.Error != nil && resp.Error.Message != "" {
return nil, errors.New(resp.Error.Message)
}
result := ApiResult{}
if jsonPayload := resp.FirstJSON(); len(jsonPayload) > 0 {
if err := populateOpenAIJSONResult(&result, jsonPayload); err != nil {
log.Debugf("vision: %s (parse openai json payload)", clean.Error(err))
}
}
if result.Caption == nil {
if text := resp.FirstText(); text != "" {
trimmed := strings.TrimSpace(text)
var parsedJSON bool
if len(trimmed) > 0 && (trimmed[0] == '{' || trimmed[0] == '[') {
if err := populateOpenAIJSONResult(&result, json.RawMessage(trimmed)); err != nil {
log.Debugf("vision: %s (parse openai json text payload)", clean.Error(err))
} else {
parsedJSON = true
}
}
if !parsedJSON && trimmed != "" {
result.Caption = &CaptionResult{
Text: trimmed,
Source: entity.SrcOpenAI,
}
}
}
}
var responseID string
if req != nil {
responseID = req.GetId()
}
modelName := strings.TrimSpace(resp.Model)
if modelName == "" && req != nil {
modelName = strings.TrimSpace(req.Model)
}
return &ApiResponse{
Id: responseID,
Code: status,
Model: &Model{Name: modelName},
Result: result,
}, nil
}
// parseOpenAISchema validates the provided JSON schema and returns it as a raw message.
func parseOpenAISchema(schema string) (json.RawMessage, error) {
var raw json.RawMessage
if err := json.Unmarshal([]byte(schema), &raw); err != nil {
return nil, err
}
return normalizeOpenAISchema(raw)
}
// normalizeOpenAISchema upgrades legacy label schema definitions so they comply with
// OpenAI's json_schema format requirements.
func normalizeOpenAISchema(raw json.RawMessage) (json.RawMessage, error) {
if len(raw) == 0 {
return raw, nil
}
var doc map[string]any
if err := json.Unmarshal(raw, &doc); err != nil {
// Fallback to the original payload if it isn't a JSON object.
return raw, nil
}
if t, ok := doc["type"]; ok {
if typeStr, ok := t.(string); ok && strings.TrimSpace(typeStr) != "" {
return raw, nil
}
}
if _, ok := doc["properties"]; ok {
return raw, nil
}
labels, ok := doc["labels"]
if !ok {
return raw, nil
}
nsfw := false
if items, ok := labels.([]any); ok && len(items) > 0 {
if first, ok := items[0].(map[string]any); ok {
if _, hasNSFW := first["nsfw"]; hasNSFW {
nsfw = true
}
if _, hasNSFWConfidence := first["nsfw_confidence"]; hasNSFWConfidence {
nsfw = true
}
}
}
return openai.SchemaLabels(nsfw), nil
}
// populateOpenAIJSONResult unmarshals a structured OpenAI response into ApiResult fields.
func populateOpenAIJSONResult(result *ApiResult, payload json.RawMessage) error {
if result == nil || len(payload) == 0 {
return nil
}
var envelope struct {
Caption *struct {
Text string `json:"text"`
Confidence float32 `json:"confidence"`
} `json:"caption"`
Labels []LabelResult `json:"labels"`
}
if err := json.Unmarshal(payload, &envelope); err != nil {
return err
}
if envelope.Caption != nil {
text := strings.TrimSpace(envelope.Caption.Text)
if text != "" {
result.Caption = &CaptionResult{
Text: text,
Confidence: envelope.Caption.Confidence,
Source: entity.SrcOpenAI,
}
}
}
if len(envelope.Labels) > 0 {
filtered := envelope.Labels[:0]
for i := range envelope.Labels {
if envelope.Labels[i].Source == "" {
envelope.Labels[i].Source = entity.SrcOpenAI
}
normalizeLabelResult(&envelope.Labels[i])
if envelope.Labels[i].Name == "" {
continue
}
filtered = append(filtered, envelope.Labels[i])
}
result.Labels = append(result.Labels, filtered...)
}
return nil
}

View file

@ -0,0 +1,337 @@
package vision
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/ai/vision/schema"
"github.com/photoprism/photoprism/internal/entity"
)
func TestOpenAIBuilderBuild(t *testing.T) {
model := &Model{
Type: ModelTypeLabels,
Name: openai.DefaultModel,
Engine: openai.EngineName,
}
model.ApplyEngineDefaults()
request, err := openaiBuilder{}.Build(context.Background(), model, Files{examplesPath + "/chameleon_lime.jpg"})
require.NoError(t, err)
require.NotNil(t, request)
assert.Equal(t, ApiFormatOpenAI, request.ResponseFormat)
assert.NotEmpty(t, request.Images)
assert.NotNil(t, request.Options)
assert.Equal(t, openai.DefaultDetail, request.Options.Detail)
assert.True(t, request.Options.ForceJson)
assert.GreaterOrEqual(t, request.Options.MaxOutputTokens, openai.LabelsMaxTokens)
}
func TestOpenAIBuilderBuildCaptionDisablesForceJSON(t *testing.T) {
model := &Model{
Type: ModelTypeCaption,
Name: openai.DefaultModel,
Engine: openai.EngineName,
Options: &ApiRequestOptions{ForceJson: true},
}
model.ApplyEngineDefaults()
request, err := openaiBuilder{}.Build(context.Background(), model, Files{examplesPath + "/chameleon_lime.jpg"})
require.NoError(t, err)
require.NotNil(t, request)
require.NotNil(t, request.Options)
assert.False(t, request.Options.ForceJson)
assert.GreaterOrEqual(t, request.Options.MaxOutputTokens, openai.CaptionMaxTokens)
}
func TestApiRequestJSONForOpenAI(t *testing.T) {
req := &ApiRequest{
Model: "gpt-5-mini",
System: "system",
Prompt: "describe the scene",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: 128,
Temperature: 0.2,
TopP: 0.8,
ForceJson: true,
},
Schema: json.RawMessage(`{"type":"object","properties":{"caption":{"type":"object"}}}`),
}
payload, err := req.JSON()
require.NoError(t, err)
var decoded struct {
Model string `json:"model"`
Input []struct {
Role string `json:"role"`
Content []struct {
Type string `json:"type"`
} `json:"content"`
} `json:"input"`
Text struct {
Format struct {
Type string `json:"type"`
Name string `json:"name"`
Schema json.RawMessage `json:"schema"`
Strict bool `json:"strict"`
} `json:"format"`
} `json:"text"`
Reasoning struct {
Effort string `json:"effort"`
} `json:"reasoning"`
MaxOutputTokens int `json:"max_output_tokens"`
}
require.NoError(t, json.Unmarshal(payload, &decoded))
assert.Equal(t, "gpt-5-mini", decoded.Model)
require.Len(t, decoded.Input, 2)
assert.Equal(t, "system", decoded.Input[0].Role)
assert.Equal(t, openai.ResponseFormatJSONSchema, decoded.Text.Format.Type)
assert.Equal(t, schema.JsonSchemaName(decoded.Text.Format.Schema, openai.DefaultSchemaVersion), decoded.Text.Format.Name)
assert.False(t, decoded.Text.Format.Strict)
assert.NotNil(t, decoded.Text.Format.Schema)
assert.Equal(t, "low", decoded.Reasoning.Effort)
assert.Equal(t, 128, decoded.MaxOutputTokens)
}
func TestApiRequestJSONForOpenAIDefaultSchemaName(t *testing.T) {
req := &ApiRequest{
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: 64,
ForceJson: true,
},
Schema: json.RawMessage(`{"type":"object"}`),
}
payload, err := req.JSON()
require.NoError(t, err)
var decoded struct {
Text struct {
Format struct {
Name string `json:"name"`
} `json:"format"`
} `json:"text"`
}
require.NoError(t, json.Unmarshal(payload, &decoded))
assert.Equal(t, schema.JsonSchemaName(req.Schema, openai.DefaultSchemaVersion), decoded.Text.Format.Name)
}
func TestOpenAIParserParsesJSONFromTextPayload(t *testing.T) {
respPayload := `{
"id": "resp_123",
"model": "gpt-5-mini",
"output": [{
"role": "assistant",
"content": [{
"type": "output_text",
"text": "{\"labels\":[{\"name\":\"deer\",\"confidence\":0.98,\"topicality\":0.99}]}"
}]
}]
}`
req := &ApiRequest{
Id: "test",
Model: "gpt-5-mini",
ResponseFormat: ApiFormatOpenAI,
}
resp, err := openaiParser{}.Parse(context.Background(), req, []byte(respPayload), http.StatusOK)
require.NoError(t, err)
require.NotNil(t, resp)
require.Len(t, resp.Result.Labels, 1)
assert.Equal(t, "Deer", resp.Result.Labels[0].Name)
assert.Nil(t, resp.Result.Caption)
}
func TestParseOpenAISchemaLegacyUpgrade(t *testing.T) {
legacy := `{
"labels": [{
"name": "",
"confidence": 0,
"topicality": 0
}]
}`
raw, err := parseOpenAISchema(legacy)
require.NoError(t, err)
var decoded map[string]any
require.NoError(t, json.Unmarshal(raw, &decoded))
assert.Equal(t, "object", decoded["type"])
props, ok := decoded["properties"].(map[string]any)
require.True(t, ok)
labels, ok := props["labels"].(map[string]any)
require.True(t, ok)
assert.Equal(t, "array", labels["type"])
}
func TestParseOpenAISchemaLegacyUpgradeNSFW(t *testing.T) {
legacy := `{
"labels": [{
"name": "",
"confidence": 0,
"topicality": 0,
"nsfw": false,
"nsfw_confidence": 0
}]
}`
raw, err := parseOpenAISchema(legacy)
require.NoError(t, err)
var decoded map[string]any
require.NoError(t, json.Unmarshal(raw, &decoded))
props := decoded["properties"].(map[string]any)
labels := props["labels"].(map[string]any)
items := labels["items"].(map[string]any)
_, hasNSFW := items["properties"].(map[string]any)["nsfw"]
assert.True(t, hasNSFW)
}
func TestPerformApiRequestOpenAISuccess(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var reqPayload struct {
Model string `json:"model"`
}
assert.NoError(t, json.NewDecoder(r.Body).Decode(&reqPayload))
assert.Equal(t, "gpt-5-mini", reqPayload.Model)
response := map[string]any{
"id": "resp_123",
"model": "gpt-5-mini",
"output": []any{
map[string]any{
"role": "assistant",
"content": []any{
map[string]any{
"type": "output_json",
"json": map[string]any{
"caption": map[string]any{
"text": "A cat rests on a windowsill.",
"confidence": 0.91,
},
"labels": []map[string]any{
{
"name": "cat",
"confidence": 0.92,
"topicality": 0.88,
},
},
},
},
},
},
},
}
assert.NoError(t, json.NewEncoder(w).Encode(response))
}))
defer server.Close()
req := &ApiRequest{
Id: "test",
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
},
Schema: json.RawMessage(`{"type":"object"}`),
}
resp, err := PerformApiRequest(req, server.URL, http.MethodPost, "secret")
require.NoError(t, err)
require.NotNil(t, resp)
require.NotNil(t, resp.Result.Caption)
assert.Equal(t, entity.SrcOpenAI, resp.Result.Caption.Source)
assert.Equal(t, "A cat rests on a windowsill.", resp.Result.Caption.Text)
require.Len(t, resp.Result.Labels, 1)
assert.Equal(t, entity.SrcOpenAI, resp.Result.Labels[0].Source)
assert.Equal(t, "Cat", resp.Result.Labels[0].Name)
}
func TestPerformApiRequestOpenAITextFallback(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
response := map[string]any{
"id": "resp_456",
"model": "gpt-5-mini",
"output": []any{
map[string]any{
"role": "assistant",
"content": []any{
map[string]any{
"type": "output_text",
"text": "Two hikers reach the summit at sunset.",
},
},
},
},
}
assert.NoError(t, json.NewEncoder(w).Encode(response))
}))
defer server.Close()
req := &ApiRequest{
Id: "fallback",
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
},
Schema: nil,
}
resp, err := PerformApiRequest(req, server.URL, http.MethodPost, "")
require.NoError(t, err)
require.NotNil(t, resp.Result.Caption)
assert.Equal(t, "Two hikers reach the summit at sunset.", resp.Result.Caption.Text)
assert.Equal(t, entity.SrcOpenAI, resp.Result.Caption.Source)
}
func TestPerformApiRequestOpenAIError(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusBadRequest)
_ = json.NewEncoder(w).Encode(map[string]any{
"error": map[string]any{
"message": "Invalid image payload",
},
})
}))
defer server.Close()
req := &ApiRequest{
Id: "error",
Model: "gpt-5-mini",
ResponseFormat: ApiFormatOpenAI,
Schema: nil,
Images: []string{"data:image/jpeg;base64,AA=="},
}
_, err := PerformApiRequest(req, server.URL, http.MethodPost, "")
require.Error(t, err)
assert.Contains(t, err.Error(), "Invalid image payload")
}

View file

@ -96,8 +96,10 @@ func labelsInternal(images Files, mediaSrc media.Src, labelSrc entity.Src) (resu
apiRequest.Prompt = prompt
}
if options := model.GetOptions(); options != nil {
apiRequest.Options = options
if apiRequest.Options == nil {
if options := model.GetOptions(); options != nil {
apiRequest.Options = options
}
}
apiRequest.WriteLog()

View file

@ -154,9 +154,11 @@ func (m *Model) EndpointKey() (key string) {
if key = m.Service.EndpointKey(); key != "" {
return key
} else {
return ServiceKey
}
ensureEnv()
return strings.TrimSpace(os.ExpandEnv(ServiceKey))
}
// EndpointFileScheme returns the endpoint API request file scheme type. Nil
@ -348,6 +350,26 @@ func mergeOptionDefaults(target, defaults *ApiRequestOptions) {
if len(target.Stop) == 0 && len(defaults.Stop) > 0 {
target.Stop = append([]string(nil), defaults.Stop...)
}
if target.MaxOutputTokens <= 0 && defaults.MaxOutputTokens > 0 {
target.MaxOutputTokens = defaults.MaxOutputTokens
}
if strings.TrimSpace(target.Detail) == "" && strings.TrimSpace(defaults.Detail) != "" {
target.Detail = strings.TrimSpace(defaults.Detail)
}
if !target.ForceJson && defaults.ForceJson {
target.ForceJson = true
}
if target.SchemaVersion == "" && defaults.SchemaVersion != "" {
target.SchemaVersion = defaults.SchemaVersion
}
if target.CombineOutputs == "" && defaults.CombineOutputs != "" {
target.CombineOutputs = defaults.CombineOutputs
}
}
func normalizeOptions(opts *ApiRequestOptions) {
@ -422,6 +444,10 @@ func (m *Model) ApplyEngineDefaults() {
}
if info, ok := EngineInfoFor(engine); ok {
if m.Service.Uri == "" {
m.Service.Uri = info.Uri
}
if m.Service.RequestFormat == "" {
m.Service.RequestFormat = info.RequestFormat
}
@ -439,6 +465,10 @@ func (m *Model) ApplyEngineDefaults() {
}
}
if engine == openai.EngineName && strings.TrimSpace(m.Service.Key) == "" {
m.Service.Key = "${OPENAI_API_KEY}"
}
m.Engine = engine
}
@ -490,7 +520,7 @@ func (m *Model) SchemaTemplate() string {
}
if m.schema == "" {
m.schema = visionschema.Labels(m.PromptContains("nsfw"))
m.schema = visionschema.LabelsJson(m.PromptContains("nsfw"))
}
}
})

View file

@ -1,13 +1,17 @@
package vision
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/photoprism/photoprism/internal/ai/tensorflow"
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/entity"
"github.com/photoprism/photoprism/pkg/http/scheme"
)
func TestModelGetOptionsDefaultsOllamaLabels(t *testing.T) {
@ -108,6 +112,85 @@ func TestModelApplyEngineDefaultsSetsResolution(t *testing.T) {
}
}
func TestModelApplyEngineDefaultsSetsServiceDefaults(t *testing.T) {
t.Run("OpenAIEngine", func(t *testing.T) {
model := &Model{
Type: ModelTypeCaption,
Engine: openai.EngineName,
}
model.ApplyEngineDefaults()
assert.Equal(t, "https://api.openai.com/v1/responses", model.Service.Uri)
assert.Equal(t, ApiFormatOpenAI, model.Service.RequestFormat)
assert.Equal(t, ApiFormatOpenAI, model.Service.ResponseFormat)
assert.Equal(t, scheme.Data, model.Service.FileScheme)
})
t.Run("PreserveExistingService", func(t *testing.T) {
model := &Model{
Type: ModelTypeCaption,
Engine: openai.EngineName,
Service: Service{
Uri: "https://custom.example",
FileScheme: scheme.Base64,
RequestFormat: ApiFormatOpenAI,
},
}
model.ApplyEngineDefaults()
assert.Equal(t, "https://custom.example", model.Service.Uri)
assert.Equal(t, scheme.Base64, model.Service.FileScheme)
})
}
func TestModelEndpointKeyOpenAIFallbacks(t *testing.T) {
t.Run("EnvFile", func(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "openai.key")
if err := os.WriteFile(path, []byte("from-file\n"), 0o600); err != nil {
t.Fatalf("write key file: %v", err)
}
t.Setenv("OPENAI_API_KEY", "")
t.Setenv("OPENAI_API_KEY_FILE", path)
model := &Model{Type: ModelTypeCaption, Engine: openai.EngineName}
model.ApplyEngineDefaults()
if got := model.EndpointKey(); got != "from-file" {
t.Fatalf("expected file key, got %q", got)
}
})
t.Run("CustomPlaceholder", func(t *testing.T) {
t.Setenv("OPENAI_API_KEY", "env-secret")
model := &Model{Type: ModelTypeCaption, Engine: openai.EngineName}
model.ApplyEngineDefaults()
if got := model.EndpointKey(); got != "env-secret" {
t.Fatalf("expected env secret, got %q", got)
}
model.Service.Key = "${CUSTOM_KEY}"
t.Setenv("CUSTOM_KEY", "custom-secret")
if got := model.EndpointKey(); got != "custom-secret" {
t.Fatalf("expected custom secret, got %q", got)
}
})
t.Run("GlobalFallback", func(t *testing.T) {
prev := ServiceKey
ServiceKey = "${GLOBAL_KEY}"
defer func() { ServiceKey = prev }()
t.Setenv("GLOBAL_KEY", "global-secret")
model := &Model{}
if got := model.EndpointKey(); got != "global-secret" {
t.Fatalf("expected global secret, got %q", got)
}
})
}
func TestModelGetSource(t *testing.T) {
t.Run("NilModel", func(t *testing.T) {
var model *Model
@ -115,21 +198,18 @@ func TestModelGetSource(t *testing.T) {
t.Fatalf("expected SrcAuto for nil model, got %s", src)
}
})
t.Run("EngineAlias", func(t *testing.T) {
model := &Model{Engine: ollama.EngineName}
if src := model.GetSource(); src != entity.SrcOllama {
t.Fatalf("expected SrcOllama, got %s", src)
}
})
t.Run("RequestFormat", func(t *testing.T) {
model := &Model{Service: Service{RequestFormat: ApiFormatOpenAI}}
if src := model.GetSource(); src != entity.SrcOpenAI {
t.Fatalf("expected SrcOpenAI, got %s", src)
}
})
t.Run("DefaultImage", func(t *testing.T) {
model := &Model{}
if src := model.GetSource(); src != entity.SrcImage {

View file

@ -0,0 +1,152 @@
## PhotoPrism — Ollama Engine Integration
**Last Updated:** November 14, 2025
### Overview
This package provides PhotoPrism's native adapter for Ollama-compatible multimodal models. It lets Caption, Labels, and future Generate workflows call locally hosted models without changing worker logic, reusing the shared API client (`internal/ai/vision/api_client.go`) and result types (`LabelResult`, `CaptionResult`). Requests stay inside your infrastructure, rely on base64 thumbnails, and honor the same ACL, timeout, and logging hooks as the default TensorFlow engines.
#### Context & Constraints
- Engine defaults live in `internal/ai/vision/ollama` and are applied whenever a model sets `Engine: ollama`. Aliases map to `ApiFormatOllama`, `scheme.Base64`, and a default 720px thumbnail.
- Responses may arrive as newline-delimited JSON chunks. `decodeOllamaResponse` keeps the most recent chunk, while `parseOllamaLabels` replays plain JSON strings found in `response`; a simplified sketch of the chunk handling follows this list.
- Structured JSON is optional for captions but enforced for labels when `Format: json` (default for label models targeting the Ollama engine).
- The adapter never overwrites TensorFlow defaults. If an Ollama call fails, downstream code still has Nasnet, NSFW, and Face models available.
- Workers assume a single-image payload per request. Run `photoprism vision run` to validate multi-image prompts before changing that invariant.
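
The shared client already performs this reduction, but the following hedged sketch illustrates the idea of keeping the most recent decodable chunk. It is a simplified illustration, not the actual `decodeOllamaResponse` implementation, and only assumes the `ollama.Response` type defined in this package.

```go
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"

	"github.com/photoprism/photoprism/internal/ai/vision/ollama"
)

// lastChunk keeps the most recent chunk that decodes cleanly, skipping
// malformed lines instead of failing the whole response.
func lastChunk(raw []byte) (ollama.Response, error) {
	var last ollama.Response
	found := false

	scanner := bufio.NewScanner(bytes.NewReader(raw))
	for scanner.Scan() {
		line := bytes.TrimSpace(scanner.Bytes())
		if len(line) == 0 {
			continue
		}

		var chunk ollama.Response
		if err := json.Unmarshal(line, &chunk); err != nil {
			continue
		}

		last, found = chunk, true
	}

	if err := scanner.Err(); err != nil {
		return last, err
	}

	if !found {
		return last, fmt.Errorf("no decodable chunks in response")
	}

	return last, nil
}

func main() {
	raw := []byte(`{"model":"gemma3:latest","response":"partial","done":false}
{"model":"gemma3:latest","response":"{\"labels\":[]}","done":true}`)

	if chunk, err := lastChunk(raw); err != nil {
		fmt.Println("error:", err)
	} else {
		fmt.Println(chunk.Model, chunk.Done)
	}
}
```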
#### Goals
- Let operators opt into local, private LLMs for captions and labels via `vision.yml`.
- Provide safe defaults (prompts, schema, sampling) so most deployments only need to specify `Name`, `Engine`, and `Service.Uri`.
- Surface reproducible logs, metrics, and CLI commands that make it easy to compare Ollama output against TensorFlow/OpenAI engines.
#### Non-Goals
- Managing Ollama itself (model downloads, GPU scheduling, or authentication). Use the Compose profiles provided in the repository.
- Adding new HTTP endpoints or bypassing the existing `photoprism vision` CLI.
- Replacing TensorFlow workers—Ollama engines are additive and opt-in.
### Architecture & Request Flow
1. **Model Selection:** `Config.Model(ModelType)` returns the top-most enabled entry. When `Engine: ollama`, `ApplyEngineDefaults()` fills in the request/response format, base64 file scheme, and a 720px resolution unless overridden.
2. **Request Build:** `ollamaBuilder.Build` wraps thumbnails with `NewApiRequestOllama`, which encodes them as base64 strings. `Model.Model()` resolves the exact Ollama tag (`gemma3:4b`, `qwen2.5vl:7b`, etc.).
3. **Transport:** `PerformApiRequest` uses a single HTTP POST (default timeout 10 min). Authentication is optional; provide `Service.Key` if you proxy through an API gateway.
4. **Parsing:** `ollamaParser.Parse` converts payloads into `ApiResponse`. It normalizes confidences (`LabelConfidenceDefault = 0.5` when missing), copies NSFW scores, and canonicalizes label names via `normalizeLabelResult`; the sketch after this list shows the confidence defaults.
5. **Persistence:** `entity.SrcOllama` is stamped on labels/captions so UI badges and audits reflect the new source.
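
The confidence handling in step 4 can be illustrated with a small hedged sketch. It only uses the `ollama.LabelPayload` type and the `LabelConfidenceDefault` constant; the real parser additionally canonicalizes names via `normalizeLabelResult` and stamps `entity.SrcOllama` as the source.

```go
package main

import (
	"fmt"

	"github.com/photoprism/photoprism/internal/ai/vision/ollama"
)

// applyConfidenceDefaults mirrors the fallback behavior described in step 4:
// missing confidences get the package default, and topicality mirrors the
// confidence when the model omits it.
func applyConfidenceDefaults(labels []ollama.LabelPayload) []ollama.LabelPayload {
	for i := range labels {
		if labels[i].Confidence <= 0 {
			labels[i].Confidence = ollama.LabelConfidenceDefault
		}
		if labels[i].Topicality <= 0 {
			labels[i].Topicality = labels[i].Confidence
		}
	}
	return labels
}

func main() {
	labels := applyConfidenceDefaults([]ollama.LabelPayload{{Name: "forest path"}})
	fmt.Printf("%+v\n", labels[0])
}
```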
### Prompt, Schema, & Options Guidance
- **System Prompts**
- Labels: `LabelSystem` enforces single-word nouns. Set `System` to override; assign `LabelSystemSimple` when you need descriptive phrases.
- Captions: no system prompt by default; rely on user prompt or set one explicitly for stylistic needs.
- **User Prompts**
- Captions use `CaptionPrompt`, which requests one sentence in active voice.
- Labels default to `LabelPromptDefault`; when `DetectNSFWLabels` is true, the adapter swaps in `LabelPromptNSFW`.
- For stricter noun enforcement, set `Prompt` to `LabelPromptStrict`.
- **Schemas**
- Labels rely on `schema.LabelsJson(nsfw)` (simple JSON template). Setting `Format: json` auto-attaches a reminder (`model.SchemaInstructions()`).
- Override via `Schema` (inline YAML) or `SchemaFile`. `PHOTOPRISM_VISION_LABEL_SCHEMA_FILE` always wins if present.
- **Options**
- Labels: default `Temperature` equals `DefaultTemperature` (0.1 unless configured), `TopP=0.9`, `Stop=["\n\n"]`.
- Captions: only `Temperature` is set; other parameters inherit global defaults.
- Custom `Options` merge with engine defaults. Leave `ForceJson=true` for labels so PhotoPrism can reject malformed payloads early.
### Supported Ollama Vision Models
| Model (Ollama Tag) | Size & Footprint | Strengths | JSON & Language Notes | When To Use |
|-------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `gemma3:4b / 12b / 27b` | 4B/12B/27B parameters, ~3.3GB → 17GB downloads, 128K context | Multimodal text+image reasoning with SigLIP encoder, handles OCR/long documents, supports tool/function calling | Emits structured JSON reliably; >140 languages with strong default English output | High-quality captions + multilingual labels when you have ≥12GB VRAM (4B works on 8GB with Q4_K_M) |
| `qwen2.5vl:7b` | 8.29B params (Q4_K_M) ≈6GB download, 125K context | Excellent charts, GUI grounding, DocVQA, multi-image reasoning, agentic tool use | JSON mode tuned for schema compliance; supports 20+ languages with strong Chinese/English parity | Label extraction for mixed-language archives or UI/diagram analysis |
| `qwen3-vl:2b / 4b / 8b`  | Dense 2B/4B/8B tiers (~3GB, ~3.5GB, ~6GB downloads) with native 256K context extendable to 1M; fits single 12–24GB GPUs or high-end CPUs (2B) | Spatial + video reasoning upgrades (Interleaved-MRoPE, DeepStack), 32-language OCR, GUI/agent control, long-document ingest | Emits JSON reliably when prompts specify schema; multilingual captions/labels with Thinking variants boosting STEM reasoning | General-purpose captions/labels when you need long-context doc/video support without cloud APIs; 2B for CPU/edge, 4B as balanced default, 8B when accuracy outweighs latency |
| `llama3.2-vision:11b` | 11B params, ~7.8GB download, requires ≥8GB VRAM; 90B variant needs ≥64GB | Strong general reasoning, captioning, OCR, supported by Meta ecosystem tooling | Vision tasks officially supported in English; text-only tasks cover eight major languages | Keep captions consistent with Meta-compatible prompts or when teams already standardize on Llama 3.x |
| `minicpm-v:8b-2.6` | 8B params, ~5.5GB download, 32K context | Optimized for edge GPUs, high OCR accuracy, multi-image/video support, low token count (≈640 tokens for 1.8MP) | Multilingual (EN/ZH/DE/FR/IT/KR). Emits concise JSON but may need stricter stopping sequences | Memory-constrained deployments that still require NSFW/OCR-aware label output |
> Tip: pull models inside the dev container with `docker compose --profile ollama up -d` and then `docker compose exec ollama ollama pull gemma3:4b`. Keep the profile stopped when you do not need extra GPU/CPU load.
> Qwen3-VL models stream their JSON payload via the `thinking` field. PhotoPrism v2025.11+ captures this automatically; if you run older builds, upgrade before enabling these models or responses will appear empty.
### Configuration
#### Environment Variables
- `PHOTOPRISM_VISION_LABEL_SCHEMA_FILE` — Absolute path to a JSON snippet that overrides the default label schema (applies to every Ollama label model).
- `PHOTOPRISM_VISION_YAML` — Custom `vision.yml` path. Keep it synced in Git if you automate deployments.
- `OLLAMA_HOST`, `OLLAMA_MODELS`, `OLLAMA_MAX_QUEUE`, `OLLAMA_NUM_PARALLEL`, etc. — Provided in `compose*.yaml` to tune the Ollama daemon. Adjust `OLLAMA_KEEP_ALIVE` if you want models to stay loaded between worker batches.
- `PHOTOPRISM_LOG_LEVEL=trace` — Enables verbose request/response previews (truncated to avoid leaking images). Use temporarily when debugging parsing issues.
#### `vision.yml` Example
```yaml
Models:
- Type: labels
Name: qwen2.5vl:7b
Engine: ollama
Run: newly-indexed
Resolution: 720
Format: json
Options:
Temperature: 0.05
Stop: ["\n\n"]
ForceJson: true
Service:
Uri: http://ollama:11434/api/generate
RequestFormat: ollama
ResponseFormat: ollama
FileScheme: base64
- Type: caption
Name: gemma3:4b
Engine: ollama
Disabled: false
Options:
Temperature: 0.2
Service:
Uri: http://ollama:11434/api/generate
```
Guidelines:
- Place new entries after the default TensorFlow models so they take precedence while Nasnet/NSFW remain as fallbacks.
- Always specify the exact Ollama tag (`model:version`) so upgrades are deliberate.
- Keep option flags before positional arguments in CLI snippets (`photoprism vision run -m labels --count 1`).
- If you proxy requests (e.g., through Traefik), set `Service.Key` to `Bearer <token>` and configure the proxy to inject/validate it.
### Operational Checklist
- **Scheduling** — Use `Run: newly-indexed` for incremental runs, `Run: manual` for ad-hoc CLI calls, or `Run: on-schedule` when paired with the scheduler. Leave `Run: auto` if you want the worker to decide based on other model states.
- **Timeouts & Retries** — Default timeout is 10 minutes (`ServiceTimeout`). Ollama streaming responses complete faster in practice; if you need stricter SLAs, wrap `photoprism vision run` in a job runner and retry failed batches manually.
- **Fallbacks** — Keep Nasnet configured even when Ollama labels are primary. `labels.go` stops at the first successful engine, so duplicates are avoided.
- **Security** — When exposing Ollama beyond localhost, terminate TLS at Traefik and enable API keys. Never return full JSON payloads in logs; rely on trace mode only for debugging and sanitize before sharing.
- **Model Storage** — Bind-mount `./storage/services/ollama:/root/.ollama` (see Compose) so pulled models survive container restarts. Run `docker compose exec ollama ollama list` during deployments to verify availability.
### Observability & Testing
- **CLI Smoke Tests**
- Captions: `photoprism vision run -m caption --count 5 --force`.
- Labels: `photoprism vision run -m labels --count 5 --force`.
- After each run, check `photoprism vision ls` for `source=ollama`.
- **Unit Tests**
- `go test ./internal/ai/vision/ollama ./internal/ai/vision -run Ollama -count=1` covers transport parsing and model defaults.
- Add fixtures under `internal/ai/vision/testdata` when capturing new response shapes; keep files small and anonymized. A minimal test sketch follows below.
- **Logging**
- Set `PHOTOPRISM_LOG_LEVEL=debug` to watch summary lines (“processed labels/caption via ollama”).
- Use `log.Trace` sparingly; it prints truncated JSON blobs for troubleshooting.
- **Metrics**
- `/api/v1/metrics` exposes counts per label source; scrape after a batch to compare throughput with TensorFlow/OpenAI runs.
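
For new fixtures, a parser test can mirror the existing pattern in `engine_ollama_test.go`. The sketch below is a hedged starting point: it assumes placement in the internal `vision` package (so the unexported `ollamaParser` and the `ApiRequest` type are visible) and only checks that one label is returned; adapt the assertions to the captured payload.

```go
package vision

import (
	"context"
	"encoding/json"
	"testing"

	"github.com/photoprism/photoprism/internal/ai/vision/ollama"
)

func TestOllamaParserFixture(t *testing.T) {
	payload := ollama.Response{
		Model:    "gemma3:latest",
		Response: `{"labels":[{"name":"lighthouse","confidence":0.83,"topicality":0.79}]}`,
	}

	raw, err := json.Marshal(payload)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}

	// Parse the captured payload the same way the worker does.
	resp, err := ollamaParser{}.Parse(context.Background(), &ApiRequest{Format: FormatJSON}, raw, 200)
	if err != nil {
		t.Fatalf("parse failed: %v", err)
	}

	if len(resp.Result.Labels) != 1 {
		t.Fatalf("expected one label, got %+v", resp.Result.Labels)
	}
}
```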
### Code Map
- `internal/ai/vision/ollama/*.go` — Engine defaults, schema helpers, transport structs.
- `internal/ai/vision/engine_ollama.go` — Builder/parser glue plus label/caption normalization.
- `internal/ai/vision/api_ollama.go` — Base64 payload builder.
- `internal/ai/vision/api_client.go` — Streaming decoder shared among engines.
- `internal/ai/vision/models.go` — Default caption model definition (`gemma3`).
- `compose*.yaml` — Ollama service profile, Traefik labels, and persistent volume wiring.
- `frontend/src/common/util.js` — Maps `src="ollama"` to the correct badge; keep it updated when adding new source strings.
### Next Steps
- [ ] Add formal schema validation (JSON Schema or JTD) so malformed label responses fail fast before normalization.
- [ ] Support multiple thumbnails per request once core workflows confirm the API contract (requires worker + UI changes).
- [ ] Emit per-model latency and success metrics from the vision worker to simplify tuning when several Ollama engines run side-by-side.
- [ ] Mirror any loader changes into PhotoPrism Plus/Pro templates to keep splash + browser checks consistent after enabling external engines.

View file

@ -1,7 +1,5 @@
package ollama
import "github.com/photoprism/photoprism/internal/ai/vision/schema"
const (
// CaptionPrompt instructs Ollama caption models to emit a single, active-voice sentence.
CaptionPrompt = "Create a caption with exactly one sentence in the active voice that describes the main visual content. Begin with the main subject and clear action. Avoid text formatting, meta-language, and filler words."
@ -22,12 +20,3 @@ const (
// DefaultResolution is the default thumbnail size submitted to Ollama models.
DefaultResolution = 720
)
// LabelsSchema returns the canonical label schema string consumed by Ollama models.
func LabelsSchema(nsfw bool) string {
if nsfw {
return schema.LabelsNSFW
} else {
return schema.LabelsDefault
}
}

View file

@ -0,0 +1,14 @@
package ollama
import (
"github.com/photoprism/photoprism/internal/ai/vision/schema"
)
// SchemaLabels returns the canonical label schema string consumed by Ollama models.
//
// Related documentation and references:
// - https://www.alibabacloud.com/help/en/model-studio/json-mode
// - https://www.json.org/json-en.html
func SchemaLabels(nsfw bool) string {
return schema.LabelsJson(nsfw)
}

View file

@ -0,0 +1,80 @@
package ollama
import (
"errors"
"fmt"
"time"
)
// Response encapsulates the subset of the Ollama generate API response we care about.
type Response struct {
ID string `yaml:"Id,omitempty" json:"id,omitempty"`
Code int `yaml:"Code,omitempty" json:"code,omitempty"`
Error string `yaml:"Error,omitempty" json:"error,omitempty"`
Model string `yaml:"Model,omitempty" json:"model,omitempty"`
CreatedAt time.Time `yaml:"CreatedAt,omitempty" json:"created_at,omitempty"`
Response string `yaml:"Response,omitempty" json:"response,omitempty"`
Thinking string `yaml:"Thinking,omitempty" json:"thinking,omitempty"`
Done bool `yaml:"Done,omitempty" json:"done,omitempty"`
Context []int `yaml:"Context,omitempty" json:"context,omitempty"`
TotalDuration int64 `yaml:"TotalDuration,omitempty" json:"total_duration,omitempty"`
LoadDuration int `yaml:"LoadDuration,omitempty" json:"load_duration,omitempty"`
PromptEvalCount int `yaml:"PromptEvalCount,omitempty" json:"prompt_eval_count,omitempty"`
PromptEvalDuration int `yaml:"PromptEvalDuration,omitempty" json:"prompt_eval_duration,omitempty"`
EvalCount int `yaml:"EvalCount,omitempty" json:"eval_count,omitempty"`
EvalDuration int64 `yaml:"EvalDuration,omitempty" json:"eval_duration,omitempty"`
Result ResultPayload `yaml:"Result,omitempty" json:"result,omitempty"`
}
// Err returns an error if the request has failed.
func (r *Response) Err() error {
if r == nil {
return errors.New("response is nil")
}
if r.Code >= 400 {
if r.Error != "" {
return errors.New(r.Error)
}
return fmt.Errorf("error %d", r.Code)
} else if len(r.Result.Labels) == 0 && r.Result.Caption == nil {
return errors.New("no result")
}
return nil
}
// HasResult checks if there is at least one result in the response data.
func (r *Response) HasResult() bool {
if r == nil {
return false
}
return len(r.Result.Labels) > 0 || r.Result.Caption != nil
}
// ResultPayload mirrors the structure returned by Ollama for result data.
type ResultPayload struct {
Labels []LabelPayload `json:"labels"`
Caption *CaptionPayload `json:"caption,omitempty"`
}
// LabelPayload represents a single label object emitted by the Ollama adapter.
type LabelPayload struct {
Name string `json:"name"`
Source string `json:"source,omitempty"`
Priority int `json:"priority,omitempty"`
Confidence float32 `json:"confidence,omitempty"`
Topicality float32 `json:"topicality,omitempty"`
Categories []string `json:"categories,omitempty"`
NSFW bool `json:"nsfw,omitempty"`
NSFWConfidence float32 `json:"nsfw_confidence,omitempty"`
}
// CaptionPayload represents the caption object emitted by the Ollama adapter.
type CaptionPayload struct {
Text string `json:"text"`
Source string `json:"source,omitempty"`
Confidence float32 `json:"confidence,omitempty"`
}

View file

@ -0,0 +1,90 @@
package ollama
import (
"testing"
"time"
)
func TestResponseErr(t *testing.T) {
t.Run("NilResponse", func(t *testing.T) {
if err := (*Response)(nil).Err(); err == nil || err.Error() != "response is nil" {
t.Fatalf("expected nil-response error, got %v", err)
}
})
t.Run("HTTPErrorWithMessage", func(t *testing.T) {
resp := &Response{Code: 429, Error: "too many requests"}
if err := resp.Err(); err == nil || err.Error() != "too many requests" {
t.Fatalf("expected message error, got %v", err)
}
})
t.Run("HTTPErrorWithoutMessage", func(t *testing.T) {
resp := &Response{Code: 500}
if err := resp.Err(); err == nil || err.Error() != "error 500" {
t.Fatalf("expected formatted error, got %v", err)
}
})
t.Run("NoResult", func(t *testing.T) {
resp := &Response{Code: 200}
if err := resp.Err(); err == nil || err.Error() != "no result" {
t.Fatalf("expected no-result error, got %v", err)
}
})
t.Run("HasLabels", func(t *testing.T) {
resp := &Response{
Code: 200,
Result: ResultPayload{Labels: []LabelPayload{{Name: "sky"}}},
Model: "qwen",
}
if err := resp.Err(); err != nil {
t.Fatalf("unexpected error: %v", err)
}
})
t.Run("HasCaption", func(t *testing.T) {
resp := &Response{
Code: 200,
Result: ResultPayload{Caption: &CaptionPayload{Text: "Caption"}},
}
if err := resp.Err(); err != nil {
t.Fatalf("unexpected error: %v", err)
}
})
}
func TestResponseHasResult(t *testing.T) {
if (*Response)(nil).HasResult() {
t.Fatal("nil response should not have result")
}
resp := &Response{}
if resp.HasResult() {
t.Fatal("expected false when result payload is empty")
}
resp.Result.Labels = []LabelPayload{{Name: "sun"}}
if !resp.HasResult() {
t.Fatal("expected true when labels present")
}
resp.Result.Labels = nil
resp.Result.Caption = &CaptionPayload{Text: "Sky", Confidence: 0.9}
if !resp.HasResult() {
t.Fatal("expected true when caption present")
}
}
func TestResponseJSONTagsAreOptional(t *testing.T) {
// Guard against accidental breaking changes to essential fields
resp := Response{
ID: "test",
Model: "ollama",
CreatedAt: time.Now(),
}
if resp.ID == "" || resp.Model == "" {
t.Fatalf("response fields should persist, got %+v", resp)
}
}

View file

@ -0,0 +1,128 @@
## PhotoPrism — OpenAI API Integration
**Last Updated:** November 14, 2025
### Overview
This package contains PhotoPrism's adapter for the OpenAI Responses API. It enables existing caption and label workflows (`GenerateCaption`, `GenerateLabels`, and the `photoprism vision run` CLI) to call OpenAI models alongside TensorFlow and Ollama without changing worker or API code. The implementation focuses on predictable results, structured outputs, and clear observability so operators can opt in gradually.
#### Context & Constraints
- OpenAI requests flow through the existing vision client (`internal/ai/vision/api_client.go`) and must honour PhotoPrism's timeout, logging, and ACL rules.
- Structured outputs are preferred but the adapter must gracefully handle free-form text; `output_text` responses are parsed both as JSON and as plain captions.
- Costs should remain predictable: requests are limited to a single 720px thumbnail (`detail=low`) with capped token budgets (512 caption, 1024 labels).
- Secrets are supplied per model (`Service.Key`) with fallbacks to `OPENAI_API_KEY` / `_FILE`. Logs must redact sensitive data.
#### Goals
- Provide drop-in OpenAI support for captions and labels using `vision.yml`.
- Keep configuration ergonomic by auto-populating prompts, schema names, token limits, and sampling defaults.
- Expose enough logging and tests so operators can compare OpenAI output with existing engines before enabling it broadly.
#### Non-Goals
- Introducing a new `generate` model type or combined caption/label endpoint (reserved for a later phase).
- Replacing the default TensorFlow models; they remain active as fallbacks.
- Managing OpenAI billing or quota dashboards beyond surfacing token counts in logs and metrics.
### Prompt, Model, & Schema Guidance
- **Models:** The adapter targets GPT-5 vision tiers (e.g. `gpt-5-nano`, `gpt-5-mini`). These models support image inputs, structured outputs, and deterministic settings. Set `Name` to the exact provider identifier so defaults are applied correctly. Caption models share the same configuration surface and run through the same adapter.
- **Prompts:** Defaults live in `defaults.go`. Captions use a single-sentence instruction; labels use `LabelPromptDefault` (or `LabelPromptNSFW` when PhotoPrism requests NSFW metadata). Custom prompts should retain schema reminders so structured outputs stay valid.
- **Schemas:** Labels use the JSON schema returned by `schema.LabelsJsonSchema(nsfw)`; the response format name is derived via `schema.JsonSchemaName` (e.g. `photoprism_vision_labels_v1`). Captions omit schemas unless operators explicitly request a structured format.
- **When to keep defaults:** For most deployments, leaving `System`, `Prompt`, `Schema`, and `Options` unset yields stable output with minimal configuration. Override them only when domain-specific language or custom scoring is necessary, and add regression tests alongside.
Budget-conscious operators can experiment with lighter prompts or lower-resolution thumbnails, but should keep token limits and determinism settings intact to avoid unexpected bills and UI churn.
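
Building on the schema guidance above, the short sketch below shows one way to derive the default label schema and its response-format name. It is a hedged illustration that only relies on the `openai.SchemaLabels` and `schema.JsonSchemaName` helpers used by this adapter.

```go
package main

import (
	"fmt"

	"github.com/photoprism/photoprism/internal/ai/vision/openai"
	"github.com/photoprism/photoprism/internal/ai/vision/schema"
)

func main() {
	// Request the NSFW-aware schema variant; pass false for the default set.
	labelsSchema := openai.SchemaLabels(true)

	// The response-format name falls back to openai.DefaultSchemaVersion
	// when the schema does not provide one of its own.
	name := schema.JsonSchemaName(labelsSchema, openai.DefaultSchemaVersion)

	fmt.Println(name)
}
```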
#### Performance & Cost Estimates
- **Token budgets:** Captions request up to 512 output tokens; labels request up to 1024. Input tokens are typically ≤700 for a single 720px thumbnail plus prompts.
- **Latency:** GPT-5 nano/mini vision calls typically complete in 3–8s, depending on OpenAI region. Including reasoning metadata (`reasoning.effort=low`) has negligible impact but improves traceability.
- **Costs:** Consult OpenAI's pricing for the selected model. Multiply input/output tokens by the published rate; a worked example with placeholder rates follows below. PhotoPrism currently sends one image per request to keep costs linear with photo count.
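
As a hedged illustration of that arithmetic, the sketch below estimates a per-photo cost from the token budgets above; the per-million-token rates are placeholders, not OpenAI's published pricing.

```go
package main

import "fmt"

func main() {
	const (
		inputTokens  = 700.0  // Typical budget for one 720px thumbnail plus prompts.
		outputTokens = 1024.0 // Label requests cap output at 1024 tokens.

		// Placeholder per-million-token rates; substitute the published
		// rates for the model you configure in vision.yml.
		inputRatePerMTok  = 0.25
		outputRatePerMTok = 2.00
	)

	perPhoto := inputTokens/1e6*inputRatePerMTok + outputTokens/1e6*outputRatePerMTok
	fmt.Printf("estimated cost per photo: $%.6f (x 10,000 photos = $%.2f)\n", perPhoto, perPhoto*10000)
}
```

Adjust the token constants if you change `MaxOutputTokens` or prompt length in `vision.yml`.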
### Configuration
#### Environment Variables
- `OPENAI_API_KEY` / `OPENAI_API_KEY_FILE` — fallback credentials when a model's `Service.Key` is unset.
- Existing `PHOTOPRISM_VISION_*` variables remain authoritative (see the [Getting Started Guide](https://docs.photoprism.app/getting-started/config-options/#computer-vision) for full lists).
#### `vision.yml` Examples
```yaml
Models:
- Type: caption
Name: gpt-5-nano
Engine: openai
Disabled: false # opt in manually
Resolution: 720 # optional; default is 720
Options:
Detail: low # optional; defaults to low
MaxOutputTokens: 512
Service:
Uri: https://api.openai.com/v1/responses
FileScheme: data
Key: ${OPENAI_API_KEY}
- Type: labels
Name: gpt-5-mini
Engine: openai
Disabled: false
Resolution: 720
Options:
Detail: low
MaxOutputTokens: 1024
ForceJson: true # redundant but explicit
Service:
Uri: https://api.openai.com/v1/responses
FileScheme: data
Key: ${OPENAI_API_KEY}
```
Keep TensorFlow entries in place so PhotoPrism falls back when the external service is unavailable.
#### Defaults
- File scheme: `data:` URLs (base64) for all OpenAI models.
- Resolution: 720px thumbnails (`vision.Thumb(ModelTypeCaption|Labels)`).
- Options: `MaxOutputTokens` raised to 512 (caption) / 1024 (labels); `ForceJson=false` for captions, `true` for labels; `reasoning.effort="low"`.
- Sampling: `Temperature` and `TopP` set to `0` for `gpt-5*` models; inherited values (0.1/0.9) remain for other engines. `openaiBuilder.Build` performs this override while preserving the struct defaults for non-OpenAI adapters.
- Schema naming: Automatically derived via `schema.JsonSchemaName`, so operators may omit `SchemaVersion`.
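For example, the derived name for the default label schema can be reproduced with the `schema` helpers added in this change (a small sketch, not part of the adapter itself):

```go
package main

import (
	"fmt"

	"github.com/photoprism/photoprism/internal/ai/vision/schema"
)

func main() {
	// The label schema contains the "labels" key and no explicit version is
	// given, so the derived name is "photoprism_vision_labels_v1".
	fmt.Println(schema.JsonSchemaName(schema.LabelsJsonSchema(false), ""))
}
```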
### Documentation
- Label Generation: <https://docs.photoprism.app/developer-guide/vision/label-generation/>
- Caption Generation: <https://docs.photoprism.app/developer-guide/vision/caption-generation/>
- Vision CLI Commands: <https://docs.photoprism.app/developer-guide/vision/cli/>
### Implementation Details
#### Core Concepts
- **Structured outputs:** PhotoPrism leverages OpenAI's structured output capability as documented at <https://platform.openai.com/docs/guides/structured-outputs>. When a JSON schema is supplied, the adapter emits `text.format` with `type: "json_schema"` and a schema name derived from the content. The parser then prefers `output_json`, but also attempts to decode `output_text` payloads that contain JSON objects (see the parsing sketch after this list).
- **Deterministic sampling:** GPT-5 models are run with `temperature=0` and `top_p=0` to minimise variance, while still allowing developers to override values in `vision.yml` if needed.
- **Reasoning metadata:** Requests include `reasoning.effort="low"` so OpenAI returns structured reasoning usage counters, helping operators track token consumption.
- **Worker summaries:** The vision worker now logs either “updated …” or “processed … (no metadata changes detected)”, making reruns easy to audit.
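The following sketch shows the parsing preference described above, using the `Response` helpers from this package; `decodeLabels` is an illustrative wrapper, not an exported PhotoPrism function:

```go
package example

import (
	"encoding/json"
	"fmt"
	"strings"

	"github.com/photoprism/photoprism/internal/ai/vision/openai"
)

// decodeLabels prefers a structured output_json payload and falls back to
// JSON embedded in an output_text part, mirroring the adapter's parsing order.
func decodeLabels(resp *openai.Response) (json.RawMessage, error) {
	if payload := resp.FirstJSON(); len(payload) > 0 {
		return payload, nil
	}
	if text := strings.TrimSpace(resp.FirstText()); strings.HasPrefix(text, "{") {
		return json.RawMessage(text), nil
	}
	return nil, fmt.Errorf("openai: response contains no JSON payload")
}
```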
#### Rate Limiting
OpenAI calls respect the existing `limiter.Auth` configuration used by the vision service. Failed requests surface standard HTTP errors and are not automatically retried; operators should ensure they have adequate account limits and consider external rate limiting when sharing credentials.
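If you do put an external limiter in front of shared credentials, a minimal sketch using `golang.org/x/time/rate` could look like this; `callVision` is a placeholder for whatever issues the actual request:

```go
package example

import (
	"context"

	"golang.org/x/time/rate"
)

// visionLimiter allows at most 2 requests per second with a burst of 4;
// tune both values to match your OpenAI account limits.
var visionLimiter = rate.NewLimiter(rate.Limit(2), 4)

// callWithLimit blocks until the limiter grants a slot, then runs callVision.
func callWithLimit(ctx context.Context, callVision func(context.Context) error) error {
	if err := visionLimiter.Wait(ctx); err != nil {
		return err
	}
	return callVision(ctx)
}
```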
#### Testing & Validation
1. Unit tests: `go test ./internal/ai/vision/openai ./internal/ai/vision -run OpenAI -count=1`. Fixtures under `internal/ai/vision/openai/testdata/` replay real Responses payloads (captions and labels).
2. CLI smoke test: `photoprism vision run -m labels --count 1 --force` with trace logging enabled to inspect sanitised Responses.
3. Compare worker summaries and label sources (`openai`) in the UI or via `photoprism vision ls`.
#### Code Map
- **Adapter & defaults:** `internal/ai/vision/openai` (defaults, schema helpers, transport, tests).
- **Request/response plumbing:** `internal/ai/vision/api_request.go`, `api_client.go`, `engine_openai.go`, `engine_openai_test.go`.
- **Workers & CLI:** `internal/workers/vision.go`, `internal/commands/vision_run.go`.
- **Shared utilities:** `internal/ai/vision/schema`, `pkg/clean`, `pkg/media`.
#### Next Steps
- [ ] Introduce the future `generate` model type that combines captions, labels, and optional markers.
- [ ] Evaluate additional OpenAI models as pricing and capabilities evolve.
- [ ] Expose token usage metrics (input/output/reasoning) via Prometheus once the schema stabilises.

View file

@ -1,6 +1,29 @@
package openai
import "github.com/photoprism/photoprism/internal/ai/vision/schema"
const (
// CaptionSystem defines the default system prompt for caption models.
CaptionSystem = "You are a PhotoPrism vision model. Return concise, user-friendly captions that describe the main subjects accurately."
// CaptionPrompt instructs caption models to respond with a single sentence.
CaptionPrompt = "Provide exactly one sentence describing the key subject and action in the image. Avoid filler words and technical jargon."
// LabelSystem defines the system prompt for label generation.
LabelSystem = "You are a PhotoPrism vision model. Emit JSON that matches the provided schema and keep label names short, singular nouns."
// LabelPromptDefault requests general-purpose labels.
LabelPromptDefault = "Analyze the image and return label objects with name, confidence (0-1), and topicality (0-1)."
// LabelPromptNSFW requests labels including NSFW metadata when required.
LabelPromptNSFW = "Analyze the image and return label objects with name, confidence (0-1), topicality (0-1), nsfw (true when sensitive), and nsfw_confidence (0-1)."
// DefaultDetail specifies the preferred thumbnail detail level for Responses API calls.
DefaultDetail = "low"
// CaptionMaxTokens suggests the output budget for caption responses.
CaptionMaxTokens = 512
// LabelsMaxTokens suggests the output budget for label responses.
LabelsMaxTokens = 1024
// DefaultTemperature configures deterministic replies.
DefaultTemperature = 0.1
// DefaultTopP limits nucleus sampling.
DefaultTopP = 0.9
// DefaultSchemaVersion is used when callers do not specify an explicit schema version.
DefaultSchemaVersion = "v1"
)
var (
// DefaultModel is the model used by default when accessing the OpenAI API.
@ -8,8 +31,3 @@ var (
// DefaultResolution is the default thumbnail size submitted to the OpenAI API.
DefaultResolution = 720
)
// LabelsSchema returns the canonical label schema string consumed by OpenAI models.
func LabelsSchema() string {
return schema.LabelsDefault
}

View file

@ -0,0 +1,16 @@
package openai
import (
"encoding/json"
"github.com/photoprism/photoprism/internal/ai/vision/schema"
)
// SchemaLabels returns the canonical labels JSON Schema string consumed by OpenAI models.
//
// Related documentation and references:
// - https://platform.openai.com/docs/guides/structured-outputs
// - https://json-schema.org/learn/miscellaneous-examples
func SchemaLabels(nsfw bool) json.RawMessage {
return schema.LabelsJsonSchema(nsfw)
}

View file

@ -0,0 +1,73 @@
{
"id": "resp_0d356718505119f3006916e5d8730881a0b91de2aa700f6196",
"object": "response",
"created_at": 1763108312,
"status": "completed",
"background": false,
"billing": {
"payer": "developer"
},
"error": null,
"incomplete_details": null,
"instructions": null,
"max_output_tokens": 512,
"max_tool_calls": null,
"model": "gpt-5-nano-2025-08-07",
"output": [
{
"id": "rs_0d356718505119f3006916e5d8efd481a0a4f9cc1823cc6c83",
"type": "reasoning",
"summary": []
},
{
"id": "msg_0d356718505119f3006916e5d9433881a0bc79197d2cfc2027",
"type": "message",
"status": "completed",
"content": [
{
"type": "output_text",
"annotations": [],
"logprobs": [],
"text": "A bee gathers nectar from the vibrant red poppy\u2019s center."
}
],
"role": "assistant"
}
],
"parallel_tool_calls": true,
"previous_response_id": null,
"prompt_cache_key": null,
"prompt_cache_retention": null,
"reasoning": {
"effort": "low",
"summary": null
},
"safety_identifier": null,
"service_tier": "default",
"store": true,
"temperature": 1.0,
"text": {
"format": {
"type": "text"
},
"verbosity": "medium"
},
"tool_choice": "auto",
"tools": [],
"top_logprobs": 0,
"top_p": 1.0,
"truncation": "disabled",
"usage": {
"input_tokens": 576,
"input_tokens_details": {
"cached_tokens": 0
},
"output_tokens": 19,
"output_tokens_details": {
"reasoning_tokens": 0
},
"total_tokens": 595
},
"user": null,
"metadata": {}
}

View file

@ -0,0 +1,114 @@
{
"id": "resp_0fa91dfb69b7d644006916ea0b72ac819f84ff3152a38dfcdb",
"object": "response",
"created_at": 1763109387,
"status": "completed",
"background": false,
"billing": {
"payer": "developer"
},
"error": null,
"incomplete_details": null,
"instructions": null,
"max_output_tokens": 1024,
"max_tool_calls": null,
"model": "gpt-5-mini-2025-08-07",
"output": [
{
"id": "rs_0fa91dfb69b7d644006916ea0c3450819f8a13396bf377f474",
"type": "reasoning",
"summary": []
},
{
"id": "msg_0fa91dfb69b7d644006916ea0d2dfc819faf52b11334fc10a4",
"type": "message",
"status": "completed",
"content": [
{
"type": "output_text",
"annotations": [],
"logprobs": [],
"text": "{\"labels\":[{\"name\":\"flower\",\"confidence\":0.99,\"topicality\":0.99},{\"name\":\"bee\",\"confidence\":0.95,\"topicality\":0.95},{\"name\":\"petal\",\"confidence\":0.92,\"topicality\":0.88},{\"name\":\"pollen\",\"confidence\":0.85,\"topicality\":0.8},{\"name\":\"insect\",\"confidence\":0.9,\"topicality\":0.85},{\"name\":\"red\",\"confidence\":0.88,\"topicality\":0.6},{\"name\":\"close-up\",\"confidence\":0.86,\"topicality\":0.7},{\"name\":\"nature\",\"confidence\":0.8,\"topicality\":0.5}]}"
}
],
"role": "assistant"
}
],
"parallel_tool_calls": true,
"previous_response_id": null,
"prompt_cache_key": null,
"prompt_cache_retention": null,
"reasoning": {
"effort": "low",
"summary": null
},
"safety_identifier": null,
"service_tier": "default",
"store": true,
"temperature": 1.0,
"text": {
"format": {
"type": "json_schema",
"description": null,
"name": "photoprism_vision_labels_v1",
"schema": {
"type": "object",
"properties": {
"labels": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"topicality": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": [
"name",
"confidence",
"topicality"
],
"additionalProperties": false
},
"default": []
}
},
"required": [
"labels"
],
"additionalProperties": false
},
"strict": true
},
"verbosity": "medium"
},
"tool_choice": "auto",
"tools": [],
"top_logprobs": 0,
"top_p": 1.0,
"truncation": "disabled",
"usage": {
"input_tokens": 724,
"input_tokens_details": {
"cached_tokens": 0
},
"output_tokens": 169,
"output_tokens_details": {
"reasoning_tokens": 0
},
"total_tokens": 893
},
"user": null,
"metadata": {}
}

View file

@ -0,0 +1,142 @@
package openai
import (
"encoding/json"
"strings"
)
const (
// ContentTypeText identifies text input segments for the Responses API.
ContentTypeText = "input_text"
// ContentTypeImage identifies image input segments for the Responses API.
ContentTypeImage = "input_image"
// ResponseFormatJSONSchema requests JSON constrained by a schema.
ResponseFormatJSONSchema = "json_schema"
// ResponseFormatJSONObject requests a free-form JSON object.
ResponseFormatJSONObject = "json_object"
)
// HTTPRequest represents the payload expected by OpenAI's Responses API.
type HTTPRequest struct {
Model string `json:"model"`
Input []InputMessage `json:"input"`
Text *TextOptions `json:"text,omitempty"`
Reasoning *Reasoning `json:"reasoning,omitempty"`
MaxOutputTokens int `json:"max_output_tokens,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
}
// TextOptions carries formatting preferences for textual responses.
type TextOptions struct {
Format *ResponseFormat `json:"format,omitempty"`
}
// Reasoning configures the effort level for reasoning models.
type Reasoning struct {
Effort string `json:"effort,omitempty"`
}
// InputMessage captures a single system or user message in the request.
type InputMessage struct {
Role string `json:"role"`
Type string `json:"type,omitempty"`
Content []ContentItem `json:"content"`
}
// ContentItem represents a text or image entry within a message.
type ContentItem struct {
Type string `json:"type"`
Text string `json:"text,omitempty"`
ImageURL string `json:"image_url,omitempty"`
Detail string `json:"detail,omitempty"`
}
// ResponseFormat describes how OpenAI should format its response.
type ResponseFormat struct {
Type string `json:"type"`
Name string `json:"name,omitempty"`
Schema json.RawMessage `json:"schema,omitempty"`
Description string `json:"description,omitempty"`
Strict bool `json:"strict,omitempty"`
}
// Response mirrors the subset of the Responses API response we need.
type Response struct {
ID string `json:"id"`
Model string `json:"model"`
Output []ResponseOutput `json:"output"`
Error *struct {
Message string `json:"message"`
Type string `json:"type"`
} `json:"error,omitempty"`
}
// ResponseOutput captures assistant messages within the response.
type ResponseOutput struct {
Role string `json:"role"`
Content []ResponseContent `json:"content"`
}
// ResponseContent contains individual message parts (JSON or text).
type ResponseContent struct {
Type string `json:"type"`
Text string `json:"text,omitempty"`
JSON json.RawMessage `json:"json,omitempty"`
}
// FirstJSON returns the first JSON payload contained in the response.
func (r *Response) FirstJSON() json.RawMessage {
if r == nil {
return nil
}
for i := range r.Output {
for j := range r.Output[i].Content {
if len(r.Output[i].Content[j].JSON) > 0 {
return r.Output[i].Content[j].JSON
}
}
}
return nil
}
// FirstText returns the first textual payload contained in the response.
func (r *Response) FirstText() string {
if r == nil {
return ""
}
for i := range r.Output {
for j := range r.Output[i].Content {
if text := strings.TrimSpace(r.Output[i].Content[j].Text); text != "" {
return text
}
}
}
return ""
}
// ParseErrorMessage extracts a human readable error message from a Responses API payload.
func ParseErrorMessage(raw []byte) string {
var errResp struct {
Error *struct {
Message string `json:"message"`
} `json:"error"`
}
if err := json.Unmarshal(raw, &errResp); err != nil {
return ""
}
if errResp.Error != nil {
return strings.TrimSpace(errResp.Error.Message)
}
return ""
}

View file

@ -0,0 +1,120 @@
package openai
import (
"encoding/json"
"os"
"path/filepath"
"testing"
)
func loadTestResponse(t *testing.T, name string) *Response {
t.Helper()
filePath := filepath.Join("testdata", name)
data, err := os.ReadFile(filePath)
if err != nil {
t.Fatalf("failed to read %s: %v", filePath, err)
}
var resp Response
if err := json.Unmarshal(data, &resp); err != nil {
t.Fatalf("failed to unmarshal %s: %v", filePath, err)
}
return &resp
}
func TestParseErrorMessage(t *testing.T) {
t.Run("returns message when present", func(t *testing.T) {
raw := []byte(`{"error":{"message":"Invalid schema"}}`)
msg := ParseErrorMessage(raw)
if msg != "Invalid schema" {
t.Fatalf("expected message, got %q", msg)
}
})
t.Run("returns empty string when error is missing", func(t *testing.T) {
raw := []byte(`{"output":[]}`)
if msg := ParseErrorMessage(raw); msg != "" {
t.Fatalf("expected empty message, got %q", msg)
}
})
}
func TestResponseFirstTextCaption(t *testing.T) {
resp := loadTestResponse(t, "caption-response.json")
if jsonPayload := resp.FirstJSON(); len(jsonPayload) != 0 {
t.Fatalf("expected no JSON payload, got: %s", jsonPayload)
}
text := resp.FirstText()
expected := "A bee gathers nectar from the vibrant red poppys center."
if text != expected {
t.Fatalf("unexpected caption text: %q", text)
}
}
func TestResponseFirstTextLabels(t *testing.T) {
resp := loadTestResponse(t, "labels-response.json")
if jsonPayload := resp.FirstJSON(); len(jsonPayload) != 0 {
t.Fatalf("expected no JSON payload, got: %s", jsonPayload)
}
text := resp.FirstText()
if len(text) == 0 {
t.Fatal("expected structured JSON string in text payload")
}
if text[0] != '{' {
t.Fatalf("expected JSON object in text payload, got %q", text)
}
}
func TestResponseFirstJSONFromStructuredPayload(t *testing.T) {
resp := &Response{
ID: "resp_structured",
Model: "gpt-5-mini",
Output: []ResponseOutput{
{
Role: "assistant",
Content: []ResponseContent{
{
Type: "output_json",
JSON: json.RawMessage(`{"labels":[{"name":"sunset"}]}`),
},
},
},
},
}
jsonPayload := resp.FirstJSON()
if len(jsonPayload) == 0 {
t.Fatal("expected JSON payload, got empty result")
}
var decoded struct {
Labels []map[string]string `json:"labels"`
}
if err := json.Unmarshal(jsonPayload, &decoded); err != nil {
t.Fatalf("failed to decode JSON payload: %v", err)
}
if len(decoded.Labels) != 1 || decoded.Labels[0]["name"] != "sunset" {
t.Fatalf("unexpected JSON payload: %+v", decoded.Labels)
}
}
func TestSchemaLabelsReturnsValidJSON(t *testing.T) {
raw := SchemaLabels(false)
var decoded map[string]any
if err := json.Unmarshal(raw, &decoded); err != nil {
t.Fatalf("schema should be valid JSON: %v", err)
}
if decoded["type"] != "object" {
t.Fatalf("expected type object, got %v", decoded["type"])
}
}

View file

@ -0,0 +1,52 @@
## PhotoPrism — Vision Schema Reference
**Last Updated:** November 14, 2025
### Overview
This package contains the canonical label response specifications used by PhotoPrism's external vision engines. It exposes two helpers:
- `LabelsJsonSchema(nsfw bool)` — returns a JSON **Schema** document tailored for OpenAI Responses requests, enabling strict validation of structured outputs.
- `LabelsJson(nsfw bool)` — returns a literal JSON **sample** that Ollama-style models can mirror when they only support prompt-enforced structures.
Both helpers build on the same field set (`name`, `confidence`, `topicality`, and optional NSFW flags) so downstream parsing logic (`LabelResult`) can remain engine-agnostic.
### Schema Types & Differences
| Helper | Target Engine | Format | Validation Style | When To Use |
|---------------------------|--------------------------|--------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
| `LabelsJsonSchema(false)` | OpenAI (standard labels) | JSON Schema Draft | Strong: OpenAI enforces field types/ranges server-side before returning a response. | When calling GPT-5 vision models via `ApiFormatOpenAI` to ensure PhotoPrism receives well-formed label arrays. |
| `LabelsJsonSchema(true)` | OpenAI (labels + NSFW) | JSON Schema Draft with additional boolean/float fields | Strong: same enforcement plus required NSFW fields. | When `DetectNSFWLabels` or NSFW-specific prompts are active and the model must emit `nsfw` + `nsfw_confidence`. |
| `LabelsJson(false)` | Ollama (standard labels) | Plain JSON example | Soft: model is nudged to mimic the structure through prompt instructions. | When running self-hosted Ollama models that support “JSON mode” but do not consume JSON Schema definitions. |
| `LabelsJson(true)` | Ollama (labels + NSFW) | Plain JSON example with NSFW keys | Soft: prompts describe the required keys; the adapter validates after parsing. | When Ollama prompts mention NSFW scoring or PhotoPrism sets `DetectNSFWLabels=true`. |
**Key technical distinction:** OpenAI's Responses API accepts a JSON Schema (see `LabelsJsonSchema*`) and guarantees compliance by rejecting invalid responses, while Ollama currently relies on prompt-directed output. For Ollama integrations we provide a representative JSON document (`LabelsJson*`) that models can imitate; PhotoPrism then normalizes and validates the results in Go.
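To see the difference in practice, both helpers can be printed side by side (a short sketch using the import path from this package):

```go
package main

import (
	"fmt"

	"github.com/photoprism/photoprism/internal/ai/vision/schema"
)

func main() {
	// JSON Schema document sent to OpenAI for server-side validation.
	fmt.Println(string(schema.LabelsJsonSchema(false)))

	// Plain JSON sample that Ollama-style prompts ask the model to mirror.
	fmt.Println(schema.LabelsJson(true))
}
```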
### Field Definitions
- `name` — single-word noun describing the subject (string, required).
- `confidence` — normalized score between `0` and `1` (float, required).
- `topicality` — relative relevance score between `0` and `1` (float, required; defaults to `confidence` if omitted after parsing).
- `nsfw` — boolean flag indicating sensitive content (required only in NSFW variants).
- `nsfw_confidence` — normalized probability for the NSFW assessment (required only in NSFW variants).
OpenAI schemas enforce these ranges/types, while Ollama prompts remind the model to emit matching keys. After parsing, PhotoPrism applies `LabelConfidenceDefault` and `normalizeLabelResult` to fill gaps and enforce naming rules.
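For illustration, a decoding target for these fields might look as follows; the struct names are hypothetical, and the actual `LabelResult` type in `internal/ai/vision` may differ in detail:

```go
package example

// labelEntry mirrors the JSON keys defined above; the nsfw fields are only
// present when the NSFW schema or sample variant is used.
type labelEntry struct {
	Name           string  `json:"name"`
	Confidence     float64 `json:"confidence"`
	Topicality     float64 `json:"topicality"`
	NSFW           bool    `json:"nsfw,omitempty"`
	NSFWConfidence float64 `json:"nsfw_confidence,omitempty"`
}

// labelsPayload matches the required top-level "labels" array.
type labelsPayload struct {
	Labels []labelEntry `json:"labels"`
}
```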
### Usage Guidance
1. **OpenAI models** (`Engine: openai`, `RequestFormat: openai`):
- Leave `Schema` unset in `vision.yml`; the engine defaults call `LabelsJsonSchema(model.PromptContains("nsfw"))`.
- Optionally override the schema via `Schema`/`SchemaFile` if you extend fields, but keep required keys so `LabelResult` parsing succeeds.
2. **Ollama models** (`Engine: ollama`, `RequestFormat: ollama`):
- Rely on the built-in samples from `LabelsJson` or include them directly in prompts via `model.SchemaInstructions()`.
- Because enforcement happens after the response arrives, keep `Format: json` (default) and `Options.ForceJson=true` for label models to make parsing stricter.
3. **Custom engines**:
- Reuse these helpers to stay compatible with PhotoPrism's label DTOs.
- When adding new fields, update both schema/sample versions so OpenAI and Ollama adapters remain aligned.
### References
- JSON Schema primer: https://json-schema.org/learn/miscellaneous-examples
- OpenAI structured outputs: https://platform.openai.com/docs/guides/structured-outputs
- JSON mode background (Ollama-style prompts): https://www.alibabacloud.com/help/en/model-studio/json-mode
- JSON syntax refresher: https://www.json.org/json-en.html

View file

@ -1,16 +1,115 @@
package schema
// LabelsDefault provides the minimal JSON schema for label responses used across engines.
const (
LabelsDefault = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0 }]\n}"
LabelsNSFW = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0,\n \"nsfw\": false,\n \"nsfw_confidence\": 0\n }]\n}"
import (
"encoding/json"
)
// Labels returns the canonical label schema string.
func Labels(nsfw bool) string {
// LabelsJsonSchemaDefault provides the minimal JSON schema for label responses used across engines.
const (
LabelsJsonSchemaDefault = `{
"type": "object",
"properties": {
"labels": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"topicality": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": ["name", "confidence", "topicality"],
"additionalProperties": false
},
"default": []
}
},
"required": ["labels"],
"additionalProperties": false
}`
LabelsJsonDefault = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0 }]\n}"
LabelsJsonSchemaNSFW = `{
"type": "object",
"properties": {
"labels": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"topicality": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"nsfw": {
"type": "boolean"
},
"nsfw_confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": [
"name",
"confidence",
"topicality",
"nsfw",
"nsfw_confidence"
],
"additionalProperties": false
},
"default": []
}
},
"required": ["labels"],
"additionalProperties": false
}`
LabelsJsonNSFW = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0,\n \"nsfw\": false,\n \"nsfw_confidence\": 0\n }]\n}"
)
// LabelsJsonSchema returns the canonical label JSON Schema string for OpenAI API endpoints.
//
// Related documentation and references:
// - https://platform.openai.com/docs/guides/structured-outputs
// - https://json-schema.org/learn/miscellaneous-examples
func LabelsJsonSchema(nsfw bool) json.RawMessage {
if nsfw {
return LabelsNSFW
return json.RawMessage(LabelsJsonSchemaNSFW)
} else {
return LabelsDefault
return json.RawMessage(LabelsJsonSchemaDefault)
}
}
// LabelsJson returns the canonical label JSON string for Ollama vision models.
//
// Related documentation and references:
// - https://www.alibabacloud.com/help/en/model-studio/json-mode
// - https://www.json.org/json-en.html
func LabelsJson(nsfw bool) string {
if nsfw {
return LabelsJsonNSFW
} else {
return LabelsJsonDefault
}
}

View file

@ -0,0 +1,36 @@
package schema
import (
"bytes"
"encoding/json"
"fmt"
"github.com/photoprism/photoprism/pkg/clean"
)
const (
NamePrefix = "photoprism_vision"
)
// JsonSchemaName returns the schema version string to be used for API requests.
func JsonSchemaName(schema json.RawMessage, version string) string {
var schemaName string
switch {
case bytes.Contains(schema, []byte("labels")):
schemaName = "labels"
case bytes.Contains(schema, []byte("labels")):
schemaName = "caption"
default:
schemaName = "schema"
}
version = clean.TypeLowerUnderscore(version)
if version == "" {
version = "v1"
}
return fmt.Sprintf("%s_%s_%s", NamePrefix, schemaName, version)
}

View file

@ -0,0 +1,23 @@
package schema
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
)
func TestJsonSchemaName(t *testing.T) {
t.Run("Default", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_schema_v1", JsonSchemaName(nil, ""))
})
t.Run("Labels", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_labels_v1", JsonSchemaName(json.RawMessage(LabelsJsonSchemaDefault), ""))
})
t.Run("LabelsV1", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_labels_v2", JsonSchemaName([]byte("labels"), "v2"))
})
t.Run("LabelsJsonSchema", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_labels_v1", JsonSchemaName(LabelsJsonSchema(false), "v1"))
})
}

View file

@ -1,5 +1,5 @@
/*
Package schema defines canonical JSON schema templates shared by PhotoPrism's AI vision engines.
Package schema defines canonical JSON and JSON Schema templates shared by PhotoPrism's AI vision engines.
Copyright (c) 2018 - 2025 PhotoPrism UG. All rights reserved.

Some files were not shown because too many files have changed in this diff.