Merge branch 'develop' into feature/batch-edit

This commit is contained in:
Michael Mayer 2025-11-15 15:42:59 +01:00
commit d2541e674a
59 changed files with 2571 additions and 216 deletions

View file

@ -1,6 +1,6 @@
# PhotoPrism® Repository Guidelines
**Last Updated:** November 12, 2025
**Last Updated:** November 14, 2025
## Purpose
@ -17,6 +17,7 @@ Learn more: https://agents.md/
- REST API: https://docs.photoprism.dev/ (Swagger), https://docs.photoprism.app/developer-guide/api/ (Docs)
- Code Maps: [`CODEMAP.md`](CODEMAP.md) (Backend/Go), [`frontend/CODEMAP.md`](frontend/CODEMAP.md) (Frontend/JS)
- Face Detection & Embeddings Notes: [`internal/ai/face/README.md`](internal/ai/face/README.md)
- Vision Engine Guides: [`internal/ai/vision/openai/README.md`](internal/ai/vision/openai/README.md), [`internal/ai/vision/ollama/README.md`](internal/ai/vision/ollama/README.md)
> Quick Tip: to inspect GitHub issue details without leaving the terminal, run `curl -s https://api.github.com/repos/photoprism/photoprism/issues/<id>`.

View file

@ -1,6 +1,6 @@
PhotoPrism — Backend CODEMAP
**Last Updated:** November 2, 2025
**Last Updated:** November 14, 2025
Purpose
- Give agents and contributors a fast, reliable map of where things live and how they fit together, so you can add features, fix bugs, and write tests without spelunking.
@ -35,6 +35,7 @@ High-Level Package Map (Go)
- `internal/config` — configuration, flags/env/options, client config, DB init/migrate
- `internal/entity` — GORM v1 models, queries, search helpers, migrations
- `internal/photoprism` — core domain logic (indexing, import, faces, thumbnails, cleanup)
- `internal/ai/vision` — multi-engine computer vision pipeline (models, adapters, schema). Adapter docs: [`internal/ai/vision/openai/README.md`](internal/ai/vision/openai/README.md) and [`internal/ai/vision/ollama/README.md`](internal/ai/vision/ollama/README.md).
- `internal/workers` — background schedulers (index, vision, sync, meta, backup)
- `internal/auth` — ACL, sessions, OIDC
- `internal/service` — cluster/portal, maps, hub, webdav

View file

@ -1,5 +1,5 @@
# Ubuntu 25.10 (Questing Quokka)
FROM photoprism/develop:251018-questing
FROM photoprism/develop:251113-questing
# Harden npm usage by default (applies to npm ci / install in dev container)
ENV NPM_CONFIG_IGNORE_SCRIPTS=true

View file

@ -388,7 +388,8 @@ services:
## Login with "user / photoprism" and "admin / photoprism".
keycloak:
image: quay.io/keycloak/keycloak:25.0
stop_grace_period: 30s
stop_grace_period: 20s
profiles: [ "all", "auth", "keycloak" ]
command: "start-dev" # development mode, do not use this in production!
links:
- "traefik:localssl.dev"

View file

@ -2,7 +2,7 @@ msgid ""
msgstr ""
"Project-Id-Version: \n"
"Report-Msgid-Bugs-To: ci@photoprism.app\n"
"PO-Revision-Date: 2025-11-11 22:02+0000\n"
"PO-Revision-Date: 2025-11-14 22:02+0000\n"
"Last-Translator: dtsolakis <dtsola@eranet.gr>\n"
"Language: el\n"
"Content-Type: text/plain; charset=UTF-8\n"
@ -88,7 +88,7 @@ msgstr "12 ώρες"
#: src/component/user/edit/dialog.vue:304 src/page/settings/account.vue:168
#: src/component/settings/passcode.vue:25
msgid "2-Factor Authentication"
msgstr "Αυθεντικοποίηση 2 παραγόντων"
msgstr "Αυθεντικοποίηση 2 Παραγόντων"
#: src/component/user/edit/dialog.vue:303 src/options/auth.js:47
msgid "2FA"
@ -246,7 +246,7 @@ msgstr "Προστέθηκε"
#: src/component/location/dialog.vue:21 src/component/location/dialog.vue:26
#: src/component/photo/edit/details.vue:158
msgid "Adjust Location"
msgstr "Ρύθμιση τοποθεσίας"
msgstr "Ρύθμιση Τοποθεσίας"
#: src/options/admin.js:6 src/common/util.js:798 src/options/auth.js:6
msgid "Admin"
@ -254,7 +254,7 @@ msgstr "Διαχειριστής"
#: src/page/settings.vue:86
msgid "Advanced"
msgstr "Σύνθετο"
msgstr "Προηγμένες Ρυθμίσεις"
#: src/options/options.js:378
msgid "After 1 day"
@ -298,7 +298,7 @@ msgstr "Άλμπουμ"
#: src/page/settings/advanced.vue:193
msgid "Album Backups"
msgstr "Αντίγραφα ασφαλείας άλμπουμ"
msgstr "Αντίγραφα Ασφαλείας Άλμπουμ"
#: src/page/albums.vue:1265
msgid "Album created"
@ -374,7 +374,7 @@ msgstr "Όλα τα πρωτότυπα"
#: src/component/photo/toolbar.vue:381 src/page/albums.vue:455
msgid "All Years"
msgstr "Όλα τα έτη"
msgstr "Όλα τα Χρόνια"
#: src/component/share/dialog.vue:134
msgid "Alternatively, you can upload files directly to WebDAV servers like Nextcloud."
@ -440,7 +440,7 @@ msgstr "Έγκριση και αποθήκευση αλλαγών"
#: src/page/settings/account.vue:183 src/component/settings/apps.vue:25
msgid "Apps and Devices"
msgstr "Εφαρμογές και συσκευές"
msgstr "Εφαρμογές και Συσκευές"
#: src/component/lightbox.vue:2315 src/component/photo/edit/info.vue:238
#: src/component/photo/edit/info.vue:239
@ -558,7 +558,7 @@ msgstr "Βιογραφικό"
#: src/page/settings/account.vue:212
msgid "Birth Date"
msgstr "Ημερομηνία γέννησης"
msgstr "Ημερομηνία Γέννησης"
#: src/options/options.js:403
msgid "Black"
@ -697,7 +697,7 @@ msgstr "Αλλαγή Άβαταρ"
#: src/page/settings/account.vue:155 src/component/settings/password.vue:16
msgid "Change Password"
msgstr "Νέος κωδικός πρόσβασης"
msgstr "Αλλαγή Κωδικού Πρόσβασης"
#: src/page/settings/general.vue:328
msgid "Change personal profile and security settings."
@ -733,7 +733,7 @@ msgstr "Οι αλλαγές αποθηκεύτηκαν επιτυχώς"
#: src/page/settings/advanced.vue:16
msgid "Changes to the advanced settings require a restart to take effect."
msgstr "Οι αλλαγές στις ρυθμίσεις για προχωρημένους απαιτούν επανεκκίνηση για να τεθούν σε ισχύ."
msgstr "Οι αλλαγές στις προηγμένες ρυθμίσεις απαιτούν επανεκκίνηση για να τεθούν σε ισχύ."
#: src/component/photo/edit/info.vue:230 src/component/photo/edit/info.vue:231
msgid "Checked"
@ -988,7 +988,7 @@ msgstr "Βάση δεδομένων"
#: src/page/settings/advanced.vue:177
msgid "Database Backups"
msgstr "Αντίγραφα ασφαλείας βάσης δεδομένων"
msgstr "Αντίγραφα Ασφαλείας Βάσης Δεδομένων"
#: src/locales.js:328
msgid "Databases"
@ -1066,7 +1066,7 @@ msgstr "Διαστάσεις"
#: src/page/settings/advanced.vue:84
msgid "Disable Backups"
msgstr "Απενεργοποίηση αντιγράφων ασφαλείας"
msgstr "Απενεργοποίηση Αντιγράφων Ασφαλείας"
#: src/page/settings/advanced.vue:366
msgid "Disable Darktable"
@ -1094,7 +1094,7 @@ msgstr "Απενεργοποίηση των διαδραστικών παγκό
#: src/page/settings/advanced.vue:116
msgid "Disable Places"
msgstr "Απενεργοποίηση Places"
msgstr "Απενεργοποίηση Τοποθεσιών"
#: src/page/settings/advanced.vue:382
msgid "Disable RawTherapee"
@ -1106,7 +1106,7 @@ msgstr "Απενεργοποίηση TensorFlow"
#: src/page/settings/advanced.vue:446
msgid "Disable Vectors"
msgstr "Απενεργοποίηση διανυσμάτων"
msgstr "Απενεργοποίηση Διανυσμάτων"
#: src/page/settings/advanced.vue:100
msgid "Disable WebDAV"
@ -1142,7 +1142,7 @@ msgstr "Απόρριψη"
#: src/page/admin/users.vue:267 src/page/settings/account.vue:76
#: src/page/settings/account.vue:78 src/locales.js:321
msgid "Display Name"
msgstr "Εμφανιζόμενο όνομα"
msgstr "Εμφανιζόμενο Όνομα"
#: src/page/settings/content.vue:170
msgid "Display picture captions in search results."
@ -1247,11 +1247,11 @@ msgstr "Διάρκεια"
#: src/page/settings/advanced.vue:285
msgid "Dynamic Previews"
msgstr "Δυναμικές προεπισκοπήσεις"
msgstr "Δυναμικές Προεπισκοπήσεις"
#: src/page/settings/advanced.vue:261
msgid "Dynamic Size Limit: %{n}px"
msgstr "Όριο δυναμικού μεγέθους: %{n}px"
msgstr "Όριο Δυναμικού Μεγέθους: %{n}px"
#: src/page/about/feedback.vue:80 src/page/about/feedback.vue:79
msgid "E-Mail"
@ -1369,7 +1369,7 @@ msgstr "Η εξαγωγή μεταδεδομένων με το ExifTool απαι
#: src/page/settings/advanced.vue:52
msgid "Experimental Features"
msgstr "Πειραματικά Χαρακτηριστικά"
msgstr "Πειραματικές Λειτουργίες"
#: src/page/admin/sessions.vue:203 src/page/admin/sessions.vue:296
#: src/component/service/edit.vue:69 src/component/settings/apps.vue:160
@ -1416,7 +1416,7 @@ msgstr "Επίθετο"
#: src/options/options.js:222
msgid "Fast"
msgstr "Γρήγορα"
msgstr "Γρήγορο"
#: src/component/album/edit/dialog.vue:91
#: src/component/label/edit/dialog.vue:44
@ -1457,7 +1457,7 @@ msgstr "Πρόγραμμα περιήγησης αρχείων"
#: src/page/settings/advanced.vue:354
msgid "File Conversion"
msgstr "Μετατροπή αρχείου"
msgstr "Μετατροπή Αρχείων"
#: src/component/album/edit/dialog.vue:147 src/component/photo/toolbar.vue:424
#: src/component/photo/toolbar.vue:435 src/component/photo/toolbar.vue:446
@ -1537,7 +1537,7 @@ msgstr "Πλήρης πρόσβαση"
#: src/component/lightbox.vue:1264 src/component/lightbox.vue:1265
msgid "Fullscreen"
msgstr "Πλήρης οθόνη"
msgstr "Πλήρης Οθόνη"
#: src/page/settings.vue:60
msgid "General"
@ -1655,7 +1655,7 @@ msgstr "Εικόνα"
#: src/page/settings/advanced.vue:301
msgid "Image Quality"
msgstr "Ποιότητα εικόνας"
msgstr "Ποιότητα Εικόνας"
#: src/page/library.vue:74 src/page/library/import.vue:44
#: src/page/library/import.vue:45 src/page/library/import.vue:73
@ -1809,7 +1809,7 @@ msgstr "Ποιότητα JPEG: %{n}"
#: src/page/settings/advanced.vue:323
msgid "JPEG Size Limit: %{n}px"
msgstr "Όριο μεγέθους JPEG: %{n}px"
msgstr "Όριο Μεγέθους JPEG: %{n}px"
#: src/page/library/import.vue:58
msgid "JPEGs and thumbnails are automatically rendered as needed."
@ -1856,7 +1856,7 @@ msgstr "Τελευταία φορά ενεργός"
#: src/page/admin/users.vue:276 src/locales.js:335
msgid "Last Login"
msgstr "Τελευταία σύνδεση"
msgstr "Τελευταία Σύνδεση"
#: src/locales.js:235 src/locales.js:293
msgid "Last page"
@ -1938,7 +1938,7 @@ msgstr "Λίστα"
#: src/page/settings/content.vue:141
msgid "List View"
msgstr "Προβολή λίστας"
msgstr "Προβολή σε Λίστα"
#: src/component/photo/view/cards.vue:139
#: src/component/photo/view/cards.vue:280 src/component/photo/view/list.vue:94
@ -1951,7 +1951,7 @@ msgstr "Ζωντανό"
#: src/component/navigation.vue:222 src/component/navigation.vue:237
#: src/component/navigation.vue:331
msgid "Live Photos"
msgstr "Φωτογραφίες"
msgstr "Ζωντανές Εικόνες"
#: src/locales.js:307
msgid "Load more"
@ -2188,7 +2188,7 @@ msgstr "Νέος κωδικός πρόσβασης"
#: src/component/photo/toolbar.vue:431 src/component/photo/toolbar.vue:441
#: src/page/albums.vue:462
msgid "Newest First"
msgstr "Το νεότερο πρώτα"
msgstr "Πρώτα τα πιο Πρόσφατα"
#: src/component/lightbox.vue:412 src/locales.js:297
msgid "Next"
@ -2285,7 +2285,7 @@ msgstr "Οι μη φωτογραφικές εικόνες και οι εικόν
#: src/options/admin.js:51 src/options/auth.js:33 src/options/options.js:218
#: src/options/options.js:334
msgid "None"
msgstr "Κανένα"
msgstr "Καθόλου"
#: src/component/lightbox.vue:786 src/component/service/upload.vue:159
#: src/component/service/upload.vue:171
@ -2345,7 +2345,7 @@ msgstr "OK"
#: src/component/photo/toolbar.vue:432 src/component/photo/toolbar.vue:442
#: src/page/albums.vue:463
msgid "Oldest First"
msgstr "Ο παλαιότερος πρώτος"
msgstr "Πρώτα τα πιο Παλιά"
#: src/component/settings/webdav.vue:17 src/component/settings/webdav.vue:18
#: src/component/settings/webdav.vue:27 src/component/settings/webdav.vue:38
@ -2608,7 +2608,7 @@ msgstr "ΜΜ"
#: src/page/settings/advanced.vue:338
msgid "PNG Size Limit: %{n}px"
msgstr "Όριο μεγέθους PNG: %{n}px"
msgstr "Όριο Μεγέθους PNG: %{n}px"
#: src/locales.js:323
msgid "Portal"
@ -2642,7 +2642,7 @@ msgstr "Προεπισκόπηση"
#: src/page/settings/advanced.vue:222
msgid "Preview Images"
msgstr "Εικόνες προεπισκόπισης"
msgstr "Εικόνες Προεπισκόπισης"
#: src/component/lightbox.vue:411 src/locales.js:298
msgid "Previous"
@ -2719,13 +2719,13 @@ msgstr "Δημιουργία ευρετηρίου όλων των πρωτοτύ
#: src/page/settings/advanced.vue:68
msgid "Read-Only Mode"
msgstr "Λειτουργία μόνο για ανάγνωση"
msgstr "Λειτουργία Μόνο Ανάγνωσης"
#: src/component/album/edit/dialog.vue:145 src/component/photo/toolbar.vue:421
#: src/component/photo/toolbar.vue:433 src/component/photo/toolbar.vue:443
#: src/page/albums.vue:464
msgid "Recently Added"
msgstr "Πρόσφατα προστέθηκε"
msgstr "Πρόσφατες Προσθήκες"
#: src/component/photo/toolbar.vue:422
msgid "Recently Archived"
@ -2742,7 +2742,7 @@ msgstr "Η αναγνώριση ξεκινά μετά την ολοκλήρωσ
#: src/page/settings/general.vue:88
msgid "Recognize faces so people can be assigned and found."
msgstr "Αναγνωρίζει πρόσωπα ώστε να μπορούν να βρεθούν συγκεκριμένα άτομα."
msgstr "Αναγνώριση προσώπων ώστε να μπορούν να ορίζονται και να εντοπίζονται συγκεκριμένα άτομα."
#: src/page/people.vue:61
msgid "Recognized"
@ -3039,7 +3039,7 @@ msgstr "URL υπηρεσίας"
#: src/locales.js:359 src/page/settings.vue:99
#: src/page/settings/general.vue:267
msgid "Services"
msgstr "URL υπηρεσίας"
msgstr "Υπηρεσίες"
#: src/locales.js:342 src/model/session.js:83 src/options/auth.js:42
#: src/options/auth.js:91
@ -3114,7 +3114,7 @@ msgstr "Εμφάνιση όλων των νέων προσώπων"
#: src/page/settings/content.vue:169
msgid "Show Captions"
msgstr "Εμφάνιση λεζάντων"
msgstr "Εμφάνιση Λεζάντων"
#: src/page/people/new.vue:12 src/page/people/recognized.vue:45
msgid "Show hidden"
@ -3130,7 +3130,7 @@ msgstr "Εμφάνιση των αρχείων καταγραφής του δι
#: src/page/settings/content.vue:155
msgid "Show Titles"
msgstr "Εμφάνιση τίτλων"
msgstr "Εμφάνιση Τίτλων"
#: src/model/file.js:190 src/page/settings/content.vue:221
msgid "Sidecar"
@ -3164,7 +3164,7 @@ msgstr "Μέγεθος"
#: src/component/lightbox.vue:1247 src/component/lightbox.vue:1248
msgid "Slideshow"
msgstr "Παρουσίαση διαφανειών"
msgstr "Παρουσίαση"
#: src/options/options.js:230
msgid "Slow"
@ -3266,7 +3266,7 @@ msgstr "Σελίδα έναρξης"
#: src/page/settings/advanced.vue:244
msgid "Static Size Limit: %{n}px"
msgstr "Όριο στατικού μεγέθους: %{n}px"
msgstr "Όριο Στατικού Μεγέθους: %{n}px"
#: src/component/photo/edit/files.vue:463
msgid "Status"
@ -3339,7 +3339,7 @@ msgstr "Γαλαζοπράσινο"
#: src/page/settings/advanced.vue:150
msgid "TensorFlow is required for image classification, facial recognition, and detecting unsafe content."
msgstr "Το TensorFlow απαιτείται για την ταξινόμηση εικόνων, την αναγνώριση προσώπου και την ανίχνευση μη ασφαλούς περιεχομένου."
msgstr "Το TensorFlow απαιτείται για την ταξινόμηση εικόνων, την αναγνώριση προσώπων και την ανίχνευση μη ασφαλούς περιεχομένου."
#: src/options/options.js:267
msgid "Terrain"
@ -3443,7 +3443,7 @@ msgstr "Σήμερα"
#: src/component/album/toolbar.vue:28 src/component/photo/toolbar.vue:55
msgid "Toggle View"
msgstr "Εναλλαγή προβολής"
msgstr "Εναλλαγή Προβολής"
#: src/component/share/dialog.vue:89
msgid "Token"
@ -3606,7 +3606,7 @@ msgstr "Διαδρομή φόρτωσης"
#: src/page/settings/general.vue:209
msgid "Upload to WebDAV and share links with friends."
msgstr "Ανεβάστε σε WebDAV και μοιραστείτε συνδέσμους με φίλους."
msgstr "Ανεβάστε στο WebDAV και μοιραστείτε συνδέσμους με φίλους."
#: src/component/upload/dialog.vue:40
msgid "Uploading %{n} of %{t}…"
@ -3658,7 +3658,7 @@ msgstr "Διεπαφή χρήστη"
#: src/component/service/add.vue:38 src/component/service/edit.vue:172
#: src/component/share/dialog.vue:174 src/locales.js:320
msgid "Username"
msgstr "Όνομα χρήστη"
msgstr "Όνομα Χρήστη"
#: src/component/navigation.vue:400 src/component/navigation.vue:401
#: src/component/navigation.vue:425 src/component/navigation.vue:431

View file

@ -0,0 +1,102 @@
import { mount, config as VTUConfig } from "@vue/test-utils";
import { describe, it, expect, beforeEach } from "vitest";
import { nextTick } from "vue";
import PLightbox from "component/lightbox.vue";
const mountLightbox = () =>
mount(PLightbox, {
global: {
stubs: {
"v-dialog": true,
"v-icon": true,
"v-slider": true,
"p-lightbox-menu": true,
"p-sidebar-info": true,
},
},
});
describe("PLightbox (low-mock, jsdom-friendly)", () => {
beforeEach(() => {
localStorage.removeItem("lightbox.info");
sessionStorage.removeItem("lightbox.muted");
});
it("toggleInfo updates info and localStorage when visible", async () => {
const wrapper = mountLightbox();
await wrapper.setData({ visible: true });
// Use exposed onShortCut to trigger info toggle (KeyI)
await wrapper.vm.onShortCut({ code: "KeyI" });
await nextTick();
expect(localStorage.getItem("lightbox.info")).toBe("true");
await wrapper.vm.onShortCut({ code: "KeyI" });
await nextTick();
expect(localStorage.getItem("lightbox.info")).toBe("false");
});
it("toggleMute writes sessionStorage without requiring video or exposed state", async () => {
const wrapper = mountLightbox();
expect(sessionStorage.getItem("lightbox.muted")).toBeNull();
await wrapper.vm.onShortCut({ code: "KeyM" });
expect(sessionStorage.getItem("lightbox.muted")).toBe("true");
await wrapper.vm.onShortCut({ code: "KeyM" });
expect(sessionStorage.getItem("lightbox.muted")).toBe("false");
});
it("getPadding returns expected structure for large and small screens", async () => {
const wrapper = mountLightbox();
// Large viewport
const large = wrapper.vm.$options.methods.getPadding.call(
wrapper.vm,
{ x: 1200, y: 800 },
{ width: 4000, height: 3000 }
);
expect(large).toHaveProperty("top");
expect(large).toHaveProperty("bottom");
expect(large).toHaveProperty("left");
expect(large).toHaveProperty("right");
// Small viewport (<= mobileBreakpoint) should yield zeros
const small = wrapper.vm.$options.methods.getPadding.call(
wrapper.vm,
{ x: 360, y: 640 },
{ width: 1200, height: 800 }
);
expect(small).toEqual({ top: 0, bottom: 0, left: 0, right: 0 });
});
it("KeyI is ignored when dialog is not visible", async () => {
const wrapper = mountLightbox();
expect(localStorage.getItem("lightbox.info")).toBeNull();
await wrapper.vm.onShortCut({ code: "KeyI" });
expect(localStorage.getItem("lightbox.info")).toBeNull();
});
it("getViewport falls back to window size without content ref", () => {
const wrapper = mountLightbox();
const vp = wrapper.vm.$options.methods.getViewport.call(wrapper.vm);
expect(vp.x).toBeGreaterThan(0);
expect(vp.y).toBeGreaterThan(0);
});
it("menuActions marks Download action visible when allowed", () => {
const wrapper = mountLightbox();
const ctx = {
$gettext: VTUConfig.global.mocks.$gettext,
$pgettext: VTUConfig.global.mocks.$pgettext,
// minimal state needed by menuActions visibility checks
canManageAlbums: false,
canArchive: false,
canDownload: true,
collection: null,
context: "",
model: {},
};
const actions = wrapper.vm.$options.methods.menuActions.call(ctx);
const download = actions.find((a) => a?.name === "download");
expect(download).toBeTruthy();
expect(download.visible).toBe(true);
});
});

View file

@ -38,10 +38,48 @@ if (typeof global.ResizeObserver === "undefined") {
// Configure Vue Test Utils global configuration
config.global.mocks = {
$gettext: (text) => text,
$pgettext: (_ctx, text) => text,
$isRtl: false,
$config: {
feature: (_name) => true,
feature: () => true,
get: () => false,
getSettings: () => ({ features: { edit: true, favorites: true, download: true, archive: true } }),
allow: () => true,
featExperimental: () => false,
featDevelop: () => false,
values: {},
dir: () => "ltr",
},
$event: {
subscribe: () => "sub-id",
subscribeOnce: () => "sub-id-once",
unsubscribe: () => {},
publish: () => {},
},
$view: {
enter: () => {},
leave: () => {},
isActive: () => true,
},
$notify: { success: () => {}, error: () => {}, warn: () => {} },
$fullscreen: {
isSupported: () => true,
isEnabled: () => false,
request: () => Promise.resolve(),
exit: () => Promise.resolve(),
},
$clipboard: { selection: [], has: () => false, toggle: () => {} },
$util: {
hasTouch: () => false,
encodeHTML: (s) => s,
sanitizeHtml: (s) => s,
formatSeconds: (n) => String(n),
formatRemainingSeconds: () => "0",
videoFormat: () => "avc",
videoFormatUrl: () => "/v.mp4",
thumb: () => ({ src: "/t.jpg", w: 100, h: 100 }),
},
$api: { post: vi.fn(), delete: vi.fn(), get: vi.fn() },
};
config.global.plugins = [vuetify];

View file

@ -9,6 +9,9 @@ import (
"io"
"net/http"
"github.com/sirupsen/logrus"
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/http/header"
)
@ -69,6 +72,10 @@ func PerformApiRequest(apiRequest *ApiRequest, uri, method, key string) (apiResp
return nil, parseErr
}
if log.IsLevelEnabled(logrus.TraceLevel) {
log.Tracef("vision: response %s", string(body))
}
return parsed, nil
}
@ -89,12 +96,12 @@ func PerformApiRequest(apiRequest *ApiRequest, uri, method, key string) (apiResp
return apiResponse, nil
}
func decodeOllamaResponse(data []byte) (*ApiResponseOllama, error) {
resp := &ApiResponseOllama{}
func decodeOllamaResponse(data []byte) (*ollama.Response, error) {
resp := &ollama.Response{}
dec := json.NewDecoder(bytes.NewReader(data))
for {
var chunk ApiResponseOllama
var chunk ollama.Response
if err := dec.Decode(&chunk); err != nil {
if errors.Is(err, io.EOF) {
break

View file

@ -8,6 +8,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
"github.com/photoprism/photoprism/pkg/http/scheme"
)
@ -49,7 +50,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
var req ApiRequest
assert.NoError(t, json.NewDecoder(r.Body).Decode(&req))
assert.Equal(t, FormatJSON, req.Format)
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
Model: "qwen2.5vl:latest",
Response: `{"labels":[{"name":"test","confidence":0.9,"topicality":0.8}]}`,
}))
@ -72,7 +73,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
})
t.Run("LabelsWithCodeFence", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
Model: "gemma3:latest",
Response: "```json\n{\"labels\":[{\"name\":\"lingerie\",\"confidence\":0.81,\"topicality\":0.73}]}\n```\nThe model provided additional commentary.",
}))
@ -95,7 +96,7 @@ func TestPerformApiRequestOllama(t *testing.T) {
})
t.Run("CaptionFallback", func(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.NoError(t, json.NewEncoder(w).Encode(ApiResponseOllama{
assert.NoError(t, json.NewEncoder(w).Encode(ollama.Response{
Model: "qwen2.5vl:latest",
Response: "plain text",
}))

View file

@ -1,10 +1,8 @@
package vision
import (
"errors"
"fmt"
"os"
"time"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/http/scheme"
@ -12,53 +10,6 @@ import (
"github.com/photoprism/photoprism/pkg/rnd"
)
// ApiResponseOllama represents a Ollama API service response.
type ApiResponseOllama struct {
Id string `yaml:"Id,omitempty" json:"id,omitempty"`
Code int `yaml:"Code,omitempty" json:"code,omitempty"`
Error string `yaml:"Error,omitempty" json:"error,omitempty"`
Model string `yaml:"Model,omitempty" json:"model,omitempty"`
CreatedAt time.Time `yaml:"CreatedAt,omitempty" json:"created_at,omitempty"`
Response string `yaml:"Response,omitempty" json:"response,omitempty"`
Done bool `yaml:"Done,omitempty" json:"done,omitempty"`
Context []int `yaml:"Context,omitempty" json:"context,omitempty"`
TotalDuration int64 `yaml:"TotalDuration,omitempty" json:"total_duration,omitempty"`
LoadDuration int `yaml:"LoadDuration,omitempty" json:"load_duration,omitempty"`
PromptEvalCount int `yaml:"PromptEvalCount,omitempty" json:"prompt_eval_count,omitempty"`
PromptEvalDuration int `yaml:"PromptEvalDuration,omitempty" json:"prompt_eval_duration,omitempty"`
EvalCount int `yaml:"EvalCount,omitempty" json:"eval_count,omitempty"`
EvalDuration int64 `yaml:"EvalDuration,omitempty" json:"eval_duration,omitempty"`
Result ApiResult `yaml:"Result,omitempty" json:"result,omitempty"`
}
// Err returns an error if the request has failed.
func (r *ApiResponseOllama) Err() error {
if r == nil {
return errors.New("response is nil")
}
if r.Code >= 400 {
if r.Error != "" {
return errors.New(r.Error)
}
return fmt.Errorf("error %d", r.Code)
} else if r.Result.IsEmpty() {
return errors.New("no result")
}
return nil
}
// HasResult checks if there is at least one result in the response data.
func (r *ApiResponseOllama) HasResult() bool {
if r == nil {
return false
}
return !r.Result.IsEmpty()
}
// NewApiRequestOllama returns a new Ollama API request with the specified images as payload.
func NewApiRequestOllama(images Files, fileScheme scheme.Type) (*ApiRequest, error) {
imagesData := make(Files, len(images))

View file

@ -11,6 +11,8 @@ import (
"github.com/sirupsen/logrus"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/ai/vision/schema"
"github.com/photoprism/photoprism/internal/api/download"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/fs"
@ -58,6 +60,11 @@ type ApiRequestOptions struct {
UseMmap bool `yaml:"UseMmap,omitempty" json:"use_mmap,omitempty"`
UseMlock bool `yaml:"UseMlock,omitempty" json:"use_mlock,omitempty"`
NumThread int `yaml:"NumThread,omitempty" json:"num_thread,omitempty"`
MaxOutputTokens int `yaml:"MaxOutputTokens,omitempty" json:"max_output_tokens,omitempty"`
Detail string `yaml:"Detail,omitempty" json:"detail,omitempty"`
ForceJson bool `yaml:"ForceJson,omitempty" json:"force_json,omitempty"`
SchemaVersion string `yaml:"SchemaVersion,omitempty" json:"schema_version,omitempty"`
CombineOutputs string `yaml:"CombineOutputs,omitempty" json:"combine_outputs,omitempty"`
}
// ApiRequestContext represents a context parameter returned from a previous request.
@ -77,6 +84,7 @@ type ApiRequest struct {
Context *ApiRequestContext `form:"context" yaml:"Context,omitempty" json:"context,omitempty"`
Stream bool `form:"stream" yaml:"Stream,omitempty" json:"stream"`
Images Files `form:"images" yaml:"Images,omitempty" json:"images,omitempty"`
Schema json.RawMessage `form:"schema" yaml:"Schema,omitempty" json:"schema,omitempty"`
ResponseFormat ApiFormat `form:"-" yaml:"-" json:"-"`
}
@ -195,6 +203,14 @@ func (r *ApiRequest) GetResponseFormat() ApiFormat {
// JSON returns the request data as JSON-encoded bytes.
func (r *ApiRequest) JSON() ([]byte, error) {
if r == nil {
return nil, errors.New("api request is nil")
}
if r.ResponseFormat == ApiFormatOpenAI {
return r.openAIJSON()
}
return json.Marshal(*r)
}
@ -229,6 +245,8 @@ func (r *ApiRequest) sanitizedForLog() ApiRequest {
sanitized.Url = sanitizeLogPayload(r.Url)
sanitized.Schema = r.Schema
return sanitized
}
@ -287,3 +305,134 @@ func isLikelyBase64(value string) bool {
return true
}
// openAIJSON converts the request data into an OpenAI Responses API payload.
func (r *ApiRequest) openAIJSON() ([]byte, error) {
detail := openai.DefaultDetail
if opts := r.Options; opts != nil && strings.TrimSpace(opts.Detail) != "" {
detail = strings.TrimSpace(opts.Detail)
}
messages := make([]openai.InputMessage, 0, 2)
if system := strings.TrimSpace(r.System); system != "" {
messages = append(messages, openai.InputMessage{
Role: "system",
Type: "message",
Content: []openai.ContentItem{
{
Type: openai.ContentTypeText,
Text: system,
},
},
})
}
userContent := make([]openai.ContentItem, 0, len(r.Images)+1)
if prompt := strings.TrimSpace(r.Prompt); prompt != "" {
userContent = append(userContent, openai.ContentItem{
Type: openai.ContentTypeText,
Text: prompt,
})
}
for _, img := range r.Images {
if img == "" {
continue
}
userContent = append(userContent, openai.ContentItem{
Type: openai.ContentTypeImage,
ImageURL: img,
Detail: detail,
})
}
if len(userContent) > 0 {
messages = append(messages, openai.InputMessage{
Role: "user",
Type: "message",
Content: userContent,
})
}
if len(messages) == 0 {
return nil, errors.New("openai request requires at least one message")
}
payload := openai.HTTPRequest{
Model: strings.TrimSpace(r.Model),
Input: messages,
}
if payload.Model == "" {
payload.Model = openai.DefaultModel
}
if strings.HasPrefix(strings.ToLower(payload.Model), "gpt-5") {
payload.Reasoning = &openai.Reasoning{Effort: "low"}
}
if opts := r.Options; opts != nil {
if opts.MaxOutputTokens > 0 {
payload.MaxOutputTokens = opts.MaxOutputTokens
}
if opts.Temperature > 0 {
payload.Temperature = opts.Temperature
}
if opts.TopP > 0 {
payload.TopP = opts.TopP
}
if opts.PresencePenalty != 0 {
payload.PresencePenalty = opts.PresencePenalty
}
if opts.FrequencyPenalty != 0 {
payload.FrequencyPenalty = opts.FrequencyPenalty
}
}
if format := buildOpenAIResponseFormat(r); format != nil {
payload.Text = &openai.TextOptions{
Format: format,
}
}
return json.Marshal(payload)
}
// buildOpenAIResponseFormat determines which response_format to send to OpenAI.
func buildOpenAIResponseFormat(r *ApiRequest) *openai.ResponseFormat {
if r == nil {
return nil
}
opts := r.Options
hasSchema := len(r.Schema) > 0
if !hasSchema && (opts == nil || !opts.ForceJson) {
return nil
}
result := &openai.ResponseFormat{}
if hasSchema {
result.Type = openai.ResponseFormatJSONSchema
result.Schema = r.Schema
if opts != nil && strings.TrimSpace(opts.SchemaVersion) != "" {
result.Name = strings.TrimSpace(opts.SchemaVersion)
} else {
result.Name = schema.JsonSchemaName(r.Schema, openai.DefaultSchemaVersion)
}
} else {
result.Type = openai.ResponseFormatJSONObject
}
return result
}

View file

@ -53,7 +53,11 @@ func captionInternal(images Files, mediaSrc media.Src) (result *CaptionResult, m
apiRequest.System = model.GetSystemPrompt()
apiRequest.Prompt = model.GetPrompt()
apiRequest.Options = model.GetOptions()
if apiRequest.Options == nil {
apiRequest.Options = model.GetOptions()
}
apiRequest.WriteLog()
if apiResponse, err = PerformApiRequest(apiRequest, uri, method, model.EndpointKey()); err != nil {

View file

@ -58,14 +58,15 @@ func init() {
RegisterEngineAlias(EngineVision, EngineInfo{
RequestFormat: ApiFormatVision,
ResponseFormat: ApiFormatVision,
FileScheme: string(scheme.Data),
FileScheme: scheme.Data,
DefaultResolution: DefaultResolution,
})
RegisterEngineAlias(openai.EngineName, EngineInfo{
Uri: "https://api.openai.com/v1/responses",
RequestFormat: ApiFormatOpenAI,
ResponseFormat: ApiFormatOpenAI,
FileScheme: string(scheme.Data),
FileScheme: scheme.Data,
DefaultResolution: openai.DefaultResolution,
})
}
@ -79,6 +80,7 @@ func RegisterEngine(format ApiFormat, engine Engine) {
// EngineInfo describes metadata that can be associated with an engine alias.
type EngineInfo struct {
Uri string
RequestFormat ApiFormat
ResponseFormat ApiFormat
FileScheme string

View file

@ -28,7 +28,7 @@ func init() {
RegisterEngineAlias(ollama.EngineName, EngineInfo{
RequestFormat: ApiFormatOllama,
ResponseFormat: ApiFormatOllama,
FileScheme: string(scheme.Base64),
FileScheme: scheme.Base64,
DefaultResolution: ollama.DefaultResolution,
})
@ -72,7 +72,7 @@ func (ollamaDefaults) SchemaTemplate(model *Model) string {
switch model.Type {
case ModelTypeLabels:
return ollama.LabelsSchema(model.PromptContains("nsfw"))
return ollama.SchemaLabels(model.PromptContains("nsfw"))
}
return ""
@ -134,64 +134,99 @@ func (ollamaParser) Parse(ctx context.Context, req *ApiRequest, raw []byte, stat
return nil, err
}
result := &ApiResponse{
response := &ApiResponse{
Id: req.GetId(),
Code: status,
Model: &Model{Name: ollamaResp.Model},
Result: ApiResult{
Labels: append([]LabelResult{}, ollamaResp.Result.Labels...),
Caption: func() *CaptionResult {
if ollamaResp.Result.Caption != nil {
copyCaption := *ollamaResp.Result.Caption
return &copyCaption
}
return nil
}(),
Labels: convertOllamaLabels(ollamaResp.Result.Labels),
Caption: convertOllamaCaption(ollamaResp.Result.Caption),
},
}
parsedLabels := len(result.Result.Labels) > 0
parsedLabels := len(response.Result.Labels) > 0
if !parsedLabels && strings.TrimSpace(ollamaResp.Response) != "" && req.Format == FormatJSON {
if labels, parseErr := parseOllamaLabels(ollamaResp.Response); parseErr != nil {
log.Debugf("vision: %s (parse ollama labels)", clean.Error(parseErr))
// Qwen3-VL models stream their JSON payload in the "Thinking" field.
fallbackJSON := strings.TrimSpace(ollamaResp.Response)
if fallbackJSON == "" {
fallbackJSON = strings.TrimSpace(ollamaResp.Thinking)
}
if !parsedLabels && fallbackJSON != "" && (req.Format == FormatJSON || strings.HasPrefix(fallbackJSON, "{")) {
if labels, parseErr := parseOllamaLabels(fallbackJSON); parseErr != nil {
log.Warnf("vision: %s (parse ollama labels)", clean.Error(parseErr))
} else if len(labels) > 0 {
result.Result.Labels = append(result.Result.Labels, labels...)
response.Result.Labels = append(response.Result.Labels, labels...)
parsedLabels = true
}
}
if parsedLabels {
filtered := result.Result.Labels[:0]
for i := range result.Result.Labels {
if result.Result.Labels[i].Confidence <= 0 {
result.Result.Labels[i].Confidence = ollama.LabelConfidenceDefault
filtered := response.Result.Labels[:0]
for i := range response.Result.Labels {
if response.Result.Labels[i].Confidence <= 0 {
response.Result.Labels[i].Confidence = ollama.LabelConfidenceDefault
}
if result.Result.Labels[i].Topicality <= 0 {
result.Result.Labels[i].Topicality = result.Result.Labels[i].Confidence
if response.Result.Labels[i].Topicality <= 0 {
response.Result.Labels[i].Topicality = response.Result.Labels[i].Confidence
}
// Apply thresholds and canonicalize the name.
normalizeLabelResult(&result.Result.Labels[i])
normalizeLabelResult(&response.Result.Labels[i])
if result.Result.Labels[i].Name == "" {
if response.Result.Labels[i].Name == "" {
continue
}
if result.Result.Labels[i].Source == "" {
result.Result.Labels[i].Source = entity.SrcOllama
if response.Result.Labels[i].Source == "" {
response.Result.Labels[i].Source = entity.SrcOllama
}
filtered = append(filtered, result.Result.Labels[i])
filtered = append(filtered, response.Result.Labels[i])
}
result.Result.Labels = filtered
response.Result.Labels = filtered
} else if caption := strings.TrimSpace(ollamaResp.Response); caption != "" {
result.Result.Caption = &CaptionResult{
response.Result.Caption = &CaptionResult{
Text: caption,
Source: entity.SrcOllama,
}
}
return result, nil
return response, nil
}
func convertOllamaLabels(payload []ollama.LabelPayload) []LabelResult {
if len(payload) == 0 {
return nil
}
labels := make([]LabelResult, len(payload))
for i := range payload {
labels[i] = LabelResult{
Name: payload[i].Name,
Source: payload[i].Source,
Priority: payload[i].Priority,
Confidence: payload[i].Confidence,
Topicality: payload[i].Topicality,
Categories: payload[i].Categories,
NSFW: payload[i].NSFW,
NSFWConfidence: payload[i].NSFWConfidence,
}
}
return labels
}
func convertOllamaCaption(payload *ollama.CaptionPayload) *CaptionResult {
if payload == nil {
return nil
}
return &CaptionResult{
Text: payload.Text,
Source: payload.Source,
Confidence: payload.Confidence,
}
}

View file

@ -10,9 +10,9 @@ import (
func TestOllamaDefaultConfidenceApplied(t *testing.T) {
req := &ApiRequest{Format: FormatJSON}
payload := ApiResponseOllama{
Result: ApiResult{
Labels: []LabelResult{{Name: "forest path", Confidence: 0, Topicality: 0}},
payload := ollama.Response{
Result: ollama.ResultPayload{
Labels: []ollama.LabelPayload{{Name: "forest path", Confidence: 0, Topicality: 0}},
},
}
raw, err := json.Marshal(payload)
@ -37,3 +37,46 @@ func TestOllamaDefaultConfidenceApplied(t *testing.T) {
t.Fatalf("expected topicality to default to confidence, got %.2f", resp.Result.Labels[0].Topicality)
}
}
func TestOllamaParserFallbacks(t *testing.T) {
t.Run("ThinkingFieldJSON", func(t *testing.T) {
req := &ApiRequest{Format: FormatJSON}
payload := ollama.Response{
Thinking: `{"labels":[{"name":"cat","confidence":0.9,"topicality":0.8}]}`,
}
raw, err := json.Marshal(payload)
if err != nil {
t.Fatalf("marshal: %v", err)
}
parser := ollamaParser{}
resp, err := parser.Parse(context.Background(), req, raw, 200)
if err != nil {
t.Fatalf("parse failed: %v", err)
}
if len(resp.Result.Labels) != 1 || resp.Result.Labels[0].Name != "Cat" {
t.Fatalf("expected cat label, got %+v", resp.Result.Labels)
}
})
t.Run("JsonPrefixedResponse", func(t *testing.T) {
req := &ApiRequest{} // no explicit format
payload := ollama.Response{
Response: `{"labels":[{"name":"cat","confidence":0.91,"topicality":0.81}]}`,
}
raw, err := json.Marshal(payload)
if err != nil {
t.Fatalf("marshal: %v", err)
}
parser := ollamaParser{}
resp, err := parser.Parse(context.Background(), req, raw, 200)
if err != nil {
t.Fatalf("parse failed: %v", err)
}
if len(resp.Result.Labels) != 1 || resp.Result.Labels[0].Name != "Cat" {
t.Fatalf("expected cat label, got %+v", resp.Result.Labels)
}
})
}

View file

@ -1,18 +1,342 @@
package vision
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/entity"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/http/scheme"
)
// init registers the OpenAI engine alias so models can set Engine: "openai"
// and inherit sensible defaults (request/response formats, file scheme, and
// preferred thumbnail resolution).
// openaiDefaults provides canned prompts, schema templates, and options for OpenAI engines.
type openaiDefaults struct{}
// openaiBuilder prepares ApiRequest objects for OpenAI's Responses API.
type openaiBuilder struct{}
// openaiParser converts Responses API payloads into ApiResponse instances.
type openaiParser struct{}
func init() {
RegisterEngineAlias(openai.EngineName, EngineInfo{
RequestFormat: ApiFormatOpenAI,
ResponseFormat: ApiFormatOpenAI,
FileScheme: string(scheme.Base64),
DefaultResolution: openai.DefaultResolution,
RegisterEngine(ApiFormatOpenAI, Engine{
Builder: openaiBuilder{},
Parser: openaiParser{},
Defaults: openaiDefaults{},
})
}
// SystemPrompt returns the default OpenAI system prompt for the specified model type.
func (openaiDefaults) SystemPrompt(model *Model) string {
if model == nil {
return ""
}
switch model.Type {
case ModelTypeCaption:
return openai.CaptionSystem
case ModelTypeLabels:
return openai.LabelSystem
default:
return ""
}
}
// UserPrompt returns the default OpenAI user prompt for the specified model type.
func (openaiDefaults) UserPrompt(model *Model) string {
if model == nil {
return ""
}
switch model.Type {
case ModelTypeCaption:
return openai.CaptionPrompt
case ModelTypeLabels:
if DetectNSFWLabels {
return openai.LabelPromptNSFW
}
return openai.LabelPromptDefault
default:
return ""
}
}
// SchemaTemplate returns the JSON schema template for the model, if applicable.
func (openaiDefaults) SchemaTemplate(model *Model) string {
if model == nil {
return ""
}
switch model.Type {
case ModelTypeLabels:
return string(openai.SchemaLabels(model.PromptContains("nsfw")))
default:
return ""
}
}
// Options returns default OpenAI request options for the model.
func (openaiDefaults) Options(model *Model) *ApiRequestOptions {
if model == nil {
return nil
}
switch model.Type {
case ModelTypeCaption:
/*
Options:
Detail: low
MaxOutputTokens: 512
Temperature: 0.1
TopP: 0.9
(Sampling values are zeroed for GPT-5 models in openaiBuilder.Build.)
*/
return &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: openai.CaptionMaxTokens,
Temperature: openai.DefaultTemperature,
TopP: openai.DefaultTopP,
}
case ModelTypeLabels:
/*
Options:
Detail: low
MaxOutputTokens: 1024
Temperature: 0.1
ForceJson: true
SchemaVersion: "photoprism_vision_labels_v1"
(Sampling values are zeroed for GPT-5 models in openaiBuilder.Build.)
*/
return &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: openai.LabelsMaxTokens,
Temperature: openai.DefaultTemperature,
TopP: openai.DefaultTopP,
ForceJson: true,
}
default:
return nil
}
}
// Build constructs an OpenAI request payload using base64-encoded thumbnails.
func (openaiBuilder) Build(ctx context.Context, model *Model, files Files) (*ApiRequest, error) {
if model == nil {
return nil, ErrInvalidModel
}
dataReq, err := NewApiRequestImages(files, scheme.Data)
if err != nil {
return nil, err
}
req := &ApiRequest{
Id: dataReq.Id,
Images: append(Files(nil), dataReq.Images...),
ResponseFormat: ApiFormatOpenAI,
}
if opts := model.GetOptions(); opts != nil {
req.Options = cloneOptions(opts)
if model.Type == ModelTypeCaption {
// Captions default to plain text responses; structured JSON is optional.
req.Options.ForceJson = false
if req.Options.MaxOutputTokens < openai.CaptionMaxTokens {
req.Options.MaxOutputTokens = openai.CaptionMaxTokens
}
} else if model.Type == ModelTypeLabels {
if req.Options.MaxOutputTokens < openai.LabelsMaxTokens {
req.Options.MaxOutputTokens = openai.LabelsMaxTokens
}
}
if strings.HasPrefix(strings.ToLower(strings.TrimSpace(model.Name)), "gpt-5") {
req.Options.Temperature = 0
req.Options.TopP = 0
}
}
if schema := strings.TrimSpace(model.SchemaTemplate()); schema != "" {
if raw, parseErr := parseOpenAISchema(schema); parseErr != nil {
log.Warnf("vision: failed to parse OpenAI schema template (%s)", clean.Error(parseErr))
} else {
req.Schema = raw
}
}
return req, nil
}
// Parse converts an OpenAI Responses API payload into the internal ApiResponse representation.
func (openaiParser) Parse(ctx context.Context, req *ApiRequest, raw []byte, status int) (*ApiResponse, error) {
if status >= 300 {
if msg := openai.ParseErrorMessage(raw); msg != "" {
return nil, fmt.Errorf("openai: %s", msg)
}
return nil, fmt.Errorf("openai: status %d", status)
}
var resp openai.Response
if err := json.Unmarshal(raw, &resp); err != nil {
return nil, err
}
if resp.Error != nil && resp.Error.Message != "" {
return nil, errors.New(resp.Error.Message)
}
result := ApiResult{}
if jsonPayload := resp.FirstJSON(); len(jsonPayload) > 0 {
if err := populateOpenAIJSONResult(&result, jsonPayload); err != nil {
log.Debugf("vision: %s (parse openai json payload)", clean.Error(err))
}
}
if result.Caption == nil {
if text := resp.FirstText(); text != "" {
trimmed := strings.TrimSpace(text)
var parsedJSON bool
if len(trimmed) > 0 && (trimmed[0] == '{' || trimmed[0] == '[') {
if err := populateOpenAIJSONResult(&result, json.RawMessage(trimmed)); err != nil {
log.Debugf("vision: %s (parse openai json text payload)", clean.Error(err))
} else {
parsedJSON = true
}
}
if !parsedJSON && trimmed != "" {
result.Caption = &CaptionResult{
Text: trimmed,
Source: entity.SrcOpenAI,
}
}
}
}
var responseID string
if req != nil {
responseID = req.GetId()
}
modelName := strings.TrimSpace(resp.Model)
if modelName == "" && req != nil {
modelName = strings.TrimSpace(req.Model)
}
return &ApiResponse{
Id: responseID,
Code: status,
Model: &Model{Name: modelName},
Result: result,
}, nil
}
// parseOpenAISchema validates the provided JSON schema and returns it as a raw message.
func parseOpenAISchema(schema string) (json.RawMessage, error) {
var raw json.RawMessage
if err := json.Unmarshal([]byte(schema), &raw); err != nil {
return nil, err
}
return normalizeOpenAISchema(raw)
}
// normalizeOpenAISchema upgrades legacy label schema definitions so they comply with
// OpenAI's json_schema format requirements.
func normalizeOpenAISchema(raw json.RawMessage) (json.RawMessage, error) {
if len(raw) == 0 {
return raw, nil
}
var doc map[string]any
if err := json.Unmarshal(raw, &doc); err != nil {
// Fallback to the original payload if it isn't a JSON object.
return raw, nil
}
if t, ok := doc["type"]; ok {
if typeStr, ok := t.(string); ok && strings.TrimSpace(typeStr) != "" {
return raw, nil
}
}
if _, ok := doc["properties"]; ok {
return raw, nil
}
labels, ok := doc["labels"]
if !ok {
return raw, nil
}
nsfw := false
if items, ok := labels.([]any); ok && len(items) > 0 {
if first, ok := items[0].(map[string]any); ok {
if _, hasNSFW := first["nsfw"]; hasNSFW {
nsfw = true
}
if _, hasNSFWConfidence := first["nsfw_confidence"]; hasNSFWConfidence {
nsfw = true
}
}
}
return openai.SchemaLabels(nsfw), nil
}
// populateOpenAIJSONResult unmarshals a structured OpenAI response into ApiResult fields.
func populateOpenAIJSONResult(result *ApiResult, payload json.RawMessage) error {
if result == nil || len(payload) == 0 {
return nil
}
var envelope struct {
Caption *struct {
Text string `json:"text"`
Confidence float32 `json:"confidence"`
} `json:"caption"`
Labels []LabelResult `json:"labels"`
}
if err := json.Unmarshal(payload, &envelope); err != nil {
return err
}
if envelope.Caption != nil {
text := strings.TrimSpace(envelope.Caption.Text)
if text != "" {
result.Caption = &CaptionResult{
Text: text,
Confidence: envelope.Caption.Confidence,
Source: entity.SrcOpenAI,
}
}
}
if len(envelope.Labels) > 0 {
filtered := envelope.Labels[:0]
for i := range envelope.Labels {
if envelope.Labels[i].Source == "" {
envelope.Labels[i].Source = entity.SrcOpenAI
}
normalizeLabelResult(&envelope.Labels[i])
if envelope.Labels[i].Name == "" {
continue
}
filtered = append(filtered, envelope.Labels[i])
}
result.Labels = append(result.Labels, filtered...)
}
return nil
}

View file

@ -0,0 +1,337 @@
package vision
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/ai/vision/schema"
"github.com/photoprism/photoprism/internal/entity"
)
func TestOpenAIBuilderBuild(t *testing.T) {
model := &Model{
Type: ModelTypeLabels,
Name: openai.DefaultModel,
Engine: openai.EngineName,
}
model.ApplyEngineDefaults()
request, err := openaiBuilder{}.Build(context.Background(), model, Files{examplesPath + "/chameleon_lime.jpg"})
require.NoError(t, err)
require.NotNil(t, request)
assert.Equal(t, ApiFormatOpenAI, request.ResponseFormat)
assert.NotEmpty(t, request.Images)
assert.NotNil(t, request.Options)
assert.Equal(t, openai.DefaultDetail, request.Options.Detail)
assert.True(t, request.Options.ForceJson)
assert.GreaterOrEqual(t, request.Options.MaxOutputTokens, openai.LabelsMaxTokens)
}
func TestOpenAIBuilderBuildCaptionDisablesForceJSON(t *testing.T) {
model := &Model{
Type: ModelTypeCaption,
Name: openai.DefaultModel,
Engine: openai.EngineName,
Options: &ApiRequestOptions{ForceJson: true},
}
model.ApplyEngineDefaults()
request, err := openaiBuilder{}.Build(context.Background(), model, Files{examplesPath + "/chameleon_lime.jpg"})
require.NoError(t, err)
require.NotNil(t, request)
require.NotNil(t, request.Options)
assert.False(t, request.Options.ForceJson)
assert.GreaterOrEqual(t, request.Options.MaxOutputTokens, openai.CaptionMaxTokens)
}
func TestApiRequestJSONForOpenAI(t *testing.T) {
req := &ApiRequest{
Model: "gpt-5-mini",
System: "system",
Prompt: "describe the scene",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: 128,
Temperature: 0.2,
TopP: 0.8,
ForceJson: true,
},
Schema: json.RawMessage(`{"type":"object","properties":{"caption":{"type":"object"}}}`),
}
payload, err := req.JSON()
require.NoError(t, err)
var decoded struct {
Model string `json:"model"`
Input []struct {
Role string `json:"role"`
Content []struct {
Type string `json:"type"`
} `json:"content"`
} `json:"input"`
Text struct {
Format struct {
Type string `json:"type"`
Name string `json:"name"`
Schema json.RawMessage `json:"schema"`
Strict bool `json:"strict"`
} `json:"format"`
} `json:"text"`
Reasoning struct {
Effort string `json:"effort"`
} `json:"reasoning"`
MaxOutputTokens int `json:"max_output_tokens"`
}
require.NoError(t, json.Unmarshal(payload, &decoded))
assert.Equal(t, "gpt-5-mini", decoded.Model)
require.Len(t, decoded.Input, 2)
assert.Equal(t, "system", decoded.Input[0].Role)
assert.Equal(t, openai.ResponseFormatJSONSchema, decoded.Text.Format.Type)
assert.Equal(t, schema.JsonSchemaName(decoded.Text.Format.Schema, openai.DefaultSchemaVersion), decoded.Text.Format.Name)
assert.False(t, decoded.Text.Format.Strict)
assert.NotNil(t, decoded.Text.Format.Schema)
assert.Equal(t, "low", decoded.Reasoning.Effort)
assert.Equal(t, 128, decoded.MaxOutputTokens)
}
func TestApiRequestJSONForOpenAIDefaultSchemaName(t *testing.T) {
req := &ApiRequest{
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
MaxOutputTokens: 64,
ForceJson: true,
},
Schema: json.RawMessage(`{"type":"object"}`),
}
payload, err := req.JSON()
require.NoError(t, err)
var decoded struct {
Text struct {
Format struct {
Name string `json:"name"`
} `json:"format"`
} `json:"text"`
}
require.NoError(t, json.Unmarshal(payload, &decoded))
assert.Equal(t, schema.JsonSchemaName(req.Schema, openai.DefaultSchemaVersion), decoded.Text.Format.Name)
}
func TestOpenAIParserParsesJSONFromTextPayload(t *testing.T) {
respPayload := `{
"id": "resp_123",
"model": "gpt-5-mini",
"output": [{
"role": "assistant",
"content": [{
"type": "output_text",
"text": "{\"labels\":[{\"name\":\"deer\",\"confidence\":0.98,\"topicality\":0.99}]}"
}]
}]
}`
req := &ApiRequest{
Id: "test",
Model: "gpt-5-mini",
ResponseFormat: ApiFormatOpenAI,
}
resp, err := openaiParser{}.Parse(context.Background(), req, []byte(respPayload), http.StatusOK)
require.NoError(t, err)
require.NotNil(t, resp)
require.Len(t, resp.Result.Labels, 1)
assert.Equal(t, "Deer", resp.Result.Labels[0].Name)
assert.Nil(t, resp.Result.Caption)
}
func TestParseOpenAISchemaLegacyUpgrade(t *testing.T) {
legacy := `{
"labels": [{
"name": "",
"confidence": 0,
"topicality": 0
}]
}`
raw, err := parseOpenAISchema(legacy)
require.NoError(t, err)
var decoded map[string]any
require.NoError(t, json.Unmarshal(raw, &decoded))
assert.Equal(t, "object", decoded["type"])
props, ok := decoded["properties"].(map[string]any)
require.True(t, ok)
labels, ok := props["labels"].(map[string]any)
require.True(t, ok)
assert.Equal(t, "array", labels["type"])
}
func TestParseOpenAISchemaLegacyUpgradeNSFW(t *testing.T) {
legacy := `{
"labels": [{
"name": "",
"confidence": 0,
"topicality": 0,
"nsfw": false,
"nsfw_confidence": 0
}]
}`
raw, err := parseOpenAISchema(legacy)
require.NoError(t, err)
var decoded map[string]any
require.NoError(t, json.Unmarshal(raw, &decoded))
props := decoded["properties"].(map[string]any)
labels := props["labels"].(map[string]any)
items := labels["items"].(map[string]any)
_, hasNSFW := items["properties"].(map[string]any)["nsfw"]
assert.True(t, hasNSFW)
}
func TestPerformApiRequestOpenAISuccess(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
var reqPayload struct {
Model string `json:"model"`
}
assert.NoError(t, json.NewDecoder(r.Body).Decode(&reqPayload))
assert.Equal(t, "gpt-5-mini", reqPayload.Model)
response := map[string]any{
"id": "resp_123",
"model": "gpt-5-mini",
"output": []any{
map[string]any{
"role": "assistant",
"content": []any{
map[string]any{
"type": "output_json",
"json": map[string]any{
"caption": map[string]any{
"text": "A cat rests on a windowsill.",
"confidence": 0.91,
},
"labels": []map[string]any{
{
"name": "cat",
"confidence": 0.92,
"topicality": 0.88,
},
},
},
},
},
},
},
}
assert.NoError(t, json.NewEncoder(w).Encode(response))
}))
defer server.Close()
req := &ApiRequest{
Id: "test",
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
},
Schema: json.RawMessage(`{"type":"object"}`),
}
resp, err := PerformApiRequest(req, server.URL, http.MethodPost, "secret")
require.NoError(t, err)
require.NotNil(t, resp)
require.NotNil(t, resp.Result.Caption)
assert.Equal(t, entity.SrcOpenAI, resp.Result.Caption.Source)
assert.Equal(t, "A cat rests on a windowsill.", resp.Result.Caption.Text)
require.Len(t, resp.Result.Labels, 1)
assert.Equal(t, entity.SrcOpenAI, resp.Result.Labels[0].Source)
assert.Equal(t, "Cat", resp.Result.Labels[0].Name)
}
func TestPerformApiRequestOpenAITextFallback(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
response := map[string]any{
"id": "resp_456",
"model": "gpt-5-mini",
"output": []any{
map[string]any{
"role": "assistant",
"content": []any{
map[string]any{
"type": "output_text",
"text": "Two hikers reach the summit at sunset.",
},
},
},
},
}
assert.NoError(t, json.NewEncoder(w).Encode(response))
}))
defer server.Close()
req := &ApiRequest{
Id: "fallback",
Model: "gpt-5-mini",
Images: []string{"data:image/jpeg;base64,AA=="},
ResponseFormat: ApiFormatOpenAI,
Options: &ApiRequestOptions{
Detail: openai.DefaultDetail,
},
Schema: nil,
}
resp, err := PerformApiRequest(req, server.URL, http.MethodPost, "")
require.NoError(t, err)
require.NotNil(t, resp.Result.Caption)
assert.Equal(t, "Two hikers reach the summit at sunset.", resp.Result.Caption.Text)
assert.Equal(t, entity.SrcOpenAI, resp.Result.Caption.Source)
}
func TestPerformApiRequestOpenAIError(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusBadRequest)
_ = json.NewEncoder(w).Encode(map[string]any{
"error": map[string]any{
"message": "Invalid image payload",
},
})
}))
defer server.Close()
req := &ApiRequest{
Id: "error",
Model: "gpt-5-mini",
ResponseFormat: ApiFormatOpenAI,
Schema: nil,
Images: []string{"data:image/jpeg;base64,AA=="},
}
_, err := PerformApiRequest(req, server.URL, http.MethodPost, "")
require.Error(t, err)
assert.Contains(t, err.Error(), "Invalid image payload")
}

View file

@ -96,8 +96,10 @@ func labelsInternal(images Files, mediaSrc media.Src, labelSrc entity.Src) (resu
apiRequest.Prompt = prompt
}
if options := model.GetOptions(); options != nil {
apiRequest.Options = options
if apiRequest.Options == nil {
if options := model.GetOptions(); options != nil {
apiRequest.Options = options
}
}
apiRequest.WriteLog()

View file

@ -154,9 +154,11 @@ func (m *Model) EndpointKey() (key string) {
if key = m.Service.EndpointKey(); key != "" {
return key
} else {
return ServiceKey
}
ensureEnv()
return strings.TrimSpace(os.ExpandEnv(ServiceKey))
}
// EndpointFileScheme returns the endpoint API request file scheme type. Nil
@ -348,6 +350,26 @@ func mergeOptionDefaults(target, defaults *ApiRequestOptions) {
if len(target.Stop) == 0 && len(defaults.Stop) > 0 {
target.Stop = append([]string(nil), defaults.Stop...)
}
if target.MaxOutputTokens <= 0 && defaults.MaxOutputTokens > 0 {
target.MaxOutputTokens = defaults.MaxOutputTokens
}
if strings.TrimSpace(target.Detail) == "" && strings.TrimSpace(defaults.Detail) != "" {
target.Detail = strings.TrimSpace(defaults.Detail)
}
if !target.ForceJson && defaults.ForceJson {
target.ForceJson = true
}
if target.SchemaVersion == "" && defaults.SchemaVersion != "" {
target.SchemaVersion = defaults.SchemaVersion
}
if target.CombineOutputs == "" && defaults.CombineOutputs != "" {
target.CombineOutputs = defaults.CombineOutputs
}
}
func normalizeOptions(opts *ApiRequestOptions) {
@ -422,6 +444,10 @@ func (m *Model) ApplyEngineDefaults() {
}
if info, ok := EngineInfoFor(engine); ok {
if m.Service.Uri == "" {
m.Service.Uri = info.Uri
}
if m.Service.RequestFormat == "" {
m.Service.RequestFormat = info.RequestFormat
}
@ -439,6 +465,10 @@ func (m *Model) ApplyEngineDefaults() {
}
}
if engine == openai.EngineName && strings.TrimSpace(m.Service.Key) == "" {
m.Service.Key = "${OPENAI_API_KEY}"
}
m.Engine = engine
}
@ -490,7 +520,7 @@ func (m *Model) SchemaTemplate() string {
}
if m.schema == "" {
m.schema = visionschema.Labels(m.PromptContains("nsfw"))
m.schema = visionschema.LabelsJson(m.PromptContains("nsfw"))
}
}
})

View file

@ -1,13 +1,17 @@
package vision
import (
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
"github.com/photoprism/photoprism/internal/ai/tensorflow"
"github.com/photoprism/photoprism/internal/ai/vision/ollama"
"github.com/photoprism/photoprism/internal/ai/vision/openai"
"github.com/photoprism/photoprism/internal/entity"
"github.com/photoprism/photoprism/pkg/http/scheme"
)
func TestModelGetOptionsDefaultsOllamaLabels(t *testing.T) {
@ -108,6 +112,85 @@ func TestModelApplyEngineDefaultsSetsResolution(t *testing.T) {
}
}
func TestModelApplyEngineDefaultsSetsServiceDefaults(t *testing.T) {
t.Run("OpenAIEngine", func(t *testing.T) {
model := &Model{
Type: ModelTypeCaption,
Engine: openai.EngineName,
}
model.ApplyEngineDefaults()
assert.Equal(t, "https://api.openai.com/v1/responses", model.Service.Uri)
assert.Equal(t, ApiFormatOpenAI, model.Service.RequestFormat)
assert.Equal(t, ApiFormatOpenAI, model.Service.ResponseFormat)
assert.Equal(t, scheme.Data, model.Service.FileScheme)
})
t.Run("PreserveExistingService", func(t *testing.T) {
model := &Model{
Type: ModelTypeCaption,
Engine: openai.EngineName,
Service: Service{
Uri: "https://custom.example",
FileScheme: scheme.Base64,
RequestFormat: ApiFormatOpenAI,
},
}
model.ApplyEngineDefaults()
assert.Equal(t, "https://custom.example", model.Service.Uri)
assert.Equal(t, scheme.Base64, model.Service.FileScheme)
})
}
func TestModelEndpointKeyOpenAIFallbacks(t *testing.T) {
t.Run("EnvFile", func(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "openai.key")
if err := os.WriteFile(path, []byte("from-file\n"), 0o600); err != nil {
t.Fatalf("write key file: %v", err)
}
t.Setenv("OPENAI_API_KEY", "")
t.Setenv("OPENAI_API_KEY_FILE", path)
model := &Model{Type: ModelTypeCaption, Engine: openai.EngineName}
model.ApplyEngineDefaults()
if got := model.EndpointKey(); got != "from-file" {
t.Fatalf("expected file key, got %q", got)
}
})
t.Run("CustomPlaceholder", func(t *testing.T) {
t.Setenv("OPENAI_API_KEY", "env-secret")
model := &Model{Type: ModelTypeCaption, Engine: openai.EngineName}
model.ApplyEngineDefaults()
if got := model.EndpointKey(); got != "env-secret" {
t.Fatalf("expected env secret, got %q", got)
}
model.Service.Key = "${CUSTOM_KEY}"
t.Setenv("CUSTOM_KEY", "custom-secret")
if got := model.EndpointKey(); got != "custom-secret" {
t.Fatalf("expected custom secret, got %q", got)
}
})
t.Run("GlobalFallback", func(t *testing.T) {
prev := ServiceKey
ServiceKey = "${GLOBAL_KEY}"
defer func() { ServiceKey = prev }()
t.Setenv("GLOBAL_KEY", "global-secret")
model := &Model{}
if got := model.EndpointKey(); got != "global-secret" {
t.Fatalf("expected global secret, got %q", got)
}
})
}
func TestModelGetSource(t *testing.T) {
t.Run("NilModel", func(t *testing.T) {
var model *Model
@ -115,21 +198,18 @@ func TestModelGetSource(t *testing.T) {
t.Fatalf("expected SrcAuto for nil model, got %s", src)
}
})
t.Run("EngineAlias", func(t *testing.T) {
model := &Model{Engine: ollama.EngineName}
if src := model.GetSource(); src != entity.SrcOllama {
t.Fatalf("expected SrcOllama, got %s", src)
}
})
t.Run("RequestFormat", func(t *testing.T) {
model := &Model{Service: Service{RequestFormat: ApiFormatOpenAI}}
if src := model.GetSource(); src != entity.SrcOpenAI {
t.Fatalf("expected SrcOpenAI, got %s", src)
}
})
t.Run("DefaultImage", func(t *testing.T) {
model := &Model{}
if src := model.GetSource(); src != entity.SrcImage {

View file

@ -0,0 +1,152 @@
## PhotoPrism — Ollama Engine Integration
**Last Updated:** November 14, 2025
### Overview
This package provides PhotoPrism's native adapter for Ollama-compatible multimodal models. It lets Caption, Labels, and future Generate workflows call locally hosted models without changing worker logic, reusing the shared API client (`internal/ai/vision/api_client.go`) and result types (`LabelResult`, `CaptionResult`). Requests stay inside your infrastructure, rely on base64 thumbnails, and honor the same ACL, timeout, and logging hooks as the default TensorFlow engines.
#### Context & Constraints
- Engine defaults live in `internal/ai/vision/ollama` and are applied whenever a model sets `Engine: ollama`. Aliases map to `ApiFormatOllama`, `scheme.Base64`, and a default 720px thumbnail.
- Responses may arrive as newline-delimited JSON chunks. `decodeOllamaResponse` keeps the most recent chunk, while `parseOllamaLabels` replays plain JSON strings found in `response`.
- Structured JSON is optional for captions but enforced for labels when `Format: json` (default for label models targeting the Ollama engine).
- The adapter never overwrites TensorFlow defaults. If an Ollama call fails, downstream code still has Nasnet, NSFW, and Face models available.
- Workers assume a single-image payload per request. Run `photoprism vision run` to validate multi-image prompts before changing that invariant.
#### Goals
- Let operators opt into local, private LLMs for captions and labels via `vision.yml`.
- Provide safe defaults (prompts, schema, sampling) so most deployments only need to specify `Name`, `Engine`, and `Service.Uri`.
- Surface reproducible logs, metrics, and CLI commands that make it easy to compare Ollama output against TensorFlow/OpenAI engines.
#### Non-Goals
- Managing Ollama itself (model downloads, GPU scheduling, or authentication). Use the Compose profiles provided in the repository.
- Adding new HTTP endpoints or bypassing the existing `photoprism vision` CLI.
- Replacing TensorFlow workers—Ollama engines are additive and opt-in.
### Architecture & Request Flow
1. **Model Selection**`Config.Model(ModelType)` returns the top-most enabled entry. When `Engine: ollama`, `ApplyEngineDefaults()` fills in the request/response format, base64 file scheme, and a 720px resolution unless overridden.
2. **Request Build**`ollamaBuilder.Build` wraps thumbnails with `NewApiRequestOllama`, which encodes them as base64 strings. `Model.Model()` resolves the exact Ollama tag (`gemma3:4b`, `qwen2.5vl:7b`, etc.).
3. **Transport**`PerformApiRequest` uses a single HTTP POST (default timeout 10min). Authentication is optional; provide `Service.Key` if you proxy through an API gateway.
4. **Parsing**`ollamaParser.Parse` converts payloads into `ApiResponse`. It normalizes confidences (`LabelConfidenceDefault = 0.5` when missing), copies NSFW scores, and canonicalizes label names via `normalizeLabelResult`.
5. **Persistence**`entity.SrcOllama` is stamped on labels/captions so UI badges and audits reflect the new source.
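The newline-delimited JSON behavior noted under Context & Constraints matters for step 4: streaming responses can arrive as many small chunks, and only the final one carries the complete payload. Below is a minimal, illustrative sketch of that chunk handling; the real decoder lives in `api_client.go` and may differ in detail (the `ndjson` package name and `map[string]any` payload type are assumptions for illustration):

```go
package ndjson // illustrative package name, not part of PhotoPrism

import (
	"bufio"
	"encoding/json"
	"io"
)

// lastChunk scans a newline-delimited JSON stream and keeps the most recent
// chunk that decodes successfully, mirroring the "keep the last chunk"
// behavior described above. Error handling is simplified for brevity.
func lastChunk(r io.Reader) (map[string]any, error) {
	var last map[string]any
	sc := bufio.NewScanner(r)
	for sc.Scan() {
		line := sc.Bytes()
		if len(line) == 0 {
			continue // skip blank lines between chunks
		}
		var chunk map[string]any
		if err := json.Unmarshal(line, &chunk); err != nil {
			continue // ignore partial or malformed chunks
		}
		last = chunk
	}
	return last, sc.Err()
}
```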
### Prompt, Schema, & Options Guidance
- **System Prompts**
- Labels: `LabelSystem` enforces single-word nouns. Set `System` to override; assign `LabelSystemSimple` when you need descriptive phrases.
- Captions: no system prompt by default; rely on user prompt or set one explicitly for stylistic needs.
- **User Prompts**
- Captions use `CaptionPrompt`, which requests one sentence in active voice.
- Labels default to `LabelPromptDefault`; when `DetectNSFWLabels` is true, the adapter swaps in `LabelPromptNSFW`.
- For stricter noun enforcement, set `Prompt` to `LabelPromptStrict`.
- **Schemas**
- Labels rely on `schema.LabelsJson(nsfw)` (simple JSON template). Setting `Format: json` auto-attaches a reminder (`model.SchemaInstructions()`).
- Override via `Schema` (inline YAML) or `SchemaFile`. `PHOTOPRISM_VISION_LABEL_SCHEMA_FILE` always wins if present.
- **Options**
- Labels: default `Temperature` equals `DefaultTemperature` (0.1 unless configured), `TopP=0.9`, `Stop=["\n\n"]`.
- Captions: only `Temperature` is set; other parameters inherit global defaults.
- Custom `Options` merge with engine defaults. Leave `ForceJson=true` for labels so PhotoPrism can reject malformed payloads early.
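To make the schema reminder concrete, here is a hedged sketch of how a JSON reminder could be appended to a label prompt when `Format: json` is active; the actual wiring goes through `model.SchemaInstructions()`, and the reminder wording below is an assumption:

```go
package prompt // illustrative only

// withSchemaReminder appends a JSON structure reminder to the user prompt
// when JSON output is enforced, approximating model.SchemaInstructions().
func withSchemaReminder(userPrompt, schemaSample string, forceJson bool) string {
	if !forceJson {
		return userPrompt
	}
	return userPrompt + "\n\nRespond only with JSON that matches this structure:\n" + schemaSample
}
```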
### Supported Ollama Vision Models
| Model (Ollama Tag) | Size & Footprint | Strengths | JSON & Language Notes | When To Use |
|-------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `gemma3:4b / 12b / 27b` | 4B/12B/27B parameters, ~3.3GB → 17GB downloads, 128K context | Multimodal text+image reasoning with SigLIP encoder, handles OCR/long documents, supports tool/function calling | Emits structured JSON reliably; >140 languages with strong default English output | High-quality captions + multilingual labels when you have ≥12GB VRAM (4B works on 8GB with Q4_K_M) |
| `qwen2.5vl:7b` | 8.29B params (Q4_K_M) ≈6GB download, 125K context | Excellent charts, GUI grounding, DocVQA, multi-image reasoning, agentic tool use | JSON mode tuned for schema compliance; supports 20+ languages with strong Chinese/English parity | Label extraction for mixed-language archives or UI/diagram analysis |
| `qwen3-vl:2b / 4b / 8b` | Dense 2B/4B/8B tiers (~3GB, ~3.5GB, ~6GB downloads) with native 256K context extendable to 1M; fits single 12–24GB GPUs or high-end CPUs (2B) | Spatial + video reasoning upgrades (Interleaved-MRoPE, DeepStack), 32-language OCR, GUI/agent control, long-document ingest | Emits JSON reliably when prompts specify schema; multilingual captions/labels with Thinking variants boosting STEM reasoning | General-purpose captions/labels when you need long-context doc/video support without cloud APIs; 2B for CPU/edge, 4B as balanced default, 8B when accuracy outweighs latency |
| `llama3.2-vision:11b` | 11B params, ~7.8GB download, requires ≥8GB VRAM; 90B variant needs ≥64GB | Strong general reasoning, captioning, OCR, supported by Meta ecosystem tooling | Vision tasks officially supported in English; text-only tasks cover eight major languages | Keep captions consistent with Meta-compatible prompts or when teams already standardize on Llama 3.x |
| `minicpm-v:8b-2.6` | 8B params, ~5.5GB download, 32K context | Optimized for edge GPUs, high OCR accuracy, multi-image/video support, low token count (≈640 tokens for 1.8MP) | Multilingual (EN/ZH/DE/FR/IT/KR). Emits concise JSON but may need stricter stopping sequences | Memory-constrained deployments that still require NSFW/OCR-aware label output |
> Tip: pull models inside the dev container with `docker compose --profile ollama up -d` and then `docker compose exec ollama ollama pull gemma3:4b`. Keep the profile stopped when you do not need extra GPU/CPU load.
> Qwen3-VL models stream their JSON payload via the `thinking` field. PhotoPrism v2025.11+ captures this automatically; if you run older builds, upgrade before enabling these models or responses will appear empty.
### Configuration
#### Environment Variables
- `PHOTOPRISM_VISION_LABEL_SCHEMA_FILE` — Absolute path to a JSON snippet that overrides the default label schema (applies to every Ollama label model).
- `PHOTOPRISM_VISION_YAML` — Custom `vision.yml` path. Keep it synced in Git if you automate deployments.
- `OLLAMA_HOST`, `OLLAMA_MODELS`, `OLLAMA_MAX_QUEUE`, `OLLAMA_NUM_PARALLEL`, etc. — Provided in `compose*.yaml` to tune the Ollama daemon. Adjust `OLLAMA_KEEP_ALIVE` if you want models to stay loaded between worker batches.
- `PHOTOPRISM_LOG_LEVEL=trace` — Enables verbose request/response previews (truncated to avoid leaking images). Use temporarily when debugging parsing issues.
#### `vision.yml` Example
```yaml
Models:
- Type: labels
Name: qwen2.5vl:7b
Engine: ollama
Run: newly-indexed
Resolution: 720
Format: json
Options:
Temperature: 0.05
Stop: ["\n\n"]
ForceJson: true
Service:
Uri: http://ollama:11434/api/generate
RequestFormat: ollama
ResponseFormat: ollama
FileScheme: base64
- Type: caption
Name: gemma3:4b
Engine: ollama
Disabled: false
Options:
Temperature: 0.2
Service:
Uri: http://ollama:11434/api/generate
```
Guidelines:
- Place new entries after the default TensorFlow models so they take precedence while Nasnet/NSFW remain as fallbacks.
- Always specify the exact Ollama tag (`model:version`) so upgrades are deliberate.
- Keep option flags before positional arguments in CLI snippets (`photoprism vision run -m labels --count 1`).
- If you proxy requests (e.g., through Traefik), set `Service.Key` to `Bearer <token>` and configure the proxy to inject/validate it.
### Operational Checklist
- **Scheduling** — Use `Run: newly-indexed` for incremental runs, `Run: manual` for ad-hoc CLI calls, or `Run: on-schedule` when paired with the scheduler. Leave `Run: auto` if you want the worker to decide based on other model states.
- **Timeouts & Retries** — Default timeout is 10 minutes (`ServiceTimeout`). Ollama streaming responses complete faster in practice; if you need stricter SLAs, wrap `photoprism vision run` in a job runner and retry failed batches manually.
- **Fallbacks** — Keep Nasnet configured even when Ollama labels are primary. `labels.go` stops at the first successful engine, so duplicates are avoided.
- **Security** — When exposing Ollama beyond localhost, terminate TLS at Traefik and enable API keys. Never return full JSON payloads in logs; rely on trace mode only for debugging and sanitize before sharing.
- **Model Storage** — Bind-mount `./storage/services/ollama:/root/.ollama` (see Compose) so pulled models survive container restarts. Run `docker compose exec ollama ollama list` during deployments to verify availability.
### Observability & Testing
- **CLI Smoke Tests**
- Captions: `photoprism vision run -m caption --count 5 --force`.
- Labels: `photoprism vision run -m labels --count 5 --force`.
- After each run, check `photoprism vision ls` for `source=ollama`.
- **Unit Tests**
- `go test ./internal/ai/vision/ollama ./internal/ai/vision -run Ollama -count=1` covers transport parsing and model defaults.
- Add fixtures under `internal/ai/vision/testdata` when capturing new response shapes; keep files small and anonymized.
- **Logging**
- Set `PHOTOPRISM_LOG_LEVEL=debug` to watch summary lines (“processed labels/caption via ollama”).
- Use `log.Trace` sparingly; it prints truncated JSON blobs for troubleshooting.
- **Metrics**
- `/api/v1/metrics` exposes counts per label source; scrape after a batch to compare throughput with TensorFlow/OpenAI runs.
### Code Map
- `internal/ai/vision/ollama/*.go` — Engine defaults, schema helpers, transport structs.
- `internal/ai/vision/engine_ollama.go` — Builder/parser glue plus label/caption normalization.
- `internal/ai/vision/api_ollama.go` — Base64 payload builder.
- `internal/ai/vision/api_client.go` — Streaming decoder shared among engines.
- `internal/ai/vision/models.go` — Default caption model definition (`gemma3`).
- `compose*.yaml` — Ollama service profile, Traefik labels, and persistent volume wiring.
- `frontend/src/common/util.js` — Maps `src="ollama"` to the correct badge; keep it updated when adding new source strings.
### Next Steps
- [ ] Add formal schema validation (JSON Schema or JTD) so malformed label responses fail fast before normalization.
- [ ] Support multiple thumbnails per request once core workflows confirm the API contract (requires worker + UI changes).
- [ ] Emit per-model latency and success metrics from the vision worker to simplify tuning when several Ollama engines run side-by-side.
- [ ] Mirror any loader changes into PhotoPrism Plus/Pro templates to keep splash + browser checks consistent after enabling external engines.

View file

@ -1,7 +1,5 @@
package ollama
import "github.com/photoprism/photoprism/internal/ai/vision/schema"
const (
// CaptionPrompt instructs Ollama caption models to emit a single, active-voice sentence.
CaptionPrompt = "Create a caption with exactly one sentence in the active voice that describes the main visual content. Begin with the main subject and clear action. Avoid text formatting, meta-language, and filler words."
@ -22,12 +20,3 @@ const (
// DefaultResolution is the default thumbnail size submitted to Ollama models.
DefaultResolution = 720
)
// LabelsSchema returns the canonical label schema string consumed by Ollama models.
func LabelsSchema(nsfw bool) string {
if nsfw {
return schema.LabelsNSFW
} else {
return schema.LabelsDefault
}
}

View file

@ -0,0 +1,14 @@
package ollama
import (
"github.com/photoprism/photoprism/internal/ai/vision/schema"
)
// SchemaLabels returns the canonical label schema string consumed by Ollama models.
//
// Related documentation and references:
// - https://www.alibabacloud.com/help/en/model-studio/json-mode
// - https://www.json.org/json-en.html
func SchemaLabels(nsfw bool) string {
return schema.LabelsJson(nsfw)
}

View file

@ -0,0 +1,80 @@
package ollama
import (
"errors"
"fmt"
"time"
)
// Response encapsulates the subset of the Ollama generate API response we care about.
type Response struct {
ID string `yaml:"Id,omitempty" json:"id,omitempty"`
Code int `yaml:"Code,omitempty" json:"code,omitempty"`
Error string `yaml:"Error,omitempty" json:"error,omitempty"`
Model string `yaml:"Model,omitempty" json:"model,omitempty"`
CreatedAt time.Time `yaml:"CreatedAt,omitempty" json:"created_at,omitempty"`
Response string `yaml:"Response,omitempty" json:"response,omitempty"`
Thinking string `yaml:"Thinking,omitempty" json:"thinking,omitempty"`
Done bool `yaml:"Done,omitempty" json:"done,omitempty"`
Context []int `yaml:"Context,omitempty" json:"context,omitempty"`
TotalDuration int64 `yaml:"TotalDuration,omitempty" json:"total_duration,omitempty"`
LoadDuration int `yaml:"LoadDuration,omitempty" json:"load_duration,omitempty"`
PromptEvalCount int `yaml:"PromptEvalCount,omitempty" json:"prompt_eval_count,omitempty"`
PromptEvalDuration int `yaml:"PromptEvalDuration,omitempty" json:"prompt_eval_duration,omitempty"`
EvalCount int `yaml:"EvalCount,omitempty" json:"eval_count,omitempty"`
EvalDuration int64 `yaml:"EvalDuration,omitempty" json:"eval_duration,omitempty"`
Result ResultPayload `yaml:"Result,omitempty" json:"result,omitempty"`
}
// Err returns an error if the request has failed.
func (r *Response) Err() error {
if r == nil {
return errors.New("response is nil")
}
if r.Code >= 400 {
if r.Error != "" {
return errors.New(r.Error)
}
return fmt.Errorf("error %d", r.Code)
} else if len(r.Result.Labels) == 0 && r.Result.Caption == nil {
return errors.New("no result")
}
return nil
}
// HasResult checks if there is at least one result in the response data.
func (r *Response) HasResult() bool {
if r == nil {
return false
}
return len(r.Result.Labels) > 0 || r.Result.Caption != nil
}
// ResultPayload mirrors the structure returned by Ollama for result data.
type ResultPayload struct {
Labels []LabelPayload `json:"labels"`
Caption *CaptionPayload `json:"caption,omitempty"`
}
// LabelPayload represents a single label object emitted by the Ollama adapter.
type LabelPayload struct {
Name string `json:"name"`
Source string `json:"source,omitempty"`
Priority int `json:"priority,omitempty"`
Confidence float32 `json:"confidence,omitempty"`
Topicality float32 `json:"topicality,omitempty"`
Categories []string `json:"categories,omitempty"`
NSFW bool `json:"nsfw,omitempty"`
NSFWConfidence float32 `json:"nsfw_confidence,omitempty"`
}
// CaptionPayload represents the caption object emitted by the Ollama adapter.
type CaptionPayload struct {
Text string `json:"text"`
Source string `json:"source,omitempty"`
Confidence float32 `json:"confidence,omitempty"`
}

View file

@ -0,0 +1,90 @@
package ollama
import (
"testing"
"time"
)
func TestResponseErr(t *testing.T) {
t.Run("NilResponse", func(t *testing.T) {
if err := (*Response)(nil).Err(); err == nil || err.Error() != "response is nil" {
t.Fatalf("expected nil-response error, got %v", err)
}
})
t.Run("HTTPErrorWithMessage", func(t *testing.T) {
resp := &Response{Code: 429, Error: "too many requests"}
if err := resp.Err(); err == nil || err.Error() != "too many requests" {
t.Fatalf("expected message error, got %v", err)
}
})
t.Run("HTTPErrorWithoutMessage", func(t *testing.T) {
resp := &Response{Code: 500}
if err := resp.Err(); err == nil || err.Error() != "error 500" {
t.Fatalf("expected formatted error, got %v", err)
}
})
t.Run("NoResult", func(t *testing.T) {
resp := &Response{Code: 200}
if err := resp.Err(); err == nil || err.Error() != "no result" {
t.Fatalf("expected no-result error, got %v", err)
}
})
t.Run("HasLabels", func(t *testing.T) {
resp := &Response{
Code: 200,
Result: ResultPayload{Labels: []LabelPayload{{Name: "sky"}}},
Model: "qwen",
}
if err := resp.Err(); err != nil {
t.Fatalf("unexpected error: %v", err)
}
})
t.Run("HasCaption", func(t *testing.T) {
resp := &Response{
Code: 200,
Result: ResultPayload{Caption: &CaptionPayload{Text: "Caption"}},
}
if err := resp.Err(); err != nil {
t.Fatalf("unexpected error: %v", err)
}
})
}
func TestResponseHasResult(t *testing.T) {
if (*Response)(nil).HasResult() {
t.Fatal("nil response should not have result")
}
resp := &Response{}
if resp.HasResult() {
t.Fatal("expected false when result payload is empty")
}
resp.Result.Labels = []LabelPayload{{Name: "sun"}}
if !resp.HasResult() {
t.Fatal("expected true when labels present")
}
resp.Result.Labels = nil
resp.Result.Caption = &CaptionPayload{Text: "Sky", Confidence: 0.9}
if !resp.HasResult() {
t.Fatal("expected true when caption present")
}
}
func TestResponseJSONTagsAreOptional(t *testing.T) {
// Guard against accidental breaking changes to essential fields
resp := Response{
ID: "test",
Model: "ollama",
CreatedAt: time.Now(),
}
if resp.ID == "" || resp.Model == "" {
t.Fatalf("response fields should persist, got %+v", resp)
}
}

View file

@ -0,0 +1,128 @@
## PhotoPrism — OpenAI API Integration
**Last Updated:** November 14, 2025
### Overview
This package contains PhotoPrism's adapter for the OpenAI Responses API. It enables existing caption and label workflows (`GenerateCaption`, `GenerateLabels`, and the `photoprism vision run` CLI) to call OpenAI models alongside TensorFlow and Ollama without changing worker or API code. The implementation focuses on predictable results, structured outputs, and clear observability so operators can opt in gradually.
#### Context & Constraints
- OpenAI requests flow through the existing vision client (`internal/ai/vision/api_client.go`) and must honour PhotoPrism's timeout, logging, and ACL rules.
- Structured outputs are preferred but the adapter must gracefully handle free-form text; `output_text` responses are parsed both as JSON and as plain captions.
- Costs should remain predictable: requests are limited to a single 720px thumbnail (`detail=low`) and capped token budgets (512 caption, 1024 labels).
- Secrets are supplied per model (`Service.Key`) with fallbacks to `OPENAI_API_KEY` / `_FILE`. Logs must redact sensitive data.
#### Goals
- Provide drop-in OpenAI support for captions and labels using `vision.yml`.
- Keep configuration ergonomic by auto-populating prompts, schema names, token limits, and sampling defaults.
- Expose enough logging and tests so operators can compare OpenAI output with existing engines before enabling it broadly.
#### Non-Goals
- Introducing a new `generate` model type or combined caption/label endpoint (reserved for a later phase).
- Replacing the default TensorFlow models; they remain active as fallbacks.
- Managing OpenAI billing or quota dashboards beyond surfacing token counts in logs and metrics.
### Prompt, Model, & Schema Guidance
- **Models:** The adapter targets GPT-5 vision tiers (e.g. `gpt-5-nano`, `gpt-5-mini`). These models support image inputs, structured outputs, and deterministic settings. Set `Name` to the exact provider identifier so defaults are applied correctly. Caption models share the same configuration surface and run through the same adapter.
- **Prompts:** Defaults live in `defaults.go`. Captions use a single-sentence instruction; labels use `LabelPromptDefault` (or `LabelPromptNSFW` when PhotoPrism requests NSFW metadata). Custom prompts should retain schema reminders so structured outputs stay valid.
- **Schemas:** Labels use the JSON schema returned by `schema.LabelsJsonSchema(nsfw)`; the response format name is derived via `schema.JsonSchemaName` (e.g. `photoprism_vision_labels_v1`). Captions omit schemas unless operators explicitly request a structured format.
- **When to keep defaults:** For most deployments, leaving `System`, `Prompt`, `Schema`, and `Options` unset yields stable output with minimal configuration. Override them only when domain-specific language or custom scoring is necessary, and add regression tests alongside.
Budget-conscious operators can experiment with lighter prompts or lower-resolution thumbnails, but should keep token limits and determinism settings intact to avoid unexpected bills and UI churn.
#### Performance & Cost Estimates
- **Token budgets:** Captions request up to 512 output tokens; labels request up to 1024. Input tokens are typically ≤700 for a single 720px thumbnail plus prompts.
- **Latency:** GPT-5 nano/mini vision calls typically complete in 3–8s, depending on OpenAI region. Including reasoning metadata (`reasoning.effort=low`) has negligible impact but improves traceability.
- **Costs:** Consult OpenAI's pricing for the selected model. Multiply input/output tokens by the published rate. PhotoPrism currently sends one image per request to keep costs linear with photo count.
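As a back-of-the-envelope example, the labels fixture below reports 724 input and 169 output tokens. A sketch of the cost arithmetic, with placeholder per-token rates (substitute the published pricing for your model):

```go
package cost // illustrative only

// requestCostUSD multiplies token counts by per-million-token rates. The
// rates below are hypothetical placeholders, not OpenAI's actual pricing.
func requestCostUSD(inputTokens, outputTokens int) float64 {
	const (
		inputPerMillion  = 0.25 // hypothetical USD per 1M input tokens
		outputPerMillion = 2.00 // hypothetical USD per 1M output tokens
	)
	return float64(inputTokens)/1e6*inputPerMillion +
		float64(outputTokens)/1e6*outputPerMillion
}

// requestCostUSD(724, 169) ≈ $0.00052 per picture at these example rates.
```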
### Configuration
#### Environment Variables
- `OPENAI_API_KEY` / `OPENAI_API_KEY_FILE` — fallback credentials when a model's `Service.Key` is unset.
- Existing `PHOTOPRISM_VISION_*` variables remain authoritative (see the [Getting Started Guide](https://docs.photoprism.app/getting-started/config-options/#computer-vision) for full lists).
#### `vision.yml` Examples
```yaml
Models:
- Type: caption
Name: gpt-5-nano
Engine: openai
Disabled: false # opt in manually
Resolution: 720 # optional; default is 720
Options:
Detail: low # optional; defaults to low
MaxOutputTokens: 512
Service:
Uri: https://api.openai.com/v1/responses
FileScheme: data
Key: ${OPENAI_API_KEY}
- Type: labels
Name: gpt-5-mini
Engine: openai
Disabled: false
Resolution: 720
Options:
Detail: low
MaxOutputTokens: 1024
ForceJson: true # redundant but explicit
Service:
Uri: https://api.openai.com/v1/responses
FileScheme: data
Key: ${OPENAI_API_KEY}
```
Keep TensorFlow entries in place so PhotoPrism falls back when the external service is unavailable.
#### Defaults
- File scheme: `data:` URLs (base64) for all OpenAI models.
- Resolution: 720px thumbnails (`vision.Thumb(ModelTypeCaption|Labels)`).
- Options: `MaxOutputTokens` raised to 512 (caption) / 1024 (labels); `ForceJson=false` for captions, `true` for labels; `reasoning.effort="low"`.
- Sampling: `Temperature` and `TopP` set to `0` for `gpt-5*` models; inherited values (0.1/0.9) remain for other engines. `openaiBuilder.Build` performs this override while preserving the struct defaults for non-OpenAI adapters.
- Schema naming: Automatically derived via `schema.JsonSchemaName`, so operators may omit `SchemaVersion`.
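A hedged sketch of the sampling override described above; the exact check lives in `openaiBuilder.Build`, and the name-prefix test is an assumption for illustration:

```go
package sampling // illustrative only

import "strings"

// applyDeterministicSampling zeroes Temperature and TopP for GPT-5 model
// names while leaving other engines on their inherited defaults (0.1/0.9).
func applyDeterministicSampling(modelName string, temperature, topP *float64) {
	if strings.HasPrefix(modelName, "gpt-5") {
		*temperature = 0
		*topP = 0
	}
}
```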
### Documentation
- Label Generation: <https://docs.photoprism.app/developer-guide/vision/label-generation/>
- Caption Generation: <https://docs.photoprism.app/developer-guide/vision/caption-generation/>
- Vision CLI Commands: <https://docs.photoprism.app/developer-guide/vision/cli/>
### Implementation Details
#### Core Concepts
- **Structured outputs:** PhotoPrism leverages OpenAI's structured output capability as documented at <https://platform.openai.com/docs/guides/structured-outputs>. When a JSON schema is supplied, the adapter emits `text.format` with `type: "json_schema"` and a schema name derived from the content. The parser then prefers `output_json`, but also attempts to decode `output_text` payloads that contain JSON objects.
- **Deterministic sampling:** GPT-5 models are run with `temperature=0` and `top_p=0` to minimise variance, while still allowing developers to override values in `vision.yml` if needed.
- **Reasoning metadata:** Requests include `reasoning.effort="low"` so OpenAI returns structured reasoning usage counters, helping operators track token consumption.
- **Worker summaries:** The vision worker now logs either “updated …” or “processed … (no metadata changes detected)”, making reruns easy to audit.
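The parser preference described above can be sketched with the `Response` helpers defined in this package (`FirstJSON`, `FirstText`); the wrapper below is illustrative, not the adapter's actual code path:

```go
package example // illustrative wrapper, not part of the adapter

import (
	"encoding/json"
	"strings"

	"github.com/photoprism/photoprism/internal/ai/vision/openai"
)

// labelsPayload prefers a structured output_json part and falls back to
// output_text that contains a JSON object, mirroring the order above.
func labelsPayload(resp *openai.Response) (json.RawMessage, bool) {
	if raw := resp.FirstJSON(); len(raw) > 0 {
		return raw, true
	}
	if text := strings.TrimSpace(resp.FirstText()); strings.HasPrefix(text, "{") {
		return json.RawMessage(text), true
	}
	return nil, false
}
```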
#### Rate Limiting
OpenAI calls respect the existing `limiter.Auth` configuration used by the vision service. Failed requests surface standard HTTP errors and are not automatically retried; operators should ensure they have adequate account limits and consider external rate limiting when sharing credentials.
#### Testing & Validation
1. Unit tests: `go test ./internal/ai/vision/openai ./internal/ai/vision -run OpenAI -count=1`. Fixtures under `internal/ai/vision/openai/testdata/` replay real Responses payloads (captions and labels).
2. CLI smoke test: `photoprism vision run -m labels --count 1 --force` with trace logging enabled to inspect sanitised Responses.
3. Compare worker summaries and label sources (`openai`) in the UI or via `photoprism vision ls`.
#### Code Map
- **Adapter & defaults:** `internal/ai/vision/openai` (defaults, schema helpers, transport, tests).
- **Request/response plumbing:** `internal/ai/vision/api_request.go`, `api_client.go`, `engine_openai.go`, `engine_openai_test.go`.
- **Workers & CLI:** `internal/workers/vision.go`, `internal/commands/vision_run.go`.
- **Shared utilities:** `internal/ai/vision/schema`, `pkg/clean`, `pkg/media`.
#### Next Steps
- [ ] Introduce the future `generate` model type that combines captions, labels, and optional markers.
- [ ] Evaluate additional OpenAI models as pricing and capabilities evolve.
- [ ] Expose token usage metrics (input/output/reasoning) via Prometheus once the schema stabilises.

View file

@ -1,6 +1,29 @@
package openai
import "github.com/photoprism/photoprism/internal/ai/vision/schema"
const (
// CaptionSystem defines the default system prompt for caption models.
CaptionSystem = "You are a PhotoPrism vision model. Return concise, user-friendly captions that describe the main subjects accurately."
// CaptionPrompt instructs caption models to respond with a single sentence.
CaptionPrompt = "Provide exactly one sentence describing the key subject and action in the image. Avoid filler words and technical jargon."
// LabelSystem defines the system prompt for label generation.
LabelSystem = "You are a PhotoPrism vision model. Emit JSON that matches the provided schema and keep label names short, singular nouns."
// LabelPromptDefault requests general-purpose labels.
LabelPromptDefault = "Analyze the image and return label objects with name, confidence (0-1), and topicality (0-1)."
// LabelPromptNSFW requests labels including NSFW metadata when required.
LabelPromptNSFW = "Analyze the image and return label objects with name, confidence (0-1), topicality (0-1), nsfw (true when sensitive), and nsfw_confidence (0-1)."
// DefaultDetail specifies the preferred thumbnail detail level for Responses API calls.
DefaultDetail = "low"
// CaptionMaxTokens suggests the output budget for caption responses.
CaptionMaxTokens = 512
// LabelsMaxTokens suggests the output budget for label responses.
LabelsMaxTokens = 1024
// DefaultTemperature configures deterministic replies.
DefaultTemperature = 0.1
// DefaultTopP limits nucleus sampling.
DefaultTopP = 0.9
// DefaultSchemaVersion is used when callers do not specify an explicit schema version.
DefaultSchemaVersion = "v1"
)
var (
// DefaultModel is the model used by default when accessing the OpenAI API.
@ -8,8 +31,3 @@ var (
// DefaultResolution is the default thumbnail size submitted to the OpenAI API.
DefaultResolution = 720
)
// LabelsSchema returns the canonical label schema string consumed by OpenAI models.
func LabelsSchema() string {
return schema.LabelsDefault
}

View file

@ -0,0 +1,16 @@
package openai
import (
"encoding/json"
"github.com/photoprism/photoprism/internal/ai/vision/schema"
)
// SchemaLabels returns the canonical labels JSON Schema string consumed by OpenAI models.
//
// Related documentation and references:
// - https://platform.openai.com/docs/guides/structured-outputs
// - https://json-schema.org/learn/miscellaneous-examples
func SchemaLabels(nsfw bool) json.RawMessage {
return schema.LabelsJsonSchema(nsfw)
}

View file

@ -0,0 +1,73 @@
{
"id": "resp_0d356718505119f3006916e5d8730881a0b91de2aa700f6196",
"object": "response",
"created_at": 1763108312,
"status": "completed",
"background": false,
"billing": {
"payer": "developer"
},
"error": null,
"incomplete_details": null,
"instructions": null,
"max_output_tokens": 512,
"max_tool_calls": null,
"model": "gpt-5-nano-2025-08-07",
"output": [
{
"id": "rs_0d356718505119f3006916e5d8efd481a0a4f9cc1823cc6c83",
"type": "reasoning",
"summary": []
},
{
"id": "msg_0d356718505119f3006916e5d9433881a0bc79197d2cfc2027",
"type": "message",
"status": "completed",
"content": [
{
"type": "output_text",
"annotations": [],
"logprobs": [],
"text": "A bee gathers nectar from the vibrant red poppy\u2019s center."
}
],
"role": "assistant"
}
],
"parallel_tool_calls": true,
"previous_response_id": null,
"prompt_cache_key": null,
"prompt_cache_retention": null,
"reasoning": {
"effort": "low",
"summary": null
},
"safety_identifier": null,
"service_tier": "default",
"store": true,
"temperature": 1.0,
"text": {
"format": {
"type": "text"
},
"verbosity": "medium"
},
"tool_choice": "auto",
"tools": [],
"top_logprobs": 0,
"top_p": 1.0,
"truncation": "disabled",
"usage": {
"input_tokens": 576,
"input_tokens_details": {
"cached_tokens": 0
},
"output_tokens": 19,
"output_tokens_details": {
"reasoning_tokens": 0
},
"total_tokens": 595
},
"user": null,
"metadata": {}
}

View file

@ -0,0 +1,114 @@
{
"id": "resp_0fa91dfb69b7d644006916ea0b72ac819f84ff3152a38dfcdb",
"object": "response",
"created_at": 1763109387,
"status": "completed",
"background": false,
"billing": {
"payer": "developer"
},
"error": null,
"incomplete_details": null,
"instructions": null,
"max_output_tokens": 1024,
"max_tool_calls": null,
"model": "gpt-5-mini-2025-08-07",
"output": [
{
"id": "rs_0fa91dfb69b7d644006916ea0c3450819f8a13396bf377f474",
"type": "reasoning",
"summary": []
},
{
"id": "msg_0fa91dfb69b7d644006916ea0d2dfc819faf52b11334fc10a4",
"type": "message",
"status": "completed",
"content": [
{
"type": "output_text",
"annotations": [],
"logprobs": [],
"text": "{\"labels\":[{\"name\":\"flower\",\"confidence\":0.99,\"topicality\":0.99},{\"name\":\"bee\",\"confidence\":0.95,\"topicality\":0.95},{\"name\":\"petal\",\"confidence\":0.92,\"topicality\":0.88},{\"name\":\"pollen\",\"confidence\":0.85,\"topicality\":0.8},{\"name\":\"insect\",\"confidence\":0.9,\"topicality\":0.85},{\"name\":\"red\",\"confidence\":0.88,\"topicality\":0.6},{\"name\":\"close-up\",\"confidence\":0.86,\"topicality\":0.7},{\"name\":\"nature\",\"confidence\":0.8,\"topicality\":0.5}]}"
}
],
"role": "assistant"
}
],
"parallel_tool_calls": true,
"previous_response_id": null,
"prompt_cache_key": null,
"prompt_cache_retention": null,
"reasoning": {
"effort": "low",
"summary": null
},
"safety_identifier": null,
"service_tier": "default",
"store": true,
"temperature": 1.0,
"text": {
"format": {
"type": "json_schema",
"description": null,
"name": "photoprism_vision_labels_v1",
"schema": {
"type": "object",
"properties": {
"labels": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"topicality": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": [
"name",
"confidence",
"topicality"
],
"additionalProperties": false
},
"default": []
}
},
"required": [
"labels"
],
"additionalProperties": false
},
"strict": true
},
"verbosity": "medium"
},
"tool_choice": "auto",
"tools": [],
"top_logprobs": 0,
"top_p": 1.0,
"truncation": "disabled",
"usage": {
"input_tokens": 724,
"input_tokens_details": {
"cached_tokens": 0
},
"output_tokens": 169,
"output_tokens_details": {
"reasoning_tokens": 0
},
"total_tokens": 893
},
"user": null,
"metadata": {}
}

View file

@ -0,0 +1,142 @@
package openai
import (
"encoding/json"
"strings"
)
const (
// ContentTypeText identifies text input segments for the Responses API.
ContentTypeText = "input_text"
// ContentTypeImage identifies image input segments for the Responses API.
ContentTypeImage = "input_image"
// ResponseFormatJSONSchema requests JSON constrained by a schema.
ResponseFormatJSONSchema = "json_schema"
// ResponseFormatJSONObject requests a free-form JSON object.
ResponseFormatJSONObject = "json_object"
)
// HTTPRequest represents the payload expected by OpenAI's Responses API.
type HTTPRequest struct {
Model string `json:"model"`
Input []InputMessage `json:"input"`
Text *TextOptions `json:"text,omitempty"`
Reasoning *Reasoning `json:"reasoning,omitempty"`
MaxOutputTokens int `json:"max_output_tokens,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
}
// TextOptions carries formatting preferences for textual responses.
type TextOptions struct {
Format *ResponseFormat `json:"format,omitempty"`
}
// Reasoning configures the effort level for reasoning models.
type Reasoning struct {
Effort string `json:"effort,omitempty"`
}
// InputMessage captures a single system or user message in the request.
type InputMessage struct {
Role string `json:"role"`
Type string `json:"type,omitempty"`
Content []ContentItem `json:"content"`
}
// ContentItem represents a text or image entry within a message.
type ContentItem struct {
Type string `json:"type"`
Text string `json:"text,omitempty"`
ImageURL string `json:"image_url,omitempty"`
Detail string `json:"detail,omitempty"`
}
// ResponseFormat describes how OpenAI should format its response.
type ResponseFormat struct {
Type string `json:"type"`
Name string `json:"name,omitempty"`
Schema json.RawMessage `json:"schema,omitempty"`
Description string `json:"description,omitempty"`
Strict bool `json:"strict,omitempty"`
}
// Response mirrors the subset of the Responses API response we need.
type Response struct {
ID string `json:"id"`
Model string `json:"model"`
Output []ResponseOutput `json:"output"`
Error *struct {
Message string `json:"message"`
Type string `json:"type"`
} `json:"error,omitempty"`
}
// ResponseOutput captures assistant messages within the response.
type ResponseOutput struct {
Role string `json:"role"`
Content []ResponseContent `json:"content"`
}
// ResponseContent contains individual message parts (JSON or text).
type ResponseContent struct {
Type string `json:"type"`
Text string `json:"text,omitempty"`
JSON json.RawMessage `json:"json,omitempty"`
}
// FirstJSON returns the first JSON payload contained in the response.
func (r *Response) FirstJSON() json.RawMessage {
if r == nil {
return nil
}
for i := range r.Output {
for j := range r.Output[i].Content {
if len(r.Output[i].Content[j].JSON) > 0 {
return r.Output[i].Content[j].JSON
}
}
}
return nil
}
// FirstText returns the first textual payload contained in the response.
func (r *Response) FirstText() string {
if r == nil {
return ""
}
for i := range r.Output {
for j := range r.Output[i].Content {
if text := strings.TrimSpace(r.Output[i].Content[j].Text); text != "" {
return text
}
}
}
return ""
}
// ParseErrorMessage extracts a human readable error message from a Responses API payload.
func ParseErrorMessage(raw []byte) string {
var errResp struct {
Error *struct {
Message string `json:"message"`
} `json:"error"`
}
if err := json.Unmarshal(raw, &errResp); err != nil {
return ""
}
if errResp.Error != nil {
return strings.TrimSpace(errResp.Error.Message)
}
return ""
}

View file

@ -0,0 +1,120 @@
package openai
import (
"encoding/json"
"os"
"path/filepath"
"testing"
)
func loadTestResponse(t *testing.T, name string) *Response {
t.Helper()
filePath := filepath.Join("testdata", name)
data, err := os.ReadFile(filePath)
if err != nil {
t.Fatalf("failed to read %s: %v", filePath, err)
}
var resp Response
if err := json.Unmarshal(data, &resp); err != nil {
t.Fatalf("failed to unmarshal %s: %v", filePath, err)
}
return &resp
}
func TestParseErrorMessage(t *testing.T) {
t.Run("returns message when present", func(t *testing.T) {
raw := []byte(`{"error":{"message":"Invalid schema"}}`)
msg := ParseErrorMessage(raw)
if msg != "Invalid schema" {
t.Fatalf("expected message, got %q", msg)
}
})
t.Run("returns empty string when error is missing", func(t *testing.T) {
raw := []byte(`{"output":[]}`)
if msg := ParseErrorMessage(raw); msg != "" {
t.Fatalf("expected empty message, got %q", msg)
}
})
}
func TestResponseFirstTextCaption(t *testing.T) {
resp := loadTestResponse(t, "caption-response.json")
if jsonPayload := resp.FirstJSON(); len(jsonPayload) != 0 {
t.Fatalf("expected no JSON payload, got: %s", jsonPayload)
}
text := resp.FirstText()
expected := "A bee gathers nectar from the vibrant red poppys center."
if text != expected {
t.Fatalf("unexpected caption text: %q", text)
}
}
func TestResponseFirstTextLabels(t *testing.T) {
resp := loadTestResponse(t, "labels-response.json")
if jsonPayload := resp.FirstJSON(); len(jsonPayload) != 0 {
t.Fatalf("expected no JSON payload, got: %s", jsonPayload)
}
text := resp.FirstText()
if len(text) == 0 {
t.Fatal("expected structured JSON string in text payload")
}
if text[0] != '{' {
t.Fatalf("expected JSON object in text payload, got %q", text)
}
}
func TestResponseFirstJSONFromStructuredPayload(t *testing.T) {
resp := &Response{
ID: "resp_structured",
Model: "gpt-5-mini",
Output: []ResponseOutput{
{
Role: "assistant",
Content: []ResponseContent{
{
Type: "output_json",
JSON: json.RawMessage(`{"labels":[{"name":"sunset"}]}`),
},
},
},
},
}
jsonPayload := resp.FirstJSON()
if len(jsonPayload) == 0 {
t.Fatal("expected JSON payload, got empty result")
}
var decoded struct {
Labels []map[string]string `json:"labels"`
}
if err := json.Unmarshal(jsonPayload, &decoded); err != nil {
t.Fatalf("failed to decode JSON payload: %v", err)
}
if len(decoded.Labels) != 1 || decoded.Labels[0]["name"] != "sunset" {
t.Fatalf("unexpected JSON payload: %+v", decoded.Labels)
}
}
func TestSchemaLabelsReturnsValidJSON(t *testing.T) {
raw := SchemaLabels(false)
var decoded map[string]any
if err := json.Unmarshal(raw, &decoded); err != nil {
t.Fatalf("schema should be valid JSON: %v", err)
}
if decoded["type"] != "object" {
t.Fatalf("expected type object, got %v", decoded["type"])
}
}

View file

@ -0,0 +1,52 @@
## PhotoPrism — Vision Schema Reference
**Last Updated:** November 14, 2025
### Overview
This package contains the canonical label response specifications used by PhotoPrism's external vision engines. It exposes two helpers:
- `LabelsJsonSchema(nsfw bool)` — returns a JSON **Schema** document tailored for OpenAI Responses requests, enabling strict validation of structured outputs.
- `LabelsJson(nsfw bool)` — returns a literal JSON **sample** that Ollama-style models can mirror when they only support prompt-enforced structures.
Both helpers build on the same field set (`name`, `confidence`, `topicality`, and optional NSFW flags) so downstream parsing logic (`LabelResult`) can remain engine-agnostic.
### Schema Types & Differences
| Helper | Target Engine | Format | Validation Style | When To Use |
|---------------------------|--------------------------|--------------------------------------------------------|-------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
| `LabelsJsonSchema(false)` | OpenAI (standard labels) | JSON Schema Draft | Strong: OpenAI enforces field types/ranges server-side before returning a response. | When calling GPT-5 vision models via `ApiFormatOpenAI` to ensure PhotoPrism receives well-formed label arrays. |
| `LabelsJsonSchema(true)` | OpenAI (labels + NSFW) | JSON Schema Draft with additional boolean/float fields | Strong: same enforcement plus required NSFW fields. | When `DetectNSFWLabels` or NSFW-specific prompts are active and the model must emit `nsfw` + `nsfw_confidence`. |
| `LabelsJson(false)` | Ollama (standard labels) | Plain JSON example | Soft: model is nudged to mimic the structure through prompt instructions. | When running self-hosted Ollama models that support “JSON mode” but do not consume JSON Schema definitions. |
| `LabelsJson(true)` | Ollama (labels + NSFW) | Plain JSON example with NSFW keys | Soft: prompts describe the required keys; the adapter validates after parsing. | When Ollama prompts mention NSFW scoring or PhotoPrism sets `DetectNSFWLabels=true`. |
**Key technical distinction:** OpenAI's Responses API accepts a JSON Schema (see `LabelsJsonSchema*`) and guarantees compliance by rejecting invalid responses, while Ollama currently relies on prompt-directed output. For Ollama integrations we provide a representative JSON document (`LabelsJson*`) that models can imitate; PhotoPrism then normalizes and validates the results in Go.
### Field Definitions
- `name` — single-word noun describing the subject (string, required).
- `confidence` — normalized score between `0` and `1` (float, required).
- `topicality` — relative relevance score between `0` and `1` (float, required; defaults to `confidence` if omitted after parsing).
- `nsfw` — boolean flag indicating sensitive content (required only in NSFW variants).
- `nsfw_confidence` — normalized probability for the NSFW assessment (required only in NSFW variants).
OpenAI schemas enforce these ranges/types, while Ollama prompts remind the model to emit matching keys. After parsing, PhotoPrism applies `LabelConfidenceDefault` and `normalizeLabelResult` to fill gaps and enforce naming rules.
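A minimal sketch of that gap-filling, assuming the defaults named above (`LabelConfidenceDefault = 0.5`, topicality falling back to confidence); the authoritative logic is `normalizeLabelResult`:

```go
package normalize // illustrative only

// fillLabelDefaults applies the fallbacks described above: a missing
// confidence gets the adapter default (0.5), and a missing topicality
// defaults to the confidence value.
func fillLabelDefaults(confidence, topicality float32) (float32, float32) {
	const labelConfidenceDefault = 0.5 // assumed to match LabelConfidenceDefault
	if confidence <= 0 {
		confidence = labelConfidenceDefault
	}
	if topicality <= 0 {
		topicality = confidence
	}
	return confidence, topicality
}
```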
### Usage Guidance
1. **OpenAI models** (`Engine: openai`, `RequestFormat: openai`):
- Leave `Schema` unset in `vision.yml`; the engine defaults call `LabelsJsonSchema(model.PromptContains("nsfw"))`.
- Optionally override the schema via `Schema`/`SchemaFile` if you extend fields, but keep required keys so `LabelResult` parsing succeeds.
2. **Ollama models** (`Engine: ollama`, `RequestFormat: ollama`):
- Rely on the built-in samples from `LabelsJson` or include them directly in prompts via `model.SchemaInstructions()`.
- Because enforcement happens after the response arrives, keep `Format: json` (default) and `Options.ForceJson=true` for label models to make parsing stricter.
3. **Custom engines**:
- Reuse these helpers to stay compatible with PhotoPrism's label DTOs.
- When adding new fields, update both schema/sample versions so OpenAI and Ollama adapters remain aligned.
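For custom engines, here is a sketch of pairing the canonical schema with its derived response-format name via the helpers in this package; the `v1` version string and the return shape are assumptions for illustration:

```go
package example // illustrative only

import (
	"github.com/photoprism/photoprism/internal/ai/vision/schema"
)

// labelResponseFormat returns the labels JSON Schema together with its
// derived name, e.g. "photoprism_vision_labels_v1".
func labelResponseFormat(nsfw bool) (name string, raw []byte) {
	s := schema.LabelsJsonSchema(nsfw)
	return schema.JsonSchemaName(s, "v1"), s
}
```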
### References
- JSON Schema primer: https://json-schema.org/learn/miscellaneous-examples
- OpenAI structured outputs: https://platform.openai.com/docs/guides/structured-outputs
- JSON mode background (Ollama-style prompts): https://www.alibabacloud.com/help/en/model-studio/json-mode
- JSON syntax refresher: https://www.json.org/json-en.html

View file

@ -1,16 +1,115 @@
package schema
// LabelsDefault provides the minimal JSON schema for label responses used across engines.
const (
LabelsDefault = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0 }]\n}"
LabelsNSFW = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0,\n \"nsfw\": false,\n \"nsfw_confidence\": 0\n }]\n}"
import (
"encoding/json"
)
// Labels returns the canonical label schema string.
func Labels(nsfw bool) string {
// LabelsJsonSchemaDefault provides the minimal JSON schema for label responses used across engines.
const (
LabelsJsonSchemaDefault = `{
"type": "object",
"properties": {
"labels": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"topicality": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": ["name", "confidence", "topicality"],
"additionalProperties": false
},
"default": []
}
},
"required": ["labels"],
"additionalProperties": false
}`
LabelsJsonDefault = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0 }]\n}"
LabelsJsonSchemaNSFW = `{
"type": "object",
"properties": {
"labels": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"minLength": 1
},
"confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"topicality": {
"type": "number",
"minimum": 0,
"maximum": 1
},
"nsfw": {
"type": "boolean"
},
"nsfw_confidence": {
"type": "number",
"minimum": 0,
"maximum": 1
}
},
"required": [
"name",
"confidence",
"topicality",
"nsfw",
"nsfw_confidence"
],
"additionalProperties": false
},
"default": []
}
},
"required": ["labels"],
"additionalProperties": false
}`
LabelsJsonNSFW = "{\n \"labels\": [{\n \"name\": \"\",\n \"confidence\": 0,\n \"topicality\": 0,\n \"nsfw\": false,\n \"nsfw_confidence\": 0\n }]\n}"
)
// LabelsJsonSchema returns the canonical label JSON Schema string for OpenAI API endpoints.
//
// Related documentation and references:
// - https://platform.openai.com/docs/guides/structured-outputs
// - https://json-schema.org/learn/miscellaneous-examples
func LabelsJsonSchema(nsfw bool) json.RawMessage {
if nsfw {
return LabelsNSFW
return json.RawMessage(LabelsJsonSchemaNSFW)
} else {
return LabelsDefault
return json.RawMessage(LabelsJsonSchemaDefault)
}
}
// LabelsJson returns the canonical label JSON string for Ollama vision models.
//
// Related documentation and references:
// - https://www.alibabacloud.com/help/en/model-studio/json-mode
// - https://www.json.org/json-en.html
func LabelsJson(nsfw bool) string {
if nsfw {
return LabelsJsonNSFW
} else {
return LabelsJsonDefault
}
}

View file

@ -0,0 +1,36 @@
package schema
import (
"bytes"
"encoding/json"
"fmt"
"github.com/photoprism/photoprism/pkg/clean"
)
const (
NamePrefix = "photoprism_vision"
)
// JsonSchemaName returns the schema name string (including its version suffix) to be used for API requests.
func JsonSchemaName(schema json.RawMessage, version string) string {
var schemaName string
switch {
case bytes.Contains(schema, []byte("labels")):
schemaName = "labels"
case bytes.Contains(schema, []byte("caption")):
schemaName = "caption"
default:
schemaName = "schema"
}
version = clean.TypeLowerUnderscore(version)
if version == "" {
version = "v1"
}
return fmt.Sprintf("%s_%s_%s", NamePrefix, schemaName, version)
}

View file

@ -0,0 +1,23 @@
package schema
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
)
func TestJsonSchemaName(t *testing.T) {
t.Run("Default", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_schema_v1", JsonSchemaName(nil, ""))
})
t.Run("Labels", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_labels_v1", JsonSchemaName(json.RawMessage(LabelsJsonSchemaDefault), ""))
})
t.Run("LabelsV1", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_labels_v2", JsonSchemaName([]byte("labels"), "v2"))
})
t.Run("LabelsJsonSchema", func(t *testing.T) {
assert.Equal(t, "photoprism_vision_labels_v1", JsonSchemaName(LabelsJsonSchema(false), "v1"))
})
}

View file

@ -1,5 +1,5 @@
/*
Package schema defines canonical JSON schema templates shared by PhotoPrism's AI vision engines.
Package schema defines canonical JSON and JSON Schema templates shared by PhotoPrism's AI vision engines.
Copyright (c) 2018 - 2025 PhotoPrism UG. All rights reserved.

View file

@ -1,6 +1,9 @@
package vision
import (
"os"
"strings"
"github.com/photoprism/photoprism/pkg/http/scheme"
)
@ -36,7 +39,9 @@ func (m *Service) EndpointKey() string {
return ""
}
return m.Key
ensureEnv()
return strings.TrimSpace(os.ExpandEnv(m.Key))
}
// EndpointFileScheme returns the endpoint API file scheme type.

View file

@ -9,14 +9,12 @@ func TestThresholds_GetConfidence(t *testing.T) {
t.Fatalf("expected 0, got %d", got)
}
})
t.Run("AboveMax", func(t *testing.T) {
th := Thresholds{Confidence: 150}
if got := th.GetConfidence(); got != 1 {
t.Fatalf("expected 1, got %d", got)
}
})
t.Run("Float", func(t *testing.T) {
th := Thresholds{Confidence: 25}
if got := th.GetConfidenceFloat32(); got != 0.25 {
@ -32,14 +30,12 @@ func TestThresholds_GetTopicality(t *testing.T) {
t.Fatalf("expected 0, got %d", got)
}
})
t.Run("AboveMax", func(t *testing.T) {
th := Thresholds{Topicality: 300}
if got := th.GetTopicality(); got != 1 {
t.Fatalf("expected 1, got %d", got)
}
})
t.Run("Float", func(t *testing.T) {
th := Thresholds{Topicality: 45}
if got := th.GetTopicalityFloat32(); got != 0.45 {
@ -55,14 +51,12 @@ func TestThresholds_GetNSFW(t *testing.T) {
t.Fatalf("expected default %d, got %d", DefaultThresholds.NSFW, got)
}
})
t.Run("AboveMax", func(t *testing.T) {
th := Thresholds{NSFW: 200}
if got := th.GetNSFW(); got != 1 {
t.Fatalf("expected 1, got %d", got)
}
})
t.Run("Float", func(t *testing.T) {
th := Thresholds{NSFW: 80}
if got := th.GetNSFWFloat32(); got != 0.8 {

View file

@ -25,7 +25,34 @@ Additional information can be found in our Developer Guide:
package vision
import (
"os"
"strings"
"sync"
"github.com/photoprism/photoprism/internal/event"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/fs"
)
var log = event.Log
var ensureEnvOnce sync.Once
// ensureEnv loads environment-backed credentials once so adapters can look up
// OPENAI_API_KEY even when operators rely on OPENAI_API_KEY_FILE. Future engine
// integrations can reuse this hook to normalise additional secrets.
func ensureEnv() {
ensureEnvOnce.Do(func() {
if os.Getenv("OPENAI_API_KEY") != "" {
return
}
if path := strings.TrimSpace(os.Getenv("OPENAI_API_KEY_FILE")); fs.FileExistsNotEmpty(path) {
if data, err := os.ReadFile(path); err == nil {
if key := clean.Auth(string(data)); key != "" {
_ = os.Setenv("OPENAI_API_KEY", key)
}
}
}
})
}

View file

@ -339,7 +339,14 @@ func OIDCRedirect(router *gin.RouterGroup) {
sess.SetAuthID(user.AuthID, provider.Issuer())
sess.SetUser(user)
sess.SetGrantType(authn.GrantAuthorizationCode)
sess.IdToken = tokens.IDToken
// Ensure that the ID token fits into the existing
// database column; otherwise, truncate it.
if n := len(tokens.IDToken); n > 2048 {
sess.IdToken = tokens.IDToken[:2048]
} else {
sess.IdToken = tokens.IDToken
}
// Set session expiration and timeout.
sess.SetExpiresIn(unix.Day)

View file

@ -4542,6 +4542,12 @@
"prompt": {
"type": "string"
},
"schema": {
"items": {
"type": "integer"
},
"type": "array"
},
"stream": {
"type": "boolean"
},
@ -4562,6 +4568,15 @@
},
"vision.ApiRequestOptions": {
"properties": {
"combine_outputs": {
"type": "string"
},
"detail": {
"type": "string"
},
"force_json": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
@ -4571,6 +4586,9 @@
"main_gpu": {
"type": "integer"
},
"max_output_tokens": {
"type": "integer"
},
"min_p": {
"type": "number"
},
@ -4616,6 +4634,9 @@
"repeat_penalty": {
"type": "number"
},
"schema_version": {
"type": "string"
},
"seed": {
"type": "integer"
},

View file

@ -0,0 +1,27 @@
# Config Package Guide
## Overview
PhotoPrism's runtime configuration is managed by this package. Fields are defined in [`options.go`](options.go) and then initialized with values from command-line flags, environment variables, and optional YAML files (`storage/config/*.yml`).
## Sources and Precedence
PhotoPrism loads configuration in the following order:
1. **Built-in defaults** defined in this package.
2. **`defaults.yml`** — optional system defaults (typically `/etc/photoprism/defaults.yml`). See [Global Config Defaults](https://docs.photoprism.app/getting-started/config-files/defaults/) if you package PhotoPrism for other environments and need to override the compiled defaults.
3. **Environment variables** prefixed with `PHOTOPRISM_…` and specified in [`flags.go`](flags.go) along with the CLI flags. This is the primary override mechanism in container environments.
4. **`options.yml`** — user-level configuration stored under `storage/config/options.yml` (or another directory controlled by `PHOTOPRISM_CONFIG_PATH`). Values here override both defaults and environment variables; see [Config Files](https://docs.photoprism.app/getting-started/config-files/).
5. **CLI flags** (for example `photoprism --cache-path=/tmp/cache`). Flags always win when a conflict exists.
The `PHOTOPRISM_CONFIG_PATH` variable controls where PhotoPrism looks for YAML files (defaults to `storage/config`).
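A minimal sketch of this precedence, illustrative only (PhotoPrism's actual loader is more involved): each later non-empty source overrides the previous one, with CLI flags applied last.

```go
package config // illustrative only

// resolve applies the documented precedence: built-in default, defaults.yml,
// environment variable, options.yml, then CLI flag, where later non-empty
// sources win.
func resolve(builtin, defaultsYml, env, optionsYml, flag string) string {
	value := builtin
	for _, v := range []string{defaultsYml, env, optionsYml, flag} {
		if v != "" {
			value = v
		}
	}
	return value
}
```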
> Any change to configuration (flags, env vars, YAML files) requires a restart. The Go process reads options during startup and does not watch for changes.
## CLI Reference
- `photoprism help` (or `photoprism --help`) lists all subcommands and global flags.
- `photoprism show config` renders every active option along with its current value. Pass `--json`, `--md`, `--tsv`, or `--csv` to change the output format.
- `photoprism show config-options` prints the description and default value for each option. Use this when updating [`flags.go`](flags.go).
- `photoprism show config-yaml` displays the configuration keys and their expected types in the [same structure that the YAML files use](https://docs.photoprism.app/getting-started/config-files/). It is a read-only helper meant to guide you when editing files under `storage/config`.
- Additional `show` subcommands document search filters, metadata tags, and supported thumbnail sizes; see [`internal/commands/show.go`](../commands/show.go) for the complete list.

View file

@ -4,5 +4,5 @@ package feat
var (
VisionModelGenerate = false // controls exposure of the generate endpoint and CLI commands
VisionModelMarkers = false // gates marker generation/return until downstream UI and reconciliation paths are ready
VisionServiceOpenAI = false // controls whether users are able to configure OpenAI as a vision service engine
VisionServiceOpenAI = true // controls whether users are able to configure OpenAI as a vision service engine
)

View file

@ -135,6 +135,7 @@ func (w *Vision) Start(filter string, count int, models []string, customSrc stri
done := make(map[string]bool)
offset := 0
updated := 0
processed := 0
// Make sure count is within range.
if count < 1 || count > search.MaxResults {
@ -197,6 +198,8 @@ func (w *Vision) Start(filter string, count int, models []string, customSrc stri
continue
}
processed++
fileName := photoprism.FileName(photo.FileRoot, photo.FileName)
file, fileErr := photoprism.NewMediaFile(fileName)
@ -279,7 +282,18 @@ func (w *Vision) Start(filter string, count int, models []string, customSrc stri
}
}
log.Infof("vision: updated %s [%s]", english.Plural(updated, "picture", "pictures"), time.Since(start))
elapsed := time.Since(start)
switch {
case processed == 0:
log.Infof("vision: no pictures required processing [%s]", elapsed)
case updated == processed:
log.Infof("vision: updated %s [%s]", english.Plural(updated, "picture", "pictures"), elapsed)
case updated == 0:
log.Infof("vision: processed %s (no metadata changes detected) [%s]", english.Plural(processed, "picture", "pictures"), elapsed)
default:
log.Infof("vision: updated %s out of %s [%s]", english.Plural(updated, "picture", "pictures"), english.Plural(processed, "picture", "pictures"), elapsed)
}
if updated > 0 {
updateIndex = true

View file

@ -26,13 +26,13 @@ func TestASCII(t *testing.T) {
}
func BenchmarkASCII(b *testing.B) {
for n := 0; n < b.N; n++ {
for b.Loop() {
ASCII("https://docs.photoprism.app/getting-started 👍/config-options/#file-converters")
}
}
func BenchmarkASCIIEmpty(b *testing.B) {
for n := 0; n < b.N; n++ {
for b.Loop() {
ASCII("")
}
}
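The benchmark updates in this and the following files migrate from the classic counter loop to `testing.B.Loop()`, introduced in Go 1.24. A minimal self-contained sketch of the new pattern (the benchmark name and input are made up for illustration):

```go
package example_test

import "testing"

// BenchmarkLoopPattern demonstrates the b.Loop() style adopted above:
// the framework decides how many iterations to run, and setup code
// before the loop is excluded from the measured time.
func BenchmarkLoopPattern(b *testing.B) {
	input := "https://docs.photoprism.app/" // setup, not timed

	for b.Loop() { // replaces: for n := 0; n < b.N; n++
		_ = len(input) // operation under measurement
	}
}
```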

View file

@ -13,7 +13,7 @@ var DomainRegexp = regexp.MustCompile("^(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\
// Auth returns the sanitized authentication identifier trimmed to a maximum length of 255 characters.
func Auth(s string) string {
if s == "" || len(s) > 2048 {
if s == "" || len(s) > 510 {
return ""
}

View file

@ -43,6 +43,12 @@ func TestAuth(t *testing.T) {
t.Run("TeLessThanSGreaterThanT", func(t *testing.T) {
assert.Equal(t, "Test", Auth("Te<s>t"))
})
t.Run("ApiKey", func(t *testing.T) {
assert.Equal(t,
"ab-prot-keech1aqu8quamiNaecuisuem1ahg7dieph8eitohzo7hoo7pe-Chohzu4eaA-Chohzu4ea-soh7Seic8eig9joojaeshe4Ahsu8zeibooCh9ooquaaleev3poLeev0su9jei2yeich3ahsi9quar1oqueic",
Auth("ab-prot-keech1aqu8quamiNaecuisuem1ahg7dieph8eitohzo7hoo7pe-Chohzu4eaA-Chohzu4ea-soh7Seic8eig9joojaeshe4Ahsu8zeibooCh9ooquaaleev3poLeev0su9jei2yeich3ahsi9quar1oqueic"),
)
})
}
func TestHandle(t *testing.T) {

View file

@ -27,13 +27,13 @@ func TestHeader(t *testing.T) {
}
func BenchmarkHeader(b *testing.B) {
for n := 0; n < b.N; n++ {
for b.Loop() {
Header("https://..docs.photoprism.app/gettin\\g-started/config-options/\tfile-converters")
}
}
func BenchmarkHeaderEmpty(b *testing.B) {
for n := 0; n < b.N; n++ {
for b.Loop() {
Header("")
}
}

View file

@ -48,7 +48,7 @@ func TestSearchQuery(t *testing.T) {
func BenchmarkSearchQuery_Complex(b *testing.B) {
s := "Jens AND Mander and me Or Kitty WITH flowers IN the park AT noon | img% json OR BILL!\n"
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
_ = SearchQuery(s)
}
}
@ -56,7 +56,7 @@ func BenchmarkSearchQuery_Complex(b *testing.B) {
func BenchmarkSearchQuery_Short(b *testing.B) {
s := "cat and dog"
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
_ = SearchQuery(s)
}
}
@ -65,7 +65,7 @@ func BenchmarkSearchQuery_LongNoOps(b *testing.B) {
// No tokens to replace, primarily tests normalization + trim.
s := strings.Repeat("alpha beta gamma ", 50)
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
_ = SearchQuery(s)
}
}

View file

@ -26,13 +26,13 @@ func TestUri(t *testing.T) {
}
func BenchmarkUri(b *testing.B) {
for n := 0; n < b.N; n++ {
for b.Loop() {
Uri("https://docs.photoprism.app/getting-started/config-options/#file-converters")
}
}
func BenchmarkUriEmpty(b *testing.B) {
for n := 0; n < b.N; n++ {
for b.Loop() {
Uri("")
}
}

View file

@ -233,7 +233,7 @@ var benchDir = flag.String("benchdir", runtime.GOROOT(), "The directory to scan
func BenchmarkFastWalk(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
err := fastwalk.Walk(*benchDir, func(path string, typ os.FileMode) error { return nil })
if err != nil {
b.Fatal(err)

View file

@ -27,13 +27,13 @@ func TestCacheControlMaxAge(t *testing.T) {
}
func BenchmarkTestCacheControlMaxAge(b *testing.B) {
for n := 0; n < b.N; n++ {
for b.Loop() {
_ = CacheControlMaxAge(DurationYear, false)
}
}
func BenchmarkTestCacheControlMaxAgeImmutable(b *testing.B) {
for n := 0; n < b.N; n++ {
for b.Loop() {
_ = CacheControlMaxAge(DurationYear, false) + ", " + CacheControlImmutable
}
}

View file

@ -33,7 +33,7 @@ func BenchmarkContainsAny_LargeOverlap(b *testing.B) {
bList[i] = a[i*4]
}
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
if !ContainsAny(a, bList) {
b.Fatalf("expected overlap")
}
@ -44,7 +44,7 @@ func BenchmarkContainsAny_Disjoint(b *testing.B) {
a := makeStrings("a", 5000)
bList := makeStrings("b", 5000)
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
if ContainsAny(a, bList) {
b.Fatalf("expected disjoint")
}
@ -56,7 +56,7 @@ func BenchmarkJoin_Large(b *testing.B) {
j := append(makeStrings("y", 5000), a[:1000]...) // 1000 duplicates
j = shuffleEveryK(j, 7)
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
out := Join(a, j)
if len(out) != 10000 {
b.Fatalf("unexpected length: %d", len(out))

View file

@ -166,7 +166,7 @@ func TestIsJoinToken(t *testing.T) {
}
func BenchmarkJoinToken(b *testing.B) {
for n := 0; n < b.N; n++ {
for b.Loop() {
JoinToken()
}
}

View file

@ -29,7 +29,7 @@ func TestClip(t *testing.T) {
func BenchmarkClipRunesASCII(b *testing.B) {
s := strings.Repeat("abc def ghi ", 20) // ASCII
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
_ = Clip(s, 50)
}
}
@ -37,7 +37,7 @@ func BenchmarkClipRunesASCII(b *testing.B) {
func BenchmarkClipRunesUTF8(b *testing.B) {
s := strings.Repeat("Grüße 世", 20) // non-ASCII runes
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
_ = Clip(s, 50)
}
}

View file

@ -115,7 +115,7 @@ func TestContainsAlnumLower(t *testing.T) {
func BenchmarkContainsNumber(b *testing.B) {
s := "The quick brown fox jumps over 13 lazy dogs"
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
_ = ContainsNumber(s)
}
}
@ -123,7 +123,7 @@ func BenchmarkContainsNumber(b *testing.B) {
func BenchmarkSortCaseInsensitive(b *testing.B) {
words := []string{"Zebra", "apple", "Banana", "cherry", "Apricot", "banana", "zebra", "Cherry"}
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
w := append([]string(nil), words...)
SortCaseInsensitive(w)
}

View file

@ -46,7 +46,7 @@ func makeLargeText(distinct, repeats int) string {
func BenchmarkWords_Large(b *testing.B) {
s := makeLargeText(200, 200) // ~40k tokens mixed
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
_ = Words(s)
}
}
@ -54,7 +54,7 @@ func BenchmarkWords_Large(b *testing.B) {
func BenchmarkUniqueKeywords_Large(b *testing.B) {
s := makeLargeText(200, 200)
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
_ = UniqueKeywords(s)
}
}
@ -62,7 +62,7 @@ func BenchmarkUniqueKeywords_Large(b *testing.B) {
func BenchmarkUniqueKeywords_ManyDup(b *testing.B) {
s := makeLargeText(20, 2000) // many repeats, few distinct
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for b.Loop() {
_ = UniqueKeywords(s)
}
}

File diff suppressed because one or more lines are too long