diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..ff42f7d --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,12 @@ +{ + "name": "Pinchflat Dev", + "dockerComposeFile": "../docker-compose.yml", + "service": "phx", + "workspaceFolder": "/app", + "shutdownAction": "stopCompose", + "customizations": { + "vscode": { + "extensions": ["phoenixframework.phoenix", "JakeBecker.elixir-ls", "esbenp.prettier-vscode"] + } + } +} diff --git a/.formatter.exs b/.formatter.exs index 6fc82a9..92e5016 100644 --- a/.formatter.exs +++ b/.formatter.exs @@ -1,3 +1,5 @@ +# TODO: figure out why my vscode extension doesn't respect the formatter.exs file +# if it's in a subdirectory [ import_deps: [:ecto, :ecto_sql, :phoenix], subdirectories: ["priv/*/migrations"], diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..64bd3e6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,34 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '[Triage] ' +labels: triage +assignees: kieraneglin +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: + +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Diagnostic info** + + + +**Additional context** + + + +Add any other context about the problem here. 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..1db7dfe --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,19 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '[FR] ' +labels: feature request +assignees: kieraneglin +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. It's too complicated to [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. + +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/other.md b/.github/ISSUE_TEMPLATE/other.md new file mode 100644 index 0000000..2f7c4d8 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/other.md @@ -0,0 +1,7 @@ +--- +name: Other +about: For everything else +title: '' +labels: '' +assignees: kieraneglin +--- diff --git a/.github/workflows/docker_release.yml b/.github/workflows/docker_release.yml index 27c4b17..2e35dbc 100644 --- a/.github/workflows/docker_release.yml +++ b/.github/workflows/docker_release.yml @@ -11,6 +11,12 @@ on: options: - 'linux/amd64' - 'linux/amd64,linux/arm64' + docker_tags: + type: string + description: 'Docker Tags' + required: true + default: 'dev' + push: branches: - master @@ -40,10 +46,9 @@ jobs: ghcr.io/${{ github.repository }} # All non-release actions will be tagged as `dev` (ie: push, workflow_dispatch) tags: | - type=semver,pattern={{version}},prefix=v - type=semver,pattern={{major}}.{{minor}},prefix=v - type=semver,pattern={{major}},prefix=v - type=raw,value=dev,enable=${{ github.event_name != 'release' }} + type=ref,event=tag + type=raw,value=dev,enable=${{ github.event_name != 'release' && 
github.event_name != 'workflow_dispatch' }} + type=raw,value=${{ inputs.docker_tags }},enable=${{ github.event_name == 'workflow_dispatch' }} flavor: | latest=auto @@ -67,10 +72,10 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build and Push - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 with: context: . - file: ./selfhosted.Dockerfile + file: ./docker/selfhosted.Dockerfile # If the event is a release, use the release_arch, otherwise use the # platforms input if present, falling back to dev_arch platforms: ${{ github.event_name == 'release' && env.release_arch || (github.event.inputs.platforms || env.dev_arch) }} diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index 71e4ac8..c174559 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -16,24 +16,28 @@ jobs: if: "! contains(toJSON(github.event.commits.*.message), '[skip ci]')" env: COMPOSE_FILE: ./docker-compose.ci.yml + MIX_ENV: test steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Pull prebuilt images run: docker compose pull - - name: Setup Docker layer caching - uses: jpribyl/action-docker-layer-caching@v0.1.1 - continue-on-error: true - with: - key: ci-docker-cache-{hash} - restore-keys: | - ci-docker-cache- - layer-ci-docker-cache- + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 - - name: Build and Run Docker image + - name: Build docker image + uses: docker/build-push-action@v6 + with: + context: . 
+ file: ./docker/dev.Dockerfile + load: true + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Run Docker image run: docker compose up --detach # NOTE: All exec commands use the -T flag to compensate for @@ -43,8 +47,7 @@ jobs: # See https://github.com/actions/runner/issues/241 and https://github.com/docker/compose/issues/8537 - name: Install Elixir and JS deps run: | - docker compose exec -T phx yarn install && cd assets && yarn install && cd .. - docker compose exec -T phx mix deps.get + docker compose exec -T phx mix deps.get && yarn install && cd assets && yarn install && cd .. - name: Create and Migrate database run: | diff --git a/.iex.exs b/.iex.exs index ebe1a24..8533f83 100644 --- a/.iex.exs +++ b/.iex.exs @@ -23,3 +23,11 @@ alias Pinchflat.Metadata.MetadataFileHelpers alias Pinchflat.SlowIndexing.FileFollowerServer Pinchflat.Release.check_file_permissions() + +defmodule IexHelpers do + def restart do + :init.restart() + end +end + +import IexHelpers diff --git a/.prettierignore b/.prettierignore index b6c9159..bdc7a23 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,3 +1 @@ assets/vendor/ -deps/ -_build/ diff --git a/.sobelow-conf b/.sobelow-conf index 0c4088d..bf1db34 100644 --- a/.sobelow-conf +++ b/.sobelow-conf @@ -16,7 +16,8 @@ "Config.HTTPS", "Config.CSP", "XSS.ContentType", - "Traversal.SendFile" + "Traversal.SendFile", + "Traversal.SendDownload" ], ignore_files: [], version: false diff --git a/README.md b/README.md index be086d3..d99b196 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +> [!IMPORTANT] +> (2025-02-14) [zakkarry](https://github.com/sponsors/zakkarry), who is a collaborator on [cross-seed](https://github.com/cross-seed/cross-seed) and an extremely helpful community member in general, is facing hard times due to medical debt and family illness. If you're able, please consider [sponsoring him on GitHub](https://github.com/sponsors/zakkarry) or donating via [buymeacoffee](https://tip.ary.dev). 
Tell him I sent you! +

+
+ +[![](https://img.shields.io/github/license/kieraneglin/pinchflat?style=for-the-badge&color=ee512b)](LICENSE) +[![](https://img.shields.io/github/v/release/kieraneglin/pinchflat?style=for-the-badge&color=purple)](https://github.com/kieraneglin/pinchflat/releases) +[![](https://img.shields.io/static/v1?style=for-the-badge&logo=discord&message=Chat&color=5865F2&label=Discord)](https://discord.gg/j7T6dCuwU4) +[![](https://img.shields.io/github/actions/workflow/status/kieraneglin/pinchflat/lint_and_test.yml?style=for-the-badge)](#) +[![](https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode&style=for-the-badge)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/kieraneglin/pinchflat) + +
+ # Your next YouTube media manager ## Table of contents: @@ -21,7 +34,10 @@ - [Screenshots](#screenshots) - [Installation](#installation) - [Unraid](#unraid) + - [Portainer](#portainer) - [Docker](#docker) + - [Environment Variables](#environment-variables) + - [A note on reverse proxies](#reverse-proxies) - [Username and Password (authentication)](https://github.com/kieraneglin/pinchflat/wiki/Username-and-Password) - [Frequently asked questions](https://github.com/kieraneglin/pinchflat/wiki/Frequently-Asked-Questions) - [Documentation](https://github.com/kieraneglin/pinchflat/wiki) @@ -31,9 +47,9 @@ ## What it does -Pinchflat is a self-hosted app for downloading YouTube content built using [yt-dlp](https://github.com/yt-dlp/yt-dlp). It's designed to be lightweight, self-contained, and easy to use. You set up rules for how to download content from YouTube channels or playlists and it'll do the rest, checking periodically for new content. It's perfect for people who want to download content for use in with a media center app (Plex, Jellyfin, Kodi) or for those who want to archive media! +Pinchflat is a self-hosted app for downloading YouTube content built using [yt-dlp](https://github.com/yt-dlp/yt-dlp). It's designed to be lightweight, self-contained, and easy to use. You set up rules for how to download content from YouTube channels or playlists and it'll do the rest, periodically checking for new content. It's perfect for people who want to download content for use with a media center app (Plex, Jellyfin, Kodi) or for those who want to archive media! -It's _not_ great for downloading one-off videos - it's built to download large amounts of content and keep it up to date. It's also not meant for consuming content in-app - Pinchflat downloads content to disk where you can then watch it with a media center app or VLC. 
+While you can [download individual videos](https://github.com/kieraneglin/pinchflat/wiki/Frequently-Asked-Questions#how-do-i-download-one-off-videos), Pinchflat is best suited for downloading content from channels or playlists. It's also not meant for consuming content in-app - Pinchflat downloads content to disk where you can then watch it with a media center app or VLC. If it doesn't work for your use case, please make a feature request! You can also check out these great alternatives: [Tube Archivist](https://github.com/tubearchivist/tubearchivist), [ytdl-sub](https://github.com/jmbannon/ytdl-sub), and [TubeSync](https://github.com/meeb/tubesync) @@ -42,22 +58,27 @@ If it doesn't work for your use case, please make a feature request! You can als - Self-contained - just one Docker container with no external dependencies - Powerful naming system so content is stored where and how you want it - Easy-to-use web interface with presets to get you started right away -- First-class support for media center apps like Plex, Jellyfin, and Kodi -- Supports serving RSS feeds to your favourite podcast app (beta - [docs]()) +- First-class support for media center apps like Plex, Jellyfin, and Kodi ([docs](https://github.com/kieraneglin/pinchflat/wiki/Frequently-Asked-Questions#how-do-i-get-media-into-plexjellyfinkodi)) +- Supports serving RSS feeds to your favourite podcast app ([docs](https://github.com/kieraneglin/pinchflat/wiki/Podcast-RSS-Feeds)) - Automatically downloads new content from channels and playlists - Uses a novel approach to download new content more quickly than other apps - Supports downloading audio content - Custom rules for handling YouTube Shorts and livestreams +- Apprise support for notifications +- Allows automatically redownloading new media after a set period + - This can help improve the download quality of new content or improve SponsorBlock tags - Optionally automatically delete old content 
([docs](https://github.com/kieraneglin/pinchflat/wiki/Automatically-Delete-Media)) -- Advanced options like setting cutoff dates and filtering by title +- Advanced options like setting cutoff dates and filtering by title ([docs](https://github.com/kieraneglin/pinchflat/wiki/Frequently-Asked-Questions#i-only-want-certain-videos-from-a-source---how-can-i-only-download-those)) - Reliable hands-off operation - Can pass cookies to YouTube to download your private playlists ([docs](https://github.com/kieraneglin/pinchflat/wiki/YouTube-Cookies)) - Sponsorblock integration +- \[Advanced\] allows custom `yt-dlp` options ([docs](https://github.com/kieraneglin/pinchflat/wiki/%5BAdvanced%5D-Custom-yt%E2%80%90dlp-options)) +- \[Advanced\] supports running custom scripts after downloading/deleting media (alpha - [docs](https://github.com/kieraneglin/pinchflat/wiki/%5BAdvanced%5D-Custom-lifecycle-scripts)) ## Screenshots -Pinchflat screenshot -Pinchflat screenshot +Pinchflat screenshot +Pinchflat screenshot ## Installation @@ -67,7 +88,8 @@ Simply search for Pinchflat in the Community Apps store! ### Portainer -Important: See the note below about storing config on a network file share. It's preferred to store the config on a local disk if at all possible. +> [!IMPORTANT] +> See the note below about storing config on a network file share. It's preferred to store the config on a local disk if at all possible. Docker Compose file: @@ -75,7 +97,10 @@ Docker Compose file: version: '3' services: pinchflat: - image: keglin/pinchflat:latest + image: ghcr.io/kieraneglin/pinchflat:latest + environment: + # Set the timezone to your local timezone + - TZ=America/New_York ports: - '8945:8945' volumes: @@ -89,41 +114,90 @@ services: 2. Prepare the docker image in one of the two ways below: - **From GHCR:** `docker pull ghcr.io/kieraneglin/pinchflat:latest` - NOTE: also available on Docker Hub at `keglin/pinchflat:latest` - - **Building locally:** `docker build . 
--file selfhosted.Dockerfile -t ghcr.io/kieraneglin/pinchflat:latest` + - **Building locally:** `docker build . --file docker/selfhosted.Dockerfile -t ghcr.io/kieraneglin/pinchflat:latest` 3. Run the container: ```bash # Be sure to replace /host/path/to/config and /host/path/to/downloads below with # the paths to the directories you created in step 1 +# Be sure to replace America/New_York with your local timezone docker run \ + -e TZ=America/New_York \ -p 8945:8945 \ -v /host/path/to/config:/config \ -v /host/path/to/downloads:/downloads \ ghcr.io/kieraneglin/pinchflat:latest ``` +### Podman + +The Podman setup is similar to Docker but changes a few flags to run under a User Namespace instead of root. To run Pinchflat under Podman and use the current user's UID/GID for file access run this: + +``` +podman run \ + --security-opt label=disable \ + --userns=keep-id --user=$UID \ + -e TZ=America/Los_Angeles \ + -p 8945:8945 \ + -v /host/path/to/config:/config:rw \ + -v /host/path/to/downloads/:/downloads:rw \ + ghcr.io/kieraneglin/pinchflat:latest +``` + +Using this setup consider creating a new `pinchflat` user and giving that user ownership to the config and download directory. See [Podman --userns](https://docs.podman.io/en/v4.6.1/markdown/options/userns.container.html) docs. + ### IMPORTANT: File permissions You _must_ ensure the host directories you've mounted are writable by the user running the Docker container. If you get a permission error follow the steps it suggests. See [#106](https://github.com/kieraneglin/pinchflat/issues/106) for more. -It's recommended to not run the container as root. Doing so can create permission issues if other apps need to work with the downloaded media. If you need to run any command as root, you can run `su` from the container's shell as there is no password set for the root user. +> [!IMPORTANT] +> It's not recommended to run the container as root. 
Doing so can create permission issues if other apps need to work with the downloaded media. -### Advanced: storing Pinchflat config directory on a network share - -README: This is currently in the testing phase and not a recommended option (yet). The implications of changing this setting isn't clear and this could, conceivably, result in data loss. Only change this setting if you know what you're doing, why this is important, and are okay with possible data loss or DB corruption. This may become the default in the future once it's been tested more thoroughly. +### ADVANCED: Storing Pinchflat config directory on a network share As pointed out in [#137](https://github.com/kieraneglin/pinchflat/issues/137), SQLite doesn't like being run in WAL mode on network shares. If you're running Pinchflat on a network share, you can disable WAL mode by setting the `JOURNAL_MODE` environment variable to `delete`. This will make Pinchflat run in rollback journal mode which is less performant but should work on network shares. +> [!CAUTION] +> Changing this setting from WAL to `delete` on an existing Pinchflat instance could, conceivably, result in data loss. Only change this setting if you know what you're doing, why this is important, and are okay with possible data loss or DB corruption. Backup your database first! + If you change this setting and it works well for you, please leave a comment on [#137](https://github.com/kieraneglin/pinchflat/issues/137)! Doubly so if it does _not_ work well. +### Environment variables + +| Name | Required? 
| Default | Notes | +| --------------------------- | --------- | ------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | +| `TZ` | No | `UTC` | Must follow IANA TZ format | +| `LOG_LEVEL` | No | `debug` | Can be set to `info` but `debug` is strongly recommended | +| `UMASK` | No | `022` | Unraid users may want to set this to `000` | +| `BASIC_AUTH_USERNAME` | No | | See [authentication docs](https://github.com/kieraneglin/pinchflat/wiki/Username-and-Password) | +| `BASIC_AUTH_PASSWORD` | No | | See [authentication docs](https://github.com/kieraneglin/pinchflat/wiki/Username-and-Password) | +| `EXPOSE_FEED_ENDPOINTS` | No | `false` | See [RSS feed docs](https://github.com/kieraneglin/pinchflat/wiki/Podcast-RSS-Feeds) | +| `ENABLE_IPV6` | No | `false` | Setting to _any_ non-blank value will enable IPv6 | +| `JOURNAL_MODE` | No | `wal` | Set to `delete` if your config directory is stored on a network share (not recommended) | +| `TZ_DATA_DIR` | No | `/etc/elixir_tzdata_data` | The container path where the timezone database is stored | +| `BASE_ROUTE_PATH` | No | `/` | The base path for route generation. Useful when running behind certain reverse proxies - prefixes must be stripped. | +| `YT_DLP_WORKER_CONCURRENCY` | No | `2` | The number of concurrent workers that use `yt-dlp` _per queue_. Set to 1 if you're getting IP limited, otherwise don't touch it | +| `ENABLE_PROMETHEUS` | No | `false` | Setting to _any_ non-blank value will enable Prometheus. See [docs](https://github.com/kieraneglin/pinchflat/wiki/Prometheus-and-Grafana) | + +### Reverse Proxies + +Pinchflat makes heavy use of websockets for real-time updates. If you're running Pinchflat behind a reverse proxy then you'll need to make sure it's configured to support websockets. 
+ ## EFF donations -A portion of all donations to Pinchflat will be donated to the [Electronic Frontier Foundation](https://www.eff.org/). The EFF defends your online liberties and [backed](https://github.com/github/dmca/blob/9a85e0f021f7967af80e186b890776a50443f06c/2020/11/2020-11-16-RIAA-reversal-effletter.pdf) `youtube-dl` when Google took them down. [See here](https://github.com/kieraneglin/pinchflat/wiki/EFF-Donation-Receipts) for a list of donation receipts. +Prior to 2024-05-10, a portion of all donations were given to the [Electronic Frontier Foundation](https://www.eff.org/). Now, the app doesn't accept donations that go to me personally and instead directs you straight to the EFF. [Here](https://github.com/kieraneglin/pinchflat/issues/234) are some people that have generously donated. -## Pre-release disclaimer +The EFF defends your online liberties and [backed](https://github.com/github/dmca/blob/9a85e0f021f7967af80e186b890776a50443f06c/2020/11/2020-11-16-RIAA-reversal-effletter.pdf) `youtube-dl` when Google took them down. -This is pre-release software and anything can break at any time. I make not guarantees about the stability of this software, forward-compatibility of updates, or integrity (both related to and independent of Pinchflat). Essentially, use at your own risk and expect there will be rough edges for now. +## Stability disclaimer + +This software is in active development and anything can break at any time. I make no guarantees about the stability of this software, forward-compatibility of updates, or integrity (both related to and independent of Pinchflat). 
## License See `LICENSE` file + + + +[license-badge]: https://img.shields.io/github/license/kieraneglin/pinchflat?style=for-the-badge&color=ee512b +[license-badge-url]: LICENSE diff --git a/assets/css/satoshi.css b/assets/css/satoshi.css index 272640a..bd622f8 100644 --- a/assets/css/satoshi.css +++ b/assets/css/satoshi.css @@ -22,8 +22,8 @@ @font-face { font-family: 'Satoshi'; - src: url('/fonts/satoshi/Satoshi-Light.woff2'), url('/fonts/satoshi/Satoshi-Light.woff'), - url('/fonts/satoshi/Satoshi-Light.ttf'); + src: url('../fonts/satoshi/Satoshi-Light.woff2'), url('../fonts/satoshi/Satoshi-Light.woff'), + url('../fonts/satoshi/Satoshi-Light.ttf'); font-weight: 300; font-display: swap; font-style: normal; @@ -31,8 +31,9 @@ @font-face { font-family: 'Satoshi'; - src: url('/fonts/satoshi/Satoshi-LightItalic.woff2'), - url('/fonts/satoshi/Satoshi-LightItalic.woff'), url('/fonts/satoshi/Satoshi-LightItalic.ttf'); + src: url('../fonts/satoshi/Satoshi-LightItalic.woff2'), + url('../fonts/satoshi/Satoshi-LightItalic.woff'), + url('../fonts/satoshi/Satoshi-LightItalic.ttf'); font-weight: 300; font-display: swap; font-style: italic; @@ -40,8 +41,8 @@ @font-face { font-family: 'Satoshi'; - src: url('/fonts/satoshi/Satoshi-Regular.woff2'), url('/fonts/satoshi/Satoshi-Regular.woff'), - url('/fonts/satoshi/Satoshi-Regular.ttf'); + src: url('../fonts/satoshi/Satoshi-Regular.woff2'), url('../fonts/satoshi/Satoshi-Regular.woff'), + url('../fonts/satoshi/Satoshi-Regular.ttf'); font-weight: 400; font-display: swap; font-style: normal; @@ -49,8 +50,8 @@ @font-face { font-family: 'Satoshi'; - src: url('/fonts/satoshi/Satoshi-Italic.woff2'), url('/fonts/satoshi/Satoshi-Italic.woff'), - url('/fonts/satoshi/Satoshi-Italic.ttf'); + src: url('../fonts/satoshi/Satoshi-Italic.woff2'), url('../fonts/satoshi/Satoshi-Italic.woff'), + url('../fonts/satoshi/Satoshi-Italic.ttf'); font-weight: 400; font-display: swap; font-style: italic; @@ -58,8 +59,8 @@ @font-face { font-family: 'Satoshi'; - src: 
url('/fonts/satoshi/Satoshi-Medium.woff2'), url('/fonts/satoshi/Satoshi-Medium.woff'), - url('/fonts/satoshi/Satoshi-Medium.ttf'); + src: url('../fonts/satoshi/Satoshi-Medium.woff2'), url('../fonts/satoshi/Satoshi-Medium.woff'), + url('../fonts/satoshi/Satoshi-Medium.ttf'); font-weight: 500; font-display: swap; font-style: normal; @@ -67,8 +68,9 @@ @font-face { font-family: 'Satoshi'; - src: url('/fonts/satoshi/Satoshi-MediumItalic.woff2'), - url('/fonts/satoshi/Satoshi-MediumItalic.woff'), url('/fonts/satoshi/Satoshi-MediumItalic.ttf'); + src: url('../fonts/satoshi/Satoshi-MediumItalic.woff2'), + url('../fonts/satoshi/Satoshi-MediumItalic.woff'), + url('../fonts/satoshi/Satoshi-MediumItalic.ttf'); font-weight: 500; font-display: swap; font-style: italic; @@ -76,8 +78,8 @@ @font-face { font-family: 'Satoshi'; - src: url('/fonts/satoshi/Satoshi-Bold.woff2'), url('/fonts/satoshi/Satoshi-Bold.woff'), - url('/fonts/satoshi/Satoshi-Bold.ttf'); + src: url('../fonts/satoshi/Satoshi-Bold.woff2'), url('../fonts/satoshi/Satoshi-Bold.woff'), + url('../fonts/satoshi/Satoshi-Bold.ttf'); font-weight: 700; font-display: swap; font-style: normal; @@ -85,8 +87,8 @@ @font-face { font-family: 'Satoshi'; - src: url('/fonts/satoshi/Satoshi-BoldItalic.woff2'), url('/fonts/satoshi/Satoshi-BoldItalic.woff'), - url('/fonts/satoshi/Satoshi-BoldItalic.ttf'); + src: url('../fonts/satoshi/Satoshi-BoldItalic.woff2'), + url('../fonts/satoshi/Satoshi-BoldItalic.woff'), url('../fonts/satoshi/Satoshi-BoldItalic.ttf'); font-weight: 700; font-display: swap; font-style: italic; @@ -94,8 +96,9 @@ @font-face { font-family: 'Satoshi'; - src: url('/fonts/satoshi/Satoshi-BlackItalic.woff2'), - url('/fonts/satoshi/Satoshi-BlackItalic.woff'), url('/fonts/satoshi/Satoshi-BlackItalic.ttf'); + src: url('../fonts/satoshi/Satoshi-BlackItalic.woff2'), + url('../fonts/satoshi/Satoshi-BlackItalic.woff'), + url('../fonts/satoshi/Satoshi-BlackItalic.ttf'); font-weight: 900; font-display: swap; font-style: italic; diff 
--git a/assets/js/alpine_helpers.js b/assets/js/alpine_helpers.js new file mode 100644 index 0000000..9c2367f --- /dev/null +++ b/assets/js/alpine_helpers.js @@ -0,0 +1,48 @@ +window.copyTextToClipboard = async (text) => { + // Navigator clipboard api needs a secure context (https) + if (navigator.clipboard && window.isSecureContext) { + await navigator.clipboard.writeText(text) + } else { + const textArea = document.createElement('textarea') + textArea.value = text + // Move textarea out of the viewport so it's not visible + textArea.style.position = 'absolute' + textArea.style.left = '-999999px' + + document.body.prepend(textArea) + textArea.select() + + try { + document.execCommand('copy') + } catch (error) { + console.error(error) + } finally { + textArea.remove() + } + } +} + +window.copyWithCallbacks = async (text, onCopy, onAfterDelay, delay = 4000) => { + await window.copyTextToClipboard(text) + onCopy() + setTimeout(onAfterDelay, delay) +} + +window.markVersionAsSeen = (versionString) => { + localStorage.setItem('seenVersion', versionString) +} + +window.isVersionSeen = (versionString) => { + return localStorage.getItem('seenVersion') === versionString +} + +window.dispatchFor = (elementOrId, eventName, detail = {}) => { + const element = + typeof elementOrId === 'string' ? document.getElementById(elementOrId) : elementOrId + + // This is needed to ensure the DOM has updated before dispatching the event. 
+ // Doing so ensures that the latest DOM state is what's sent to the server + setTimeout(() => { + element.dispatchEvent(new CustomEvent(eventName, { bubbles: true, detail })) + }, 0) +} diff --git a/assets/js/app.js b/assets/js/app.js index f161731..e6e0219 100644 --- a/assets/js/app.js +++ b/assets/js/app.js @@ -23,12 +23,13 @@ import { LiveSocket } from 'phoenix_live_view' import topbar from '../vendor/topbar' import Alpine from 'alpinejs' import './tabs' +import './alpine_helpers' window.Alpine = Alpine Alpine.start() let csrfToken = document.querySelector("meta[name='csrf-token']").getAttribute('content') -let liveSocket = new LiveSocket('/live', Socket, { +let liveSocket = new LiveSocket(document.body.dataset.socketPath, Socket, { params: { _csrf_token: csrfToken }, dom: { onBeforeElUpdated(from, to) { @@ -36,32 +37,19 @@ let liveSocket = new LiveSocket('/live', Socket, { window.Alpine.clone(from, to) } } - } -}) - -window.copyTextToClipboard = async (text) => { - // Navigator clipboard api needs a secure context (https) - if (navigator.clipboard && window.isSecureContext) { - await navigator.clipboard.writeText(text) - } else { - const textArea = document.createElement('textarea') - textArea.value = text - // Move textarea out of the viewport so it's not visible - textArea.style.position = 'absolute' - textArea.style.left = '-999999px' - - document.body.prepend(textArea) - textArea.select() - - try { - document.execCommand('copy') - } catch (error) { - console.error(error) - } finally { - textArea.remove() + }, + hooks: { + 'supress-enter-submission': { + mounted() { + this.el.addEventListener('keypress', (event) => { + if (event.key === 'Enter') { + event.preventDefault() + } + }) + } } } -} +}) // Show progress bar on live navigation and form submits topbar.config({ barColors: { 0: '#29d' }, shadowColor: 'rgba(0, 0, 0, .3)' }) diff --git a/assets/js/tabs.js b/assets/js/tabs.js index 964223d..0d1cfa8 100644 --- a/assets/js/tabs.js +++ b/assets/js/tabs.js @@ 
-1,20 +1,20 @@ -window.setTabIndex = (index) => { - window.location.hash = `tab-${index}` +window.setTabByName = (tabName) => { + window.location.hash = `tab-${tabName}` - return index + return tabName } // The conditionals and currIndex stuff ensures that // the tab index is always set to 0 if the hash is empty // AND other hash values are ignored -window.getTabIndex = (currIndex) => { +window.getTabFromHash = (currentTabName, defaultTabName) => { if (window.location.hash === '' || window.location.hash === '#') { - return 0 + return defaultTabName } if (window.location.hash.startsWith('#tab-')) { - return parseInt(window.location.hash.replace('#tab-', '')) + return window.location.hash.replace('#tab-', '') } - return currIndex + return currentTabName } diff --git a/assets/tailwind.config.js b/assets/tailwind.config.js index 49d9b55..fdb2bc4 100644 --- a/assets/tailwind.config.js +++ b/assets/tailwind.config.js @@ -347,6 +347,38 @@ module.exports = { }, { values } ) + }), + plugin(function ({ matchComponents, theme }) { + let iconsDir = path.join(__dirname, './vendor/simple-icons') + let values = {} + + fs.readdirSync(iconsDir).forEach((file) => { + let name = path.basename(file, '.svg') + values[name] = { name, fullPath: path.join(iconsDir, file) } + }) + + matchComponents( + { + si: ({ name, fullPath }) => { + let content = fs + .readFileSync(fullPath) + .toString() + .replace(/\r?\n|\r/g, '') + return { + [`--si-${name}`]: `url('data:image/svg+xml;utf8,${content}')`, + '-webkit-mask': `var(--si-${name})`, + mask: `var(--si-${name})`, + 'mask-repeat': 'no-repeat', + 'background-color': 'currentColor', + 'vertical-align': 'middle', + display: 'inline-block', + width: theme('spacing.5'), + height: theme('spacing.5') + } + } + }, + { values } + ) }) ] } diff --git a/assets/vendor/simple-icons/discord.svg b/assets/vendor/simple-icons/discord.svg new file mode 100644 index 0000000..9d7796b --- /dev/null +++ b/assets/vendor/simple-icons/discord.svg @@ -0,0 +1 @@ 
+Discord \ No newline at end of file diff --git a/assets/vendor/simple-icons/github.svg b/assets/vendor/simple-icons/github.svg new file mode 100644 index 0000000..2334976 --- /dev/null +++ b/assets/vendor/simple-icons/github.svg @@ -0,0 +1 @@ +GitHub diff --git a/config/config.exs b/config/config.exs index 4e7a854..f57e0cc 100644 --- a/config/config.exs +++ b/config/config.exs @@ -10,9 +10,12 @@ import Config config :pinchflat, ecto_repos: [Pinchflat.Repo], generators: [timestamp_type: :utc_datetime], + env: config_env(), # Specifying backend data here makes mocking and local testing SUPER easy yt_dlp_executable: System.find_executable("yt-dlp"), + apprise_executable: System.find_executable("apprise"), yt_dlp_runner: Pinchflat.YtDlp.CommandRunner, + apprise_runner: Pinchflat.Lifecycle.Notifications.CommandRunner, media_directory: "/downloads", # The user may or may not store metadata for their needs, but the app will always store its copy metadata_directory: "/config/metadata", @@ -23,7 +26,9 @@ config :pinchflat, basic_auth_username: "", basic_auth_password: "", expose_feed_endpoints: false, - file_watcher_poll_interval: 1000 + file_watcher_poll_interval: 1000, + timezone: "UTC", + base_route_path: "/" config :pinchflat, Pinchflat.Repo, journal_mode: :wal, @@ -37,32 +42,15 @@ config :pinchflat, PinchflatWeb.Endpoint, adapter: Phoenix.Endpoint.Cowboy2Adapter, render_errors: [ formats: [html: PinchflatWeb.ErrorHTML, json: PinchflatWeb.ErrorJSON], - layout: false + root_layout: {PinchflatWeb.Layouts, :root}, + layout: {PinchflatWeb.Layouts, :app} ], pubsub_server: Pinchflat.PubSub, live_view: [signing_salt: "/t5878kO"] config :pinchflat, Oban, engine: Oban.Engines.Lite, - repo: Pinchflat.Repo, - # Keep old jobs for 30 days for display in the UI - plugins: [ - {Oban.Plugins.Pruner, max_age: 30 * 24 * 60 * 60}, - {Oban.Plugins.Cron, - crontab: [ - {"@daily", Pinchflat.Downloading.MediaRetentionWorker} - ]} - ], - # TODO: consider making this an env var or something? 
- queues: [ - default: 10, - fast_indexing: 6, - media_indexing: 2, - media_collection_indexing: 2, - media_fetching: 2, - local_metadata: 8, - remote_metadata: 4 - ] + repo: Pinchflat.Repo # Configures the mailer # @@ -96,13 +84,19 @@ config :tailwind, ] # Configures Elixir's Logger -config :logger, :console, - format: "$time $metadata[$level] $message\n", +config :logger, :default_formatter, + format: "$date $time $metadata[$level] | $message\n", metadata: [:request_id] # Use Jason for JSON parsing in Phoenix config :phoenix, :json_library, Jason +config :pinchflat, Pinchflat.PromEx, + disabled: true, + manual_metrics_start_delay: :no_delay, + drop_metrics_groups: [], + metrics_server: :disabled + # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. import_config "#{config_env()}.exs" diff --git a/config/dev.exs b/config/dev.exs index 887322b..8b9b793 100644 --- a/config/dev.exs +++ b/config/dev.exs @@ -67,7 +67,7 @@ config :pinchflat, PinchflatWeb.Endpoint, config :pinchflat, dev_routes: true # Do not include metadata nor timestamps in development logs -config :logger, :console, format: "[$level] $message\n" +config :logger, :default_formatter, format: "[$level] $message\n" # Set a higher stacktrace during development. Avoid configuring such # in production as building large stacktraces may be expensive. @@ -81,3 +81,5 @@ config :phoenix_live_view, :debug_heex_annotations, true # Disable swoosh api client as it is only required for production adapters. 
config :swoosh, :api_client, false + +config :pinchflat, Pinchflat.PromEx, disabled: false diff --git a/config/runtime.exs b/config/runtime.exs index a496f55..5624bfe 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -40,34 +40,79 @@ config :pinchflat, Pinchflat.Repo, Path.join([:code.priv_dir(:pinchflat), "repo", "extensions", "sqlean-linux-#{system_arch}", "sqlean"]) ] +# Some users may want to increase the number of workers that use yt-dlp to improve speeds +# Others may want to decrease the number of these workers to lessen the chance of an IP ban +{yt_dlp_worker_count, _} = Integer.parse(System.get_env("YT_DLP_WORKER_CONCURRENCY", "2")) +# Used to set the cron for the yt-dlp update worker. The reason for this is +# to avoid all instances of PF updating yt-dlp at the same time, which 1) +# could result in rate limiting and 2) gives me time to react if an update +# breaks something +%{hour: current_hour, minute: current_minute} = DateTime.utc_now() + +config :pinchflat, Oban, + queues: [ + default: 10, + fast_indexing: yt_dlp_worker_count, + media_collection_indexing: yt_dlp_worker_count, + media_fetching: yt_dlp_worker_count, + remote_metadata: yt_dlp_worker_count, + local_data: 8 + ], + plugins: [ + # Keep old jobs for 30 days for display in the UI + {Oban.Plugins.Pruner, max_age: 30 * 24 * 60 * 60}, + {Oban.Plugins.Cron, + crontab: [ + {"#{current_minute} #{current_hour} * * *", Pinchflat.YtDlp.UpdateWorker}, + {"0 1 * * *", Pinchflat.Downloading.MediaRetentionWorker}, + {"0 2 * * *", Pinchflat.Downloading.MediaQualityUpgradeWorker} + ]} + ] + if config_env() == :prod do - config_path = "/config" + # Various paths. 
These ones shouldn't be tweaked if running in Docker + media_path = System.get_env("MEDIA_PATH", "/downloads") + config_path = System.get_env("CONFIG_PATH", "/config") db_path = System.get_env("DATABASE_PATH", Path.join([config_path, "db", "pinchflat.db"])) log_path = System.get_env("LOG_PATH", Path.join([config_path, "logs", "pinchflat.log"])) metadata_path = System.get_env("METADATA_PATH", Path.join([config_path, "metadata"])) extras_path = System.get_env("EXTRAS_PATH", Path.join([config_path, "extras"])) - + tmpfile_path = System.get_env("TMPFILE_PATH", Path.join([System.tmp_dir!(), "pinchflat", "data"])) + # This one can be changed if you want + tz_data_path = System.get_env("TZ_DATA_PATH", Path.join([extras_path, "elixir_tz_data"])) # For running PF as a podcast host on self-hosted environments expose_feed_endpoints = String.length(System.get_env("EXPOSE_FEED_ENDPOINTS", "")) > 0 - # For testing alternate journal modes (see issue #137) journal_mode = String.to_existing_atom(System.get_env("JOURNAL_MODE", "wal")) + # For running PF in a subdirectory via a reverse proxy + base_route_path = System.get_env("BASE_ROUTE_PATH", "/") + enable_ipv6 = String.length(System.get_env("ENABLE_IPV6", "")) > 0 + enable_prometheus = String.length(System.get_env("ENABLE_PROMETHEUS", "")) > 0 - config :logger, level: String.to_existing_atom(System.get_env("LOG_LEVEL", "info")) + config :logger, level: String.to_existing_atom(System.get_env("LOG_LEVEL", "debug")) config :pinchflat, yt_dlp_executable: System.find_executable("yt-dlp"), - media_directory: "/downloads", + apprise_executable: System.find_executable("apprise"), + media_directory: media_path, metadata_directory: metadata_path, extras_directory: extras_path, - tmpfile_directory: Path.join([System.tmp_dir!(), "pinchflat", "data"]), + tmpfile_directory: tmpfile_path, dns_cluster_query: System.get_env("DNS_CLUSTER_QUERY"), - expose_feed_endpoints: expose_feed_endpoints + expose_feed_endpoints: expose_feed_endpoints, + # This 
is configured in application.ex + timezone: "UTC", + log_path: log_path, + base_route_path: base_route_path + + config :tzdata, :data_dir, tz_data_path config :pinchflat, Pinchflat.Repo, database: db_path, journal_mode: journal_mode + config :pinchflat, Pinchflat.PromEx, disabled: !enable_prometheus + # The secret key base is used to sign/encrypt cookies and other secrets. # A default value is used in config/dev.exs and config/test.exs but you # want to use a different value for prod and you most likely don't want @@ -100,9 +145,10 @@ if config_env() == :prod do # Set it to {0, 0, 0, 0, 0, 0, 0, 1} for local network only access. # See the documentation on https://hexdocs.pm/plug_cowboy/Plug.Cowboy.html # for details about using IPv6 vs IPv4 and loopback vs public addresses. - ip: {0, 0, 0, 0}, + ip: if(enable_ipv6, do: {0, 0, 0, 0, 0, 0, 0, 0}, else: {0, 0, 0, 0}), port: String.to_integer(System.get_env("PORT") || "4000") ], + url: [path: base_route_path], secret_key_base: secret_key_base config :pinchflat, :logger, [ diff --git a/config/test.exs b/config/test.exs index 6c7409e..ce61044 100644 --- a/config/test.exs +++ b/config/test.exs @@ -3,6 +3,7 @@ import Config config :pinchflat, # Specifying backend data here makes mocking and local testing SUPER easy yt_dlp_executable: Path.join([File.cwd!(), "/test/support/scripts/yt-dlp-mocks/repeater.sh"]), + apprise_executable: Path.join([File.cwd!(), "/test/support/scripts/yt-dlp-mocks/repeater.sh"]), media_directory: Path.join([System.tmp_dir!(), "test", "media"]), metadata_directory: Path.join([System.tmp_dir!(), "test", "metadata"]), tmpfile_directory: Path.join([System.tmp_dir!(), "test", "tmpfiles"]), diff --git a/dev.Dockerfile b/dev.Dockerfile deleted file mode 100644 index 517ca63..0000000 --- a/dev.Dockerfile +++ /dev/null @@ -1,45 +0,0 @@ -ARG ELIXIR_VERSION=1.16.2 -ARG OTP_VERSION=26.2.2 -ARG DEBIAN_VERSION=bookworm-20240130 -ARG 
DEV_IMAGE="hexpm/elixir:${ELIXIR_VERSION}-erlang-${OTP_VERSION}-debian-${DEBIAN_VERSION}" - -FROM ${DEV_IMAGE} - -# Set the locale deets -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 - -# Install debian packages -RUN apt-get update -qq -RUN apt-get install -y inotify-tools ffmpeg curl git openssh-client \ - python3 python3-pip python3-setuptools python3-wheel python3-dev - -# Install nodejs -RUN curl -sL https://deb.nodesource.com/setup_20.x -o nodesource_setup.sh -RUN bash nodesource_setup.sh -RUN apt-get install nodejs -RUN npm install -g yarn - -# Install baseline Elixir packages -RUN mix local.hex --force -RUN mix local.rebar --force - -# Download YT-DLP -# NOTE: If you're seeing weird issues, consider using the FFMPEG released by yt-dlp -RUN python3 -m pip install -U --pre yt-dlp --break-system-packages - -# Create app directory and copy the Elixir projects into it. -WORKDIR /app -COPY . ./ - -# Needs permissions to be updated AFTER the copy step -RUN chmod +x ./docker-run.dev.sh - -# Install Elixir deps -# RUN mix archive.install github hexpm/hex branch latest -RUN mix deps.get -# Gives us iex shell history -ENV ERL_AFLAGS="-kernel shell_history enabled" - -EXPOSE 4008 diff --git a/docker-compose.ci.yml b/docker-compose.ci.yml index d4f4d6f..bf26c4b 100644 --- a/docker-compose.ci.yml +++ b/docker-compose.ci.yml @@ -1,13 +1,15 @@ -version: '3' services: phx: build: context: . - dockerfile: dev.Dockerfile + dockerfile: ./docker/dev.Dockerfile environment: - MIX_ENV=test volumes: - '.:/app' + # These lines ensure the deps can be saved as build artifacts for caching + - '/app/deps' + - '/app/_build' ports: - '4008:4008' command: tail -F /dev/null diff --git a/docker-compose.yml b/docker-compose.yml index 23cde70..e193a16 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,14 +1,12 @@ -version: '3' services: phx: build: context: . 
- dockerfile: dev.Dockerfile + dockerfile: ./docker/dev.Dockerfile volumes: - '.:/app' ports: - '4008:4008' - command: - - ./docker-run.dev.sh + command: bash -c "chmod +x docker/docker-run.dev.sh && docker/docker-run.dev.sh" stdin_open: true tty: true diff --git a/docker/dev.Dockerfile b/docker/dev.Dockerfile new file mode 100644 index 0000000..04a3e13 --- /dev/null +++ b/docker/dev.Dockerfile @@ -0,0 +1,72 @@ +ARG ELIXIR_VERSION=1.18.4 +ARG OTP_VERSION=27.2.4 +ARG DEBIAN_VERSION=bookworm-20250428-slim + +ARG DEV_IMAGE="hexpm/elixir:${ELIXIR_VERSION}-erlang-${OTP_VERSION}-debian-${DEBIAN_VERSION}" + +FROM ${DEV_IMAGE} + +ARG TARGETPLATFORM +RUN echo "Building for ${TARGETPLATFORM:?}" + +# Install debian packages +RUN apt-get update -qq && \ + apt-get install -y inotify-tools curl git openssh-client jq \ + python3 python3-setuptools python3-wheel python3-dev pipx \ + python3-mutagen locales procps build-essential graphviz zsh unzip + +# Install ffmpeg +RUN export FFMPEG_DOWNLOAD=$(case ${TARGETPLATFORM:-linux/amd64} in \ + "linux/amd64") echo "https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" ;; \ + "linux/arm64") echo "https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linuxarm64-gpl.tar.xz" ;; \ + *) echo "" ;; esac) && \ + curl -L ${FFMPEG_DOWNLOAD} --output /tmp/ffmpeg.tar.xz && \ + tar -xf /tmp/ffmpeg.tar.xz --strip-components=2 --no-anchored -C /usr/bin/ "ffmpeg" && \ + tar -xf /tmp/ffmpeg.tar.xz --strip-components=2 --no-anchored -C /usr/bin/ "ffprobe" + +# Install nodejs and Yarn +RUN curl -sL https://deb.nodesource.com/setup_20.x -o nodesource_setup.sh && \ + bash nodesource_setup.sh && \ + apt-get install -y nodejs && \ + npm install -g yarn && \ + # Install baseline Elixir packages + mix local.hex --force && \ + mix local.rebar --force && \ + # Install Deno - required for YouTube downloads (See yt-dlp#14404) + curl -fsSL https://deno.land/install.sh | 
DENO_INSTALL=/usr/local sh -s -- -y --no-modify-path && \ + # Download and update YT-DLP + export YT_DLP_DOWNLOAD=$(case ${TARGETPLATFORM:-linux/amd64} in \ + "linux/amd64") echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux" ;; \ + "linux/arm64") echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64" ;; \ + *) echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux" ;; esac) && \ + curl -L ${YT_DLP_DOWNLOAD} -o /usr/local/bin/yt-dlp && \ + chmod a+rx /usr/local/bin/yt-dlp && \ + yt-dlp -U && \ + # Install Apprise + export PIPX_HOME=/opt/pipx && \ + export PIPX_BIN_DIR=/usr/local/bin && \ + pipx install apprise && \ + # Set up ZSH tools + chsh -s $(which zsh) && \ + sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" + +# Set the locale +RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US:en +ENV LC_ALL=en_US.UTF-8 + +WORKDIR /app + +COPY mix.exs mix.lock ./ +# Install Elixir deps +# NOTE: this has to be before the bulk copy to ensure that deps are cached +RUN MIX_ENV=dev mix deps.get && MIX_ENV=dev mix deps.compile +RUN MIX_ENV=test mix deps.get && MIX_ENV=test mix deps.compile + +COPY . ./ + +# Gives us iex shell history +ENV ERL_AFLAGS="-kernel shell_history enabled" + +EXPOSE 4008 diff --git a/docker-run.dev.sh b/docker/docker-run.dev.sh similarity index 56% rename from docker-run.dev.sh rename to docker/docker-run.dev.sh index ddae66f..8ebb1a8 100755 --- a/docker-run.dev.sh +++ b/docker/docker-run.dev.sh @@ -2,12 +2,12 @@ set -e -# Ensure the app's deps are installed +echo "\nInstalling Elixir deps..." mix deps.get -# Install JS deps -echo "\nInstalling JS..." -cd assets && yarn install +# Install both project-level and assets-level JS dependencies +echo "\nInstalling JS deps..." +yarn install && cd assets && yarn install cd .. 
# Potentially Set up the database diff --git a/docker/selfhosted.Dockerfile b/docker/selfhosted.Dockerfile new file mode 100644 index 0000000..17f7af7 --- /dev/null +++ b/docker/selfhosted.Dockerfile @@ -0,0 +1,146 @@ +# Find eligible builder and runner images on Docker Hub. We use Ubuntu/Debian +# instead of Alpine to avoid DNS resolution issues in production. +ARG ELIXIR_VERSION=1.18.4 +ARG OTP_VERSION=27.2.4 +ARG DEBIAN_VERSION=bookworm-20250428-slim + +ARG BUILDER_IMAGE="hexpm/elixir:${ELIXIR_VERSION}-erlang-${OTP_VERSION}-debian-${DEBIAN_VERSION}" +ARG RUNNER_IMAGE="debian:${DEBIAN_VERSION}" + +FROM ${BUILDER_IMAGE} AS builder + +ARG TARGETPLATFORM +RUN echo "Building for ${TARGETPLATFORM:?}" + +# install build dependencies +RUN apt-get update -y && \ + # System packages + apt-get install -y \ + build-essential \ + git \ + curl && \ + # Node.js and Yarn + curl -sL https://deb.nodesource.com/setup_20.x -o nodesource_setup.sh && \ + bash nodesource_setup.sh && \ + apt-get install -y nodejs && \ + npm install -g yarn && \ + # Hex and Rebar + mix local.hex --force && \ + mix local.rebar --force && \ + # FFmpeg (latest build that doesn't cause an illegal instruction error for some users - see #347) + export FFMPEG_DOWNLOAD=$(case ${TARGETPLATFORM:-linux/amd64} in \ + "linux/amd64") echo "https://github.com/yt-dlp/FFmpeg-Builds/releases/download/autobuild-2024-07-30-14-10/ffmpeg-N-116468-g0e09f6d690-linux64-gpl.tar.xz" ;; \ + "linux/arm64") echo "https://github.com/yt-dlp/FFmpeg-Builds/releases/download/autobuild-2024-07-30-14-10/ffmpeg-N-116468-g0e09f6d690-linuxarm64-gpl.tar.xz" ;; \ + *) echo "" ;; esac) && \ + curl -L ${FFMPEG_DOWNLOAD} --output /tmp/ffmpeg.tar.xz && \ + tar -xf /tmp/ffmpeg.tar.xz --strip-components=2 --no-anchored -C /usr/local/bin/ "ffmpeg" && \ + tar -xf /tmp/ffmpeg.tar.xz --strip-components=2 --no-anchored -C /usr/local/bin/ "ffprobe" && \ + # Cleanup + apt-get clean && \ + rm -f /var/lib/apt/lists/*_* + +# prepare build dir +WORKDIR /app + 
+# set build ENV +ENV MIX_ENV="prod" +ENV ERL_FLAGS="+JPperf true" + +# install mix dependencies +COPY mix.exs mix.lock ./ +RUN mix deps.get --only $MIX_ENV && mkdir config + +# copy compile-time config files before we compile dependencies +# to ensure any relevant config change will trigger the dependencies +# to be re-compiled. +COPY config/config.exs config/${MIX_ENV}.exs config/ +RUN mix deps.compile + +COPY priv priv +COPY lib lib +COPY assets assets + +# Compile assets +RUN yarn --cwd assets install && mix assets.deploy && mix compile + +# Changes to config/runtime.exs don't require recompiling the code +COPY config/runtime.exs config/ + +COPY rel rel +RUN mix release + +## -- Release Stage -- + +FROM ${RUNNER_IMAGE} + +ARG TARGETPLATFORM +ARG PORT=8945 + +COPY --from=builder ./usr/local/bin/ffmpeg /usr/bin/ffmpeg +COPY --from=builder ./usr/local/bin/ffprobe /usr/bin/ffprobe + +RUN apt-get update -y && \ + # System packages + apt-get install -y \ + libstdc++6 \ + openssl \ + libncurses5 \ + locales \ + ca-certificates \ + python3-mutagen \ + curl \ + zip \ + openssh-client \ + nano \ + python3 \ + pipx \ + jq \ + # unzip is needed for Deno + unzip \ + procps && \ + # Install Deno - required for YouTube downloads (See yt-dlp#14404) + curl -fsSL https://deno.land/install.sh | DENO_INSTALL=/usr/local sh -s -- -y --no-modify-path && \ + # Apprise + export PIPX_HOME=/opt/pipx && \ + export PIPX_BIN_DIR=/usr/local/bin && \ + pipx install apprise && \ + # yt-dlp + export YT_DLP_DOWNLOAD=$(case ${TARGETPLATFORM:-linux/amd64} in \ + "linux/amd64") echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux" ;; \ + "linux/arm64") echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64" ;; \ + *) echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux" ;; esac) && \ + curl -L ${YT_DLP_DOWNLOAD} -o /usr/local/bin/yt-dlp && \ + chmod a+rx /usr/local/bin/yt-dlp && \ + yt-dlp -U && \ + # Set the locale + sed 
-i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && locale-gen && \ + # Clean up + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# More locale setup +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US:en +ENV LC_ALL=en_US.UTF-8 + +WORKDIR "/app" + +# Set up data volumes +RUN mkdir -p /config /downloads /etc/elixir_tzdata_data /etc/yt-dlp/plugins && \ + chmod ugo+rw /etc/elixir_tzdata_data /etc/yt-dlp /etc/yt-dlp/plugins /usr/local/bin /usr/local/bin/yt-dlp + +# set runner ENV +ENV MIX_ENV="prod" +ENV PORT=${PORT} +ENV RUN_CONTEXT="selfhosted" +ENV UMASK=022 +EXPOSE ${PORT} + +# Only copy the final release from the build stage +COPY --from=builder /app/_build/${MIX_ENV}/rel/pinchflat ./ + +HEALTHCHECK --interval=30s --start-period=15s \ + CMD curl --fail http://localhost:${PORT}/healthcheck || exit 1 + +# Start the app +CMD ["/app/bin/docker_start"] diff --git a/ideas.md b/ideas.md deleted file mode 100644 index ac5addc..0000000 --- a/ideas.md +++ /dev/null @@ -1,4 +0,0 @@ -- Write media datbase ID as metadata/to file/whatever so it gives us an option to retroactively match media to the DB down the line. 
Useful if someone moves the media without informing the UI - - Use a UUID for the media database ID (or at least alongside it) -- Look into this and its recommended plugins https://hexdocs.pm/ex_check/readme.html -- Add output template option for the source's friendly name diff --git a/lib/pinchflat/application.ex b/lib/pinchflat/application.ex index 7defa36..3f823f4 100644 --- a/lib/pinchflat/application.ex +++ b/lib/pinchflat/application.ex @@ -4,10 +4,18 @@ defmodule Pinchflat.Application do @moduledoc false use Application + require Logger @impl true def start(_type, _args) do - children = [ + check_and_update_timezone() + attach_oban_telemetry() + Logger.add_handlers(:pinchflat) + + # See https://hexdocs.pm/elixir/Supervisor.html + # for other strategies and supported options + [ + Pinchflat.PromEx, PinchflatWeb.Telemetry, Pinchflat.Repo, # Must be before startup tasks @@ -20,17 +28,11 @@ defmodule Pinchflat.Application do {Finch, name: Pinchflat.Finch}, # Start a worker by calling: Pinchflat.Worker.start_link(arg) # {Pinchflat.Worker, arg}, - # Start to serve requests, typically the last entry - PinchflatWeb.Endpoint + # Start to serve requests, typically the last entry (except for the post-boot tasks) + PinchflatWeb.Endpoint, + Pinchflat.Boot.PostBootStartupTasks ] - - :ok = Oban.Telemetry.attach_default_logger() - Logger.add_handlers(:pinchflat) - - # See https://hexdocs.pm/elixir/Supervisor.html - # for other strategies and supported options - opts = [strategy: :one_for_one, name: Pinchflat.Supervisor] - Supervisor.start_link(children, opts) + |> Supervisor.start_link(strategy: :one_for_one, name: Pinchflat.Supervisor) end # Tell Phoenix to update the endpoint configuration @@ -40,4 +42,27 @@ defmodule Pinchflat.Application do PinchflatWeb.Endpoint.config_change(changed, removed) :ok end + + defp attach_oban_telemetry do + events = [[:oban, :job, :start], [:oban, :job, :stop], [:oban, :job, :exception]] + + :ok = Oban.Telemetry.attach_default_logger() + 
:telemetry.attach_many("job-telemetry-broadcast", events, &PinchflatWeb.Telemetry.job_state_change_broadcast/4, []) + end + + # This has to be here (rather than runtime.exs) since the `tzdata` application + # has to be started before we can check the timezone + defp check_and_update_timezone do + attempted_timezone = System.get_env("TIMEZONE") || System.get_env("TZ") || "UTC" + + valid_timezone = + if Tzdata.zone_exists?(attempted_timezone) do + attempted_timezone + else + Logger.warning("Invalid timezone #{attempted_timezone}, defaulting to UTC") + "UTC" + end + + Application.put_env(:pinchflat, :timezone, valid_timezone) + end end diff --git a/lib/pinchflat/boot/nfo_backfill_worker.ex b/lib/pinchflat/boot/nfo_backfill_worker.ex deleted file mode 100644 index 917d2f1..0000000 --- a/lib/pinchflat/boot/nfo_backfill_worker.ex +++ /dev/null @@ -1,70 +0,0 @@ -defmodule Pinchflat.Boot.NfoBackfillWorker do - @moduledoc false - - use Oban.Worker, - queue: :local_metadata, - # This should have it running once _ever_ (until the job is pruned, anyway) - # NOTE: remove within the next month - unique: [period: :infinity, states: Oban.Job.states()], - tags: ["media_item", "media_metadata", "local_metadata", "data_backfill"] - - import Ecto.Query, warn: false - require Logger - - alias Pinchflat.Repo - alias Pinchflat.Media - alias Pinchflat.Media.MediaItem - alias Pinchflat.Metadata.NfoBuilder - alias Pinchflat.Metadata.MetadataFileHelpers - - @doc """ - Runs a one-off backfill job to regenerate NFO files for media items that have - both an NFO file and a metadata file. This is needed because NFO files weren't - escaping characters properly so we need to regenerate them. - - This job will only run once as long as I remove it before the jobs are pruned in a month. 
- - Returns :ok - """ - @impl Oban.Worker - def perform(%Oban.Job{}) do - Logger.info("Running NFO backfill worker") - - media_items = get_media_items_to_backfill() - - Enum.each(media_items, fn media_item -> - nfo_exists = File.exists?(media_item.nfo_filepath) - metadata_exists = File.exists?(media_item.metadata.metadata_filepath) - - if nfo_exists && metadata_exists do - Logger.info("NFO and metadata exist for media item #{media_item.id} - proceeding") - - regenerate_nfo_for_media_item(media_item) - end - end) - - :ok - end - - defp get_media_items_to_backfill do - from(m in MediaItem, where: not is_nil(m.nfo_filepath)) - |> Repo.all() - |> Repo.preload([:metadata, source: :media_profile]) - end - - defp regenerate_nfo_for_media_item(media_item) do - try do - case MetadataFileHelpers.read_compressed_metadata(media_item.metadata.metadata_filepath) do - {:ok, metadata} -> - Media.update_media_item(media_item, %{ - nfo_filepath: NfoBuilder.build_and_store_for_media_item(media_item.nfo_filepath, metadata) - }) - - _err -> - Logger.error("Failed to read metadata for media item #{media_item.id}") - end - rescue - e -> Logger.error("Unknown error regenerating NFO file for MI ##{media_item.id}: #{inspect(e)}") - end - end -end diff --git a/lib/pinchflat/boot/post_boot_startup_tasks.ex b/lib/pinchflat/boot/post_boot_startup_tasks.ex new file mode 100644 index 0000000..d6ae6eb --- /dev/null +++ b/lib/pinchflat/boot/post_boot_startup_tasks.ex @@ -0,0 +1,46 @@ +defmodule Pinchflat.Boot.PostBootStartupTasks do + @moduledoc """ + This module is responsible for running startup tasks on app boot + AFTER all other boot steps have taken place and the app is ready to serve requests. + + It's a GenServer because that plays REALLY nicely with the existing + Phoenix supervision tree. 
+ """ + + alias Pinchflat.YtDlp.UpdateWorker, as: YtDlpUpdateWorker + + # restart: :temporary means that this process will never be restarted (ie: will run once and then die) + use GenServer, restart: :temporary + import Ecto.Query, warn: false + + def start_link(opts \\ []) do + GenServer.start_link(__MODULE__, %{env: Application.get_env(:pinchflat, :env)}, opts) + end + + @doc """ + Runs post-boot application startup tasks. + + Any code defined here will run every time the application starts. You must + make sure that the code is idempotent and safe to run multiple times. + + This is a good place to set up default settings, create initial records, stuff like that. + Should be fast - anything with the potential to be slow should be kicked off as a job instead. + """ + @impl true + def init(%{env: :test} = state) do + # Do nothing _as part of the app bootup process_. + # Since bootup calls `start_link` and that's where the `env` state is injected, + # you can still call `.init()` manually to run these tasks for testing purposes + {:ok, state} + end + + def init(state) do + update_yt_dlp() + + {:ok, state} + end + + defp update_yt_dlp do + YtDlpUpdateWorker.kickoff() + end +end diff --git a/lib/pinchflat/boot/post_job_startup_tasks.ex b/lib/pinchflat/boot/post_job_startup_tasks.ex index ab69da7..6eba701 100644 --- a/lib/pinchflat/boot/post_job_startup_tasks.ex +++ b/lib/pinchflat/boot/post_job_startup_tasks.ex @@ -1,21 +1,18 @@ defmodule Pinchflat.Boot.PostJobStartupTasks do @moduledoc """ This module is responsible for running startup tasks on app boot - AFTER the job runner has initiallized. + AFTER the job runner has initialized. It's a GenServer because that plays REALLY nicely with the existing Phoenix supervision tree. 
""" - alias Pinchflat.Repo - alias Pinchflat.Boot.NfoBackfillWorker - # restart: :temporary means that this process will never be restarted (ie: will run once and then die) use GenServer, restart: :temporary import Ecto.Query, warn: false def start_link(opts \\ []) do - GenServer.start_link(__MODULE__, %{}, opts) + GenServer.start_link(__MODULE__, %{env: Application.get_env(:pinchflat, :env)}, opts) end @doc """ @@ -28,8 +25,15 @@ defmodule Pinchflat.Boot.PostJobStartupTasks do Should be fast - anything with the potential to be slow should be kicked off as a job instead. """ @impl true + def init(%{env: :test} = state) do + # Do nothing _as part of the app bootup process_. + # Since bootup calls `start_link` and that's where the `env` state is injected, + # you can still call `.init()` manually to run these tasks for testing purposes + {:ok, state} + end + def init(state) do - Repo.insert_unique_job(NfoBackfillWorker.new(%{})) + # Nothing at the moment! {:ok, state} end diff --git a/lib/pinchflat/boot/pre_job_startup_tasks.ex b/lib/pinchflat/boot/pre_job_startup_tasks.ex index d4f4168..5035e35 100644 --- a/lib/pinchflat/boot/pre_job_startup_tasks.ex +++ b/lib/pinchflat/boot/pre_job_startup_tasks.ex @@ -14,11 +14,12 @@ defmodule Pinchflat.Boot.PreJobStartupTasks do alias Pinchflat.Repo alias Pinchflat.Settings - alias Pinchflat.YtDlp.CommandRunner - alias Pinchflat.Filesystem.FilesystemHelpers + alias Pinchflat.Utils.FilesystemUtils + + alias Pinchflat.Lifecycle.UserScripts.CommandRunner, as: UserScriptRunner def start_link(opts \\ []) do - GenServer.start_link(__MODULE__, %{}, opts) + GenServer.start_link(__MODULE__, %{env: Application.get_env(:pinchflat, :env)}, opts) end @doc """ @@ -31,14 +32,32 @@ defmodule Pinchflat.Boot.PreJobStartupTasks do Should be fast - anything with the potential to be slow should be kicked off as a job instead. """ @impl true + def init(%{env: :test} = state) do + # Do nothing _as part of the app bootup process_. 
+ # Since bootup calls `start_link` and that's where the `env` state is injected, + # you can still call `.init()` manually to run these tasks for testing purposes + {:ok, state} + end + def init(state) do + ensure_tmpfile_directory() reset_executing_jobs() - create_blank_cookie_file() + create_blank_yt_dlp_files() + create_blank_user_script_file() apply_default_settings() + run_app_init_script() {:ok, state} end + defp ensure_tmpfile_directory do + tmpfile_dir = Application.get_env(:pinchflat, :tmpfile_directory) + + if !File.exists?(tmpfile_dir) do + File.mkdir_p!(tmpfile_dir) + end + end + # If a node cannot gracefully shut down, the currently executing jobs get stuck # in the "executing" state. This is a problem because the job runner will not # pick them up again @@ -51,20 +70,52 @@ defmodule Pinchflat.Boot.PreJobStartupTasks do Logger.info("Reset #{count} executing jobs") end - defp create_blank_cookie_file do + defp create_blank_yt_dlp_files do + files = ["cookies.txt", "yt-dlp-configs/base-config.txt"] base_dir = Application.get_env(:pinchflat, :extras_directory) - filepath = Path.join(base_dir, "cookies.txt") + + Enum.each(files, fn file -> + filepath = Path.join(base_dir, file) + + if !File.exists?(filepath) do + Logger.info("Creating blank file: #{filepath}") + + FilesystemUtils.write_p!(filepath, "") + end + end) + end + + defp create_blank_user_script_file do + base_dir = Application.get_env(:pinchflat, :extras_directory) + filepath = Path.join([base_dir, "user-scripts", "lifecycle"]) if !File.exists?(filepath) do - Logger.info("Cookies does not exist - creating it") + Logger.info("Creating blank file and making it executable: #{filepath}") - FilesystemHelpers.write_p!(filepath, "") + FilesystemUtils.write_p!(filepath, "") + File.chmod(filepath, 0o755) end end defp apply_default_settings do - {:ok, yt_dlp_version} = CommandRunner.version() + {:ok, yt_dlp_version} = yt_dlp_runner().version() + {:ok, apprise_version} = apprise_runner().version() 
Settings.set(yt_dlp_version: yt_dlp_version) + Settings.set(apprise_version: apprise_version) + end + + defp run_app_init_script do + runner = Application.get_env(:pinchflat, :user_script_runner, UserScriptRunner) + + runner.run(:app_init, %{}) + end + + defp yt_dlp_runner do + Application.get_env(:pinchflat, :yt_dlp_runner) + end + + defp apprise_runner do + Application.get_env(:pinchflat, :apprise_runner) end end diff --git a/lib/pinchflat/downloading/download_option_builder.ex b/lib/pinchflat/downloading/download_option_builder.ex index 8c1af52..ab2b56a 100644 --- a/lib/pinchflat/downloading/download_option_builder.ex +++ b/lib/pinchflat/downloading/download_option_builder.ex @@ -3,45 +3,80 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do Builds the options for yt-dlp to download media based on the given media profile. """ + alias Pinchflat.Sources alias Pinchflat.Sources.Source alias Pinchflat.Media.MediaItem alias Pinchflat.Downloading.OutputPathBuilder + alias Pinchflat.Downloading.QualityOptionBuilder + + alias Pinchflat.Utils.FilesystemUtils, as: FSUtils @doc """ Builds the options for yt-dlp to download media based on the given media's profile. 
- IDEA: consider adding the ability to pass in a second argument to override - these options + Returns {:ok, [Keyword.t()]} """ - def build(%MediaItem{} = media_item_with_preloads) do + def build(%MediaItem{} = media_item_with_preloads, override_opts \\ []) do media_profile = media_item_with_preloads.source.media_profile built_options = - default_options() ++ + default_options(override_opts) ++ subtitle_options(media_profile) ++ thumbnail_options(media_item_with_preloads) ++ metadata_options(media_profile) ++ quality_options(media_profile) ++ sponsorblock_options(media_profile) ++ - output_options(media_item_with_preloads) + output_options(media_item_with_preloads) ++ + config_file_options(media_item_with_preloads) {:ok, built_options} end @doc """ Builds the output path for yt-dlp to download media based on the given source's - media profile. + or media_item's media profile. Uses the source's override output path template if it exists. + + Accepts a %MediaItem{} or %Source{} struct. If a %Source{} struct is passed, it + will use a default %MediaItem{} struct with the given source. Returns binary() """ def build_output_path_for(%Source{} = source_with_preloads) do - output_path_template = source_with_preloads.media_profile.output_path_template - - build_output_path(output_path_template, source_with_preloads) + build_output_path_for(%MediaItem{source: source_with_preloads}) end - defp default_options do - [:no_progress, :windows_filenames] + def build_output_path_for(%MediaItem{} = media_item_with_preloads) do + output_path_template = Sources.output_path_template(media_item_with_preloads.source) + + build_output_path(output_path_template, media_item_with_preloads) + end + + @doc """ + Builds the quality options for yt-dlp to download media based on the given source's + or media_item's media profile. Useful for helping predict final filepath of downloaded + media. 
+ + returns [Keyword.t()] + """ + def build_quality_options_for(%Source{} = source_with_preloads) do + build_quality_options_for(%MediaItem{source: source_with_preloads}) + end + + def build_quality_options_for(%MediaItem{} = media_item_with_preloads) do + media_profile = media_item_with_preloads.source.media_profile + + quality_options(media_profile) + end + + defp default_options(override_opts) do + overwrite_behaviour = Keyword.get(override_opts, :overwrite_behaviour, :force_overwrites) + + [ + :no_progress, + overwrite_behaviour, + # This makes the date metadata conform to what jellyfin expects + parse_metadata: "%(upload_date>%Y-%m-%d)s:(?P<meta_date>.+)" + ] end defp subtitle_options(media_profile) do @@ -56,6 +91,9 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do {{:download_auto_subs, true}, %{download_subs: true}} -> acc ++ [:write_auto_subs] + {{:download_auto_subs, true}, %{embed_subs: true}} -> + acc ++ [:write_auto_subs] + {{:embed_subs, true}, %{preferred_resolution: pr}} when pr != :audio -> acc ++ [:embed_subs] @@ -104,17 +142,7 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do end defp quality_options(media_profile) do - video_codec_options = "+codec:avc:m4a" - - case media_profile.preferred_resolution do - # Also be aware that :audio disabled all embedding options for subtitles - :audio -> [:extract_audio, format: "bestaudio[ext=m4a]"] - :"360p" -> [format_sort: "res:360,#{video_codec_options}"] - :"480p" -> [format_sort: "res:480,#{video_codec_options}"] - :"720p" -> [format_sort: "res:720,#{video_codec_options}"] - :"1080p" -> [format_sort: "res:1080,#{video_codec_options}"] - :"2160p" -> [format_sort: "res:2160,#{video_codec_options}"] - end + QualityOptionBuilder.build(media_profile) end defp sponsorblock_options(media_profile) do @@ -124,27 +152,64 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do case {behaviour, categories} do {_, []} -> [] {:remove, _} -> [sponsorblock_remove: Enum.join(categories, ",")] + {:mark, _} ->
[sponsorblock_mark: Enum.join(categories, ",")] {:disabled, _} -> [] end end + # This is put here instead of the CommandRunner module because it should only + # be applied to downloading - if it were in CommandRunner it would apply to + # all yt-dlp commands (like indexing) + defp config_file_options(media_item) do + base_dir = Path.join(Application.get_env(:pinchflat, :extras_directory), "yt-dlp-configs") + # Ordered by priority - the first file has the highest priority + filenames = [ + "media-item-#{media_item.id}-config.txt", + "source-#{media_item.source_id}-config.txt", + "media-profile-#{media_item.source.media_profile_id}-config.txt", + "base-config.txt" + ] + + config_filepaths = + Enum.reduce(filenames, [], fn filename, acc -> + filepath = Path.join(base_dir, filename) + + if FSUtils.exists_and_nonempty?(filepath) do + [filepath | acc] + else + acc + end + end) + + Enum.map(config_filepaths, fn filepath -> {:config_locations, filepath} end) + end + defp output_options(media_item_with_preloads) do [ - output: build_output_path_for(media_item_with_preloads.source) + output: build_output_path_for(media_item_with_preloads) ] end - defp build_output_path(string, source) do - additional_options_map = output_options_map(source) + defp build_output_path(string, media_item_with_preloads) do + additional_options_map = output_options_map(media_item_with_preloads) {:ok, output_path} = OutputPathBuilder.build(string, additional_options_map) Path.join(base_directory(), output_path) end - defp output_options_map(source) do + defp output_options_map(media_item_with_preloads) do + source = media_item_with_preloads.source + %{ + "media_item_id" => to_string(media_item_with_preloads.id), + "source_id" => to_string(source.id), + "media_profile_id" => to_string(source.media_profile_id), "source_custom_name" => source.custom_name, - "source_collection_type" => source.collection_type + "source_collection_id" => source.collection_id, + "source_collection_name" => 
source.collection_name, + "source_collection_type" => to_string(source.collection_type), + "media_playlist_index" => pad_int(media_item_with_preloads.playlist_index), + "media_upload_date_index" => pad_int(media_item_with_preloads.upload_date_index) } end @@ -152,13 +217,19 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do # It's dependent on the output_path_template being a string ending `.{{ ext }}` # (or equivalent), but that's validated by the MediaProfile schema. defp determine_thumbnail_location(media_item_with_preloads) do - output_path_template = media_item_with_preloads.source.media_profile.output_path_template + output_path_template = Sources.output_path_template(media_item_with_preloads.source) output_path_template |> String.split(~r{\.}, include_captures: true) |> List.insert_at(-3, "-thumb") |> Enum.join() - |> build_output_path(media_item_with_preloads.source) + |> build_output_path(media_item_with_preloads) + end + + defp pad_int(integer, count \\ 2, padding \\ "0") do + integer + |> to_string() + |> String.pad_leading(count, padding) end defp base_directory do diff --git a/lib/pinchflat/downloading/downloading_helpers.ex b/lib/pinchflat/downloading/downloading_helpers.ex index 1e61545..eae187c 100644 --- a/lib/pinchflat/downloading/downloading_helpers.ex +++ b/lib/pinchflat/downloading/downloading_helpers.ex @@ -7,9 +7,13 @@ defmodule Pinchflat.Downloading.DownloadingHelpers do require Logger + use Pinchflat.Media.MediaQuery + + alias Pinchflat.Repo alias Pinchflat.Media alias Pinchflat.Tasks alias Pinchflat.Sources.Source + alias Pinchflat.Media.MediaItem alias Pinchflat.Downloading.MediaDownloadWorker @doc """ @@ -23,13 +27,15 @@ defmodule Pinchflat.Downloading.DownloadingHelpers do Returns :ok """ - def enqueue_pending_download_tasks(%Source{download_media: true} = source) do + def enqueue_pending_download_tasks(source, job_opts \\ []) + + def enqueue_pending_download_tasks(%Source{download_media: true} = source, job_opts) do source |> 
Media.list_pending_media_items_for() - |> Enum.each(&MediaDownloadWorker.kickoff_with_task/1) + |> Enum.each(&MediaDownloadWorker.kickoff_with_task(&1, %{}, job_opts)) end - def enqueue_pending_download_tasks(%Source{download_media: false}) do + def enqueue_pending_download_tasks(%Source{download_media: false}, _job_opts) do :ok end @@ -43,4 +49,54 @@ defmodule Pinchflat.Downloading.DownloadingHelpers do |> Media.list_pending_media_items_for() |> Enum.each(&Tasks.delete_pending_tasks_for/1) end + + @doc """ + Takes a single media item and enqueues a download job if the media should be + downloaded, based on the source's download settings and whether media is + considered pending. + + Returns {:ok, %Task{}} | {:error, :should_not_download} | {:error, any()} + """ + def kickoff_download_if_pending(%MediaItem{} = media_item, job_opts \\ []) do + media_item = Repo.preload(media_item, :source) + + if media_item.source.download_media && Media.pending_download?(media_item) do + Logger.info("Kicking off download for media item ##{media_item.id} (#{media_item.media_id})") + + MediaDownloadWorker.kickoff_with_task(media_item, %{}, job_opts) + else + {:error, :should_not_download} + end + end + + @doc """ + For a given source, enqueues download jobs for all media items _that have already been downloaded_. + + This is useful for when a source's download settings have changed and you want to run through all + existing media and retry the download. For instance, if the source didn't originally download thumbnails + and you've changed the source to download them, you can use this to download all the thumbnails for + existing media items. + + NOTE: does not delete existing files whatsoever. Does not overwrite the existing media file if it exists + at the location it expects. 
Will cause a full redownload of everything if the output template has changed + + NOTE: unrelated to the MediaQualityUpgradeWorker, which is for redownloading media items for quality upgrades + or improved sponsorblock segments + + Returns [{:ok, %Task{}} | {:error, any()}] + """ + def kickoff_redownload_for_existing_media(%Source{} = source) do + MediaQuery.new() + |> MediaQuery.require_assoc(:media_profile) + |> where( + ^dynamic( + [m, s, mp], + ^MediaQuery.for_source(source) and + ^MediaQuery.downloaded() and + not (^MediaQuery.download_prevented()) + ) + ) + |> Repo.all() + |> Enum.map(&MediaDownloadWorker.kickoff_with_task/1) + end end diff --git a/lib/pinchflat/downloading/media_download_worker.ex b/lib/pinchflat/downloading/media_download_worker.ex index 17ab5b5..a0fbceb 100644 --- a/lib/pinchflat/downloading/media_download_worker.ex +++ b/lib/pinchflat/downloading/media_download_worker.ex @@ -3,8 +3,9 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do use Oban.Worker, queue: :media_fetching, + priority: 5, unique: [period: :infinity, states: [:available, :scheduled, :retryable, :executing]], - tags: ["media_item", "media_fetching"] + tags: ["media_item", "media_fetching", "show_in_dashboard"] require Logger @@ -12,8 +13,11 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do alias Pinchflat.Tasks alias Pinchflat.Repo alias Pinchflat.Media + alias Pinchflat.Media.FileSyncing alias Pinchflat.Downloading.MediaDownloader + alias Pinchflat.Lifecycle.UserScripts.CommandRunner, as: UserScriptRunner + @doc """ Starts the media_item media download worker and creates a task for the media_item. @@ -31,18 +35,23 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do Does not download media if its source is set to not download media (unless forced). - Returns :ok | {:ok, %MediaItem{}} | {:error, any, ...any} + Options: + - `force`: force download even if the source is set to not download media. 
Fully + re-downloads media, including the video + - `quality_upgrade?`: re-downloads media, including the video. Does not force download + if the source is set to not download media + + Returns :ok | {:error, any, ...any} """ @impl Oban.Worker def perform(%Oban.Job{args: %{"id" => media_item_id} = args}) do - media_item = - media_item_id - |> Media.get_media_item!() - |> Repo.preload(:source) + should_force = Map.get(args, "force", false) + is_quality_upgrade = Map.get(args, "quality_upgrade?", false) - # If the source or media item is set to not download media, perform a no-op unless forced - if (media_item.source.download_media && !media_item.prevent_download) || args["force"] do - download_media_and_schedule_jobs(media_item) + media_item = fetch_and_run_prevent_download_user_script(media_item_id) + + if should_download_media?(media_item, should_force, is_quality_upgrade) do + download_media_and_schedule_jobs(media_item, is_quality_upgrade, should_force) else :ok end @@ -51,28 +60,95 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do Ecto.StaleEntryError -> Logger.info("#{__MODULE__} discarded: media item #{media_item_id} stale") end - defp download_media_and_schedule_jobs(media_item) do - case MediaDownloader.download_for_media_item(media_item) do - {:ok, updated_media_item} -> - compute_and_save_media_filesize(updated_media_item) + # If this is a quality upgrade, only check if the source is set to download media + # or that the media item's download hasn't been prevented + defp should_download_media?(media_item, should_force, true = _is_quality_upgrade) do + (media_item.source.download_media && !media_item.prevent_download) || should_force + end - {:ok, updated_media_item} + # If it's not a quality upgrade, additionally check if the media item is pending download + defp should_download_media?(media_item, should_force, _is_quality_upgrade) do + source = media_item.source + is_pending = Media.pending_download?(media_item) - {:recovered, _} -> + (is_pending 
&& source.download_media && !media_item.prevent_download) || should_force + end + + # If a user script exists and, when run, returns a non-zero exit code, prevent this and all future downloads + # of the media item. + defp fetch_and_run_prevent_download_user_script(media_item_id) do + media_item = Media.get_media_item!(media_item_id) + + {:ok, media_item} = + case run_user_script(:media_pre_download, media_item) do + {:ok, _, exit_code} when exit_code != 0 -> Media.update_media_item(media_item, %{prevent_download: true}) + _ -> {:ok, media_item} + end + + Repo.preload(media_item, :source) + end + + defp download_media_and_schedule_jobs(media_item, is_quality_upgrade, should_force) do + overwrite_behaviour = if should_force || is_quality_upgrade, do: :force_overwrites, else: :no_force_overwrites + override_opts = [overwrite_behaviour: overwrite_behaviour] + + case MediaDownloader.download_for_media_item(media_item, override_opts) do + {:ok, downloaded_media_item} -> + {:ok, updated_media_item} = + Media.update_media_item(downloaded_media_item, %{ + media_size_bytes: compute_media_filesize(downloaded_media_item), + media_redownloaded_at: get_redownloaded_at(is_quality_upgrade) + }) + + :ok = FileSyncing.delete_outdated_files(media_item, updated_media_item) + run_user_script(:media_downloaded, updated_media_item) + + :ok + + {:recovered, _media_item, _message} -> {:error, :retry} - {:error, _message} -> - {:error, :download_failed} + {:error, :unsuitable_for_download, _message} -> + {:ok, :non_retry} + + {:error, _error_atom, message} -> + action_on_error(message) end end - defp compute_and_save_media_filesize(media_item) do + defp compute_media_filesize(media_item) do case File.stat(media_item.media_filepath) do - {:ok, %{size: size}} -> - Media.update_media_item(media_item, %{media_size_bytes: size}) - - _ -> - :ok + {:ok, %{size: size}} -> size + _ -> nil end end + + defp get_redownloaded_at(true), do: DateTime.utc_now() + defp get_redownloaded_at(_), do: nil + + 
defp action_on_error(message) do + # This will attempt re-download at the next indexing, but it won't be retried + # immediately as part of job failure logic + non_retryable_errors = [ + "Video unavailable", + "Sign in to confirm", + "This video is available to this channel's members" + ] + + if String.contains?(to_string(message), non_retryable_errors) do + Logger.error("yt-dlp download will not be retried: #{inspect(message)}") + + {:ok, :non_retry} + else + {:error, :download_failed} + end + end + + # NOTE: I like this pattern of using the default value so that I don't have to + # define it in config.exs (and friends). Consider using this elsewhere. + defp run_user_script(event, media_item) do + runner = Application.get_env(:pinchflat, :user_script_runner, UserScriptRunner) + + runner.run(event, media_item) + end end diff --git a/lib/pinchflat/downloading/media_downloader.ex b/lib/pinchflat/downloading/media_downloader.ex index 8d5be4d..1a1ee2f 100644 --- a/lib/pinchflat/downloading/media_downloader.ex +++ b/lib/pinchflat/downloading/media_downloader.ex @@ -9,53 +9,102 @@ defmodule Pinchflat.Downloading.MediaDownloader do alias Pinchflat.Repo alias Pinchflat.Media + alias Pinchflat.Sources alias Pinchflat.Media.MediaItem + alias Pinchflat.Utils.StringUtils alias Pinchflat.Metadata.NfoBuilder alias Pinchflat.Metadata.MetadataParser alias Pinchflat.Metadata.MetadataFileHelpers - alias Pinchflat.Filesystem.FilesystemHelpers + alias Pinchflat.Utils.FilesystemUtils alias Pinchflat.Downloading.DownloadOptionBuilder alias Pinchflat.YtDlp.Media, as: YtDlpMedia @doc """ Downloads media for a media item, updating the media item based on the metadata - returned by yt-dlp. Also saves the entire metadata response to the associated - media_metadata record. + returned by yt-dlp. Encountered errors are saved to the Media Item record. Saves + the entire metadata response to the associated media_metadata record. 
- NOTE: related methods (like the download worker) won't download if the media item's source + NOTE: related methods (like the download worker) won't download if the media item's source is set to not download media. However, I'm not enforcing that here since I need this for testing. This may change in the future but I'm not stressed. - Returns {:ok, %MediaItem{}} | {:error, any, ...any} + Returns {:ok, %MediaItem{}} | {:error, atom(), String.t()} | {:recovered, %MediaItem{}, String.t()} """ - def download_for_media_item(%MediaItem{} = media_item) do - output_filepath = FilesystemHelpers.generate_metadata_tmpfile(:json) + def download_for_media_item(%MediaItem{} = media_item, override_opts \\ []) do + case attempt_download_and_update_for_media_item(media_item, override_opts) do + {:ok, media_item} -> + # Returns {:ok, %MediaItem{}} + Media.update_media_item(media_item, %{last_error: nil}) + + {:error, error_atom, message} -> + Media.update_media_item(media_item, %{last_error: StringUtils.wrap_string(message)}) + + {:error, error_atom, message} + + {:recovered, media_item, message} -> + {:ok, updated_media_item} = Media.update_media_item(media_item, %{last_error: StringUtils.wrap_string(message)}) + + {:recovered, updated_media_item, message} + end + end + + # Looks complicated, but here's the key points: + # - download_with_options runs a pre-check to see if the media item is suitable for download. + # - If the media item fails the precheck, it returns {:error, :unsuitable_for_download, message} + # - However, if the precheck fails in a way that we think can be fixed by using cookies, we retry with cookies + # and return the result of that + # - If the precheck passes but the download fails, it normally returns {:error, :download_failed, message} + # - However, there are some errors we can recover from (eg: failure to communicate with SponsorBlock). + # In this case, we attempt the download anyway and update the media item with what details we do have.
+ # This case returns {:recovered, updated_media_item, message} + # - If we attempt a retry but it fails, we return {:error, :unrecoverable, message} + # - If there is an unknown error unrelated to the above, we return {:error, :unknown, message} + # - Finally, if there is no error, we update the media item with the parsed JSON and return {:ok, updated_media_item} + # + # Restated, here are the return values for each case: + # - On success: {:ok, updated_media_item} + # - On initial failure but successfully recovered: {:recovered, updated_media_item, message} + # - On error: {:error, error_atom, message} where error_atom is one of: + # - `:unsuitable_for_download` if the media item fails the precheck + # - `:unrecoverable` if there was an initial failure and the recovery attempt failed + # - `:download_failed` for all other yt-dlp-related downloading errors + # - `:unknown` for any other errors, including those not related to yt-dlp + # - If we retry using cookies, all of the above return values apply. The cookie retry + # logic is handled transparently as far as the caller is concerned + defp attempt_download_and_update_for_media_item(media_item, override_opts) do + output_filepath = FilesystemUtils.generate_metadata_tmpfile(:json) media_with_preloads = Repo.preload(media_item, [:metadata, source: :media_profile]) - case download_with_options(media_item.original_url, media_with_preloads, output_filepath) do + case download_with_options(media_item.original_url, media_with_preloads, output_filepath, override_opts) do {:ok, parsed_json} -> update_media_item_from_parsed_json(media_with_preloads, parsed_json) + {:error, :unsuitable_for_download} -> + message = + "Media item ##{media_with_preloads.id} isn't suitable for download yet. 
May be an active or processing live stream" + + Logger.warning(message) + + {:error, :unsuitable_for_download, message} + {:error, message, _exit_code} -> Logger.error("yt-dlp download error for media item ##{media_with_preloads.id}: #{inspect(message)}") if String.contains?(to_string(message), recoverable_errors()) do - attempt_update_media_item(media_with_preloads, output_filepath) - - {:recovered, message} + attempt_recovery_from_error(media_with_preloads, output_filepath, message) else - {:error, message} + {:error, :download_failed, message} end err -> Logger.error("Unknown error downloading media item ##{media_with_preloads.id}: #{inspect(err)}") - {:error, "Unknown error: #{inspect(err)}"} + {:error, :unknown, "Unknown error: #{inspect(err)}"} end end - defp attempt_update_media_item(media_with_preloads, output_filepath) do + defp attempt_recovery_from_error(media_with_preloads, output_filepath, error_message) do with {:ok, contents} <- File.read(output_filepath), {:ok, parsed_json} <- Phoenix.json_library().decode(contents) do Logger.info(""" @@ -64,12 +113,13 @@ defmodule Pinchflat.Downloading.MediaDownloader do anyway """) - update_media_item_from_parsed_json(media_with_preloads, parsed_json) + {:ok, updated_media_item} = update_media_item_from_parsed_json(media_with_preloads, parsed_json) + {:recovered, updated_media_item, error_message} else err -> Logger.error("Unable to recover error for media item ##{media_with_preloads.id}: #{inspect(err)}") - {:error, :retry_failed} + {:error, :unrecoverable, error_message} end end @@ -79,12 +129,13 @@ defmodule Pinchflat.Downloading.MediaDownloader do |> MetadataParser.parse_for_media_item() |> Map.merge(%{ media_downloaded_at: DateTime.utc_now(), + culled_at: nil, nfo_filepath: determine_nfo_filepath(media_with_preloads, parsed_json), metadata: %{ # IDEA: might be worth kicking off a job for this since thumbnail fetching # could fail and I want to handle that in isolation metadata_filepath: 
MetadataFileHelpers.compress_and_store_metadata_for(media_with_preloads, parsed_json), - thumbnail_filepath: MetadataFileHelpers.download_and_store_thumbnail_for(media_with_preloads, parsed_json) + thumbnail_filepath: MetadataFileHelpers.download_and_store_thumbnail_for(media_with_preloads) } }) @@ -103,10 +154,51 @@ defmodule Pinchflat.Downloading.MediaDownloader do end end - defp download_with_options(url, item_with_preloads, output_filepath) do - {:ok, options} = DownloadOptionBuilder.build(item_with_preloads) + defp download_with_options(url, item_with_preloads, output_filepath, override_opts) do + {:ok, options} = DownloadOptionBuilder.build(item_with_preloads, override_opts) + force_use_cookies = Keyword.get(override_opts, :force_use_cookies, false) + source_uses_cookies = Sources.use_cookies?(item_with_preloads.source, :downloading) + should_use_cookies = force_use_cookies || source_uses_cookies - YtDlpMedia.download(url, options, output_filepath: output_filepath) + runner_opts = [output_filepath: output_filepath, use_cookies: should_use_cookies] + + case {YtDlpMedia.get_downloadable_status(url, use_cookies: should_use_cookies), should_use_cookies} do + {{:ok, :downloadable}, _} -> + YtDlpMedia.download(url, options, runner_opts) + + {{:ok, :ignorable}, _} -> + {:error, :unsuitable_for_download} + + {{:error, _message, _exit_code} = err, false} -> + # If there was an error and we don't have cookies, this method will retry with cookies + # if doing so would help AND the source allows. 
Otherwise, it will return the error as-is + maybe_retry_with_cookies(url, item_with_preloads, output_filepath, override_opts, err) + + # This gets hit if cookies are enabled which, importantly, also covers the case where we + # retry a download with cookies and it fails again + {{:error, message, exit_code}, true} -> + {:error, message, exit_code} + + {err, _} -> + err + end + end + + defp maybe_retry_with_cookies(url, item_with_preloads, output_filepath, override_opts, err) do + {:error, message, _} = err + source = item_with_preloads.source + message_contains_cookie_error = String.contains?(to_string(message), recoverable_cookie_errors()) + + if Sources.use_cookies?(source, :error_recovery) && message_contains_cookie_error do + download_with_options( + url, + item_with_preloads, + output_filepath, + Keyword.put(override_opts, :force_use_cookies, true) + ) + else + err + end end defp recoverable_errors do @@ -114,4 +206,11 @@ defmodule Pinchflat.Downloading.MediaDownloader do "Unable to communicate with SponsorBlock" ] end + + defp recoverable_cookie_errors do + [ + "Sign in to confirm", + "This video is available to this channel's members" + ] + end end diff --git a/lib/pinchflat/downloading/media_quality_upgrade_worker.ex b/lib/pinchflat/downloading/media_quality_upgrade_worker.ex new file mode 100644 index 0000000..8fcfb22 --- /dev/null +++ b/lib/pinchflat/downloading/media_quality_upgrade_worker.ex @@ -0,0 +1,33 @@ +defmodule Pinchflat.Downloading.MediaQualityUpgradeWorker do + @moduledoc false + + use Oban.Worker, + queue: :media_fetching, + unique: [period: :infinity, states: [:available, :scheduled, :retryable, :executing]], + tags: ["media_item", "media_fetching", "show_in_dashboard"] + + require Logger + + alias Pinchflat.Media + alias Pinchflat.Downloading.MediaDownloadWorker + + @doc """ + Redownloads media items that are eligible for redownload for the purpose + of upgrading the quality of the media or improving things like sponsorblock + segments. 
+ + This worker is scheduled to run daily via the Oban Cron plugin + and it should run _after_ the retention worker. + + Returns :ok + """ + @impl Oban.Worker + def perform(%Oban.Job{}) do + upgradable_media = Media.list_upgradeable_media_items() + Logger.info("Redownloading #{length(upgradable_media)} media items") + + Enum.each(upgradable_media, fn media_item -> + MediaDownloadWorker.kickoff_with_task(media_item, %{quality_upgrade?: true}) + end) + end +end diff --git a/lib/pinchflat/downloading/media_retention_worker.ex b/lib/pinchflat/downloading/media_retention_worker.ex index 3e5c0d0..461ac1c 100644 --- a/lib/pinchflat/downloading/media_retention_worker.ex +++ b/lib/pinchflat/downloading/media_retention_worker.ex @@ -2,12 +2,15 @@ defmodule Pinchflat.Downloading.MediaRetentionWorker do @moduledoc false use Oban.Worker, - queue: :local_metadata, + queue: :local_data, unique: [period: :infinity, states: [:available, :scheduled, :retryable, :executing]], - tags: ["media_item", "local_metadata"] + tags: ["media_item", "local_data"] + + use Pinchflat.Media.MediaQuery require Logger + alias Pinchflat.Repo alias Pinchflat.Media @doc """ @@ -20,14 +23,53 @@ defmodule Pinchflat.Downloading.MediaRetentionWorker do """ @impl Oban.Worker def perform(%Oban.Job{}) do - cullable_media = Media.list_cullable_media_items() + cull_cullable_media_items() + delete_media_items_from_before_cutoff() + + :ok + end + + defp cull_cullable_media_items do + cullable_media = + MediaQuery.new() + |> MediaQuery.require_assoc(:source) + |> where(^MediaQuery.cullable()) + |> Repo.all() + Logger.info("Culling #{length(cullable_media)} media items past their retention date") Enum.each(cullable_media, fn media_item -> + # Setting `prevent_download` does what it says on the tin, but `culled_at` is purely informational. + # We don't actually do anything with that in terms of queries and it gets set to nil if the media item + # gets re-downloaded. 
Media.delete_media_files(media_item, %{ prevent_download: true, culled_at: DateTime.utc_now() }) end) end + + # NOTE: Since this is a date and not a datetime, we can't add logic to have to-the-minute + # comparison like we can with retention periods. We can only compare to the day. + defp delete_media_items_from_before_cutoff do + deletable_media = + MediaQuery.new() + |> MediaQuery.require_assoc(:source) + |> where(^MediaQuery.deletable_based_on_source_cutoff()) + |> Repo.all() + + Logger.info("Deleting #{length(deletable_media)} media items that are from before the source cutoff") + + Enum.each(deletable_media, fn media_item -> + # Note that I'm not setting `prevent_download` on the media_item here. + # That's because cutoff_date can easily change and it's a valid behavior to re-download older + # media items if the cutoff_date changes. + # Download is ultimately prevented because `MediaQuery.pending()` only returns media items + # from after the cutoff date (among other things), so it's not like the media will just immediately + # be re-downloaded. + Media.delete_media_files(media_item, %{ + culled_at: DateTime.utc_now() + }) + end) + end end diff --git a/lib/pinchflat/downloading/output_path_builder.ex b/lib/pinchflat/downloading/output_path_builder.ex index 89d3f6c..1c1392f 100644 --- a/lib/pinchflat/downloading/output_path_builder.ex +++ b/lib/pinchflat/downloading/output_path_builder.ex @@ -9,13 +9,23 @@ defmodule Pinchflat.Downloading.OutputPathBuilder do Builds the actual final filepath from a given template. Optionally, you can pass in a map of additional options to be used in the template. + Custom options are recursively expanded _once_ so you can nest custom options + one-deep if needed. + Translates liquid-style templates into yt-dlp-style templates, leaving yt-dlp syntax intact. 
""" def build(template_string, additional_template_options \\ %{}) do combined_options = Map.merge(custom_yt_dlp_option_map(), additional_template_options) - TemplateParser.parse(template_string, combined_options, &identifier_fn/2) + expanded_options = + Enum.map(combined_options, fn {key, value} -> + {:ok, parse_result} = TemplateParser.parse(value, combined_options, &identifier_fn/2) + + {key, parse_result} + end) + + TemplateParser.parse(template_string, Map.new(expanded_options), &identifier_fn/2) end # The `nil` case simply wraps the identifier in yt-dlp-style syntax. This assumes that @@ -43,7 +53,13 @@ defmodule Pinchflat.Downloading.OutputPathBuilder do "upload_yyyy_mm_dd" => "%(upload_date>%Y-%m-%d)S", "season_from_date" => "%(upload_date>%Y)S", "season_episode_from_date" => "s%(upload_date>%Y)Se%(upload_date>%m%d)S", - "artist_name" => "%(artist,creator,uploader,uploader_id)S" + "season_episode_index_from_date" => "s%(upload_date>%Y)Se%(upload_date>%m%d)S{{ media_upload_date_index }}", + "artist_name" => "%(artist,creator,uploader,uploader_id)S", + "static_season__episode_by_index" => "Season 1/s01e{{ media_playlist_index }}", + "static_season__episode_by_date" => "Season 1/s01e%(upload_date>%y%m%d)S", + "season_by_year__episode_by_date" => "Season %(upload_date>%Y)S/s%(upload_date>%Y)Se%(upload_date>%m%d)S", + "season_by_year__episode_by_date_and_index" => + "Season %(upload_date>%Y)S/s%(upload_date>%Y)Se%(upload_date>%m%d)S{{ media_upload_date_index }}" } end end diff --git a/lib/pinchflat/downloading/quality_option_builder.ex b/lib/pinchflat/downloading/quality_option_builder.ex new file mode 100644 index 0000000..cb89435 --- /dev/null +++ b/lib/pinchflat/downloading/quality_option_builder.ex @@ -0,0 +1,66 @@ +defmodule Pinchflat.Downloading.QualityOptionBuilder do + @moduledoc """ + A standalone builder module for building quality-related options for yt-dlp to download media. 
+ + Currently exclusively used in DownloadOptionBuilder since this logic is too complex to just + place in the main module. + """ + + alias Pinchflat.Settings + alias Pinchflat.Profiles.MediaProfile + + @doc """ + Builds the quality-related options for yt-dlp to download media based on the given media profile + + Includes things like container, preferred format/codec, and audio track options. + """ + def build(%MediaProfile{preferred_resolution: :audio, media_container: container} = media_profile) do + acodec = Settings.get!(:audio_codec_preference) + + [ + :extract_audio, + format_sort: "+acodec:#{acodec}", + audio_format: container || "best", + format: build_format_string(media_profile) + ] + end + + def build(%MediaProfile{preferred_resolution: resolution_atom, media_container: container} = media_profile) do + vcodec = Settings.get!(:video_codec_preference) + acodec = Settings.get!(:audio_codec_preference) + {resolution_string, _} = resolution_atom |> Atom.to_string() |> Integer.parse() + + [ + # Since Plex doesn't support reading metadata from MKV + remux_video: container || "mp4", + format_sort: "res:#{resolution_string},+codec:#{vcodec}:#{acodec}", + format: build_format_string(media_profile) + ] + end + + defp build_format_string(%MediaProfile{preferred_resolution: :audio, audio_track: audio_track}) do + if audio_track do + "bestaudio[#{build_format_modifier(audio_track)}]/bestaudio/best" + else + "bestaudio/best" + end + end + + defp build_format_string(%MediaProfile{audio_track: audio_track}) do + if audio_track do + "bestvideo+bestaudio[#{build_format_modifier(audio_track)}]/bestvideo*+bestaudio/best" + else + "bestvideo*+bestaudio/best" + end + end + + # Reminder to self: this conflicts with `--extractor-args "youtube:lang="` + # since that will translate the format_notes as well, which means they may not match. 
+ # At least that's what happens now - worth a re-check if I have to come back to this + defp build_format_modifier("original"), do: "format_note*=original" + defp build_format_modifier("default"), do: "format_note*='(default)'" + # This uses the caret to anchor the language to the beginning of the string + # since that's what's needed to match `en` to `en-US` and `en-GB`, etc. The user + # can always specify the full language code if they want. + defp build_format_modifier(language_code), do: "language^=#{language_code}" +end diff --git a/lib/pinchflat/fast_indexing/fast_indexing_helpers.ex b/lib/pinchflat/fast_indexing/fast_indexing_helpers.ex index a38ccf7..15a4342 100644 --- a/lib/pinchflat/fast_indexing/fast_indexing_helpers.ex +++ b/lib/pinchflat/fast_indexing/fast_indexing_helpers.ex @@ -5,74 +5,105 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do Many of these methods are made to be kickoff or be consumed by workers. """ + require Logger + + use Pinchflat.Media.MediaQuery + alias Pinchflat.Repo alias Pinchflat.Media + alias Pinchflat.Tasks + alias Pinchflat.Sources alias Pinchflat.Sources.Source - alias Pinchflat.Media.MediaQuery alias Pinchflat.FastIndexing.YoutubeRss - alias Pinchflat.Downloading.MediaDownloadWorker - alias Pinchflat.FastIndexing.MediaIndexingWorker + alias Pinchflat.FastIndexing.YoutubeApi + alias Pinchflat.Downloading.DownloadingHelpers + alias Pinchflat.FastIndexing.FastIndexingWorker + alias Pinchflat.Downloading.DownloadOptionBuilder alias Pinchflat.YtDlp.Media, as: YtDlpMedia @doc """ - Fetches new media IDs from a source's YouTube RSS feed and kicks off indexing tasks - for any new media items. See comments in `MediaIndexingWorker` for more info on the - order of operations and how this fits into the indexing process. + Kicks off a new fast indexing task for a source. This will delete any existing fast indexing + tasks for the source before starting a new one. 
- Despite the similar name to `kickoff_fast_indexing_task`, this does work differently. - `kickoff_fast_indexing_task` starts a task that _calls_ this function whereas this - function starts individual indexing tasks for each new media item. I think it does - make sense grammatically, but I could see how that's confusing. - - Returns :ok + Returns {:ok, %Task{}} """ - def kickoff_indexing_tasks_from_youtube_rss_feed(%Source{} = source) do - {:ok, media_ids} = YoutubeRss.get_recent_media_ids_from_rss(source) - existing_media_items = list_media_items_by_media_id_for(source, media_ids) - new_media_ids = media_ids -- Enum.map(existing_media_items, & &1.media_id) - - Enum.each(new_media_ids, fn media_id -> - url = "https://www.youtube.com/watch?v=#{media_id}" - - MediaIndexingWorker.kickoff_with_task(source, url) - end) + def kickoff_indexing_task(%Source{} = source) do + Tasks.delete_pending_tasks_for(source, "FastIndexingWorker", include_executing: true) + FastIndexingWorker.kickoff_with_task(source) end @doc """ - Indexes a single media item for a source and enqueues a download job if the - media should be downloaded. This method creates the media item record so it's - the one-stop-shop for adding a media item (and possibly downloading it) just - by a URL and source. + Fetches new media IDs for a source from YT's API or RSS, indexes them, and kicks off downloading + tasks for any pending media items. See comments in `FastIndexingWorker` for more info on the + order of operations and how this fits into the indexing process. - Returns {:ok, media_item} | {:error, any()} + Returns [%MediaItem{}] where each item is a new media item that was created _but not necessarily + downloaded_. 
""" - def index_and_enqueue_download_for_media_item(%Source{} = source, url) do - maybe_media_item = create_media_item_from_url(source, url) + def index_and_kickoff_downloads(%Source{} = source) do + # The media_profile is needed to determine the quality options to _then_ determine a more + # accurate predicted filepath + source = Repo.preload(source, [:media_profile]) - case maybe_media_item do - {:ok, media_item} -> - if source.download_media && Media.pending_download?(media_item) do - MediaDownloadWorker.kickoff_with_task(media_item) + {:ok, media_ids} = get_recent_media_ids(source) + existing_media_items = list_media_items_by_media_id_for(source, media_ids) + new_media_ids = media_ids -- Enum.map(existing_media_items, & &1.media_id) + + maybe_new_media_items = + Enum.map(new_media_ids, fn media_id -> + case create_media_item_from_media_id(source, media_id) do + {:ok, media_item} -> + DownloadingHelpers.kickoff_download_if_pending(media_item, priority: 0) + media_item + + err -> + Logger.error("Error creating media item '#{media_id}' from URL: #{inspect(err)}") + nil end + end) - {:ok, media_item} + # Pick up any stragglers. Intentionally has a lower priority than the per-media item + # kickoff above + DownloadingHelpers.enqueue_pending_download_tasks(source, priority: 1) - err -> - err + Enum.filter(maybe_new_media_items, & &1) + end + + # If possible, use the YouTube API to fetch media IDs. If that fails, fall back to the RSS feed. + # If the YouTube API isn't set up, just use the RSS feed. 
+ defp get_recent_media_ids(source) do + with true <- YoutubeApi.enabled?(), + {:ok, media_ids} <- YoutubeApi.get_recent_media_ids(source) do + {:ok, media_ids} + else + _ -> YoutubeRss.get_recent_media_ids(source) end end defp list_media_items_by_media_id_for(source, media_ids) do MediaQuery.new() - |> MediaQuery.for_source(source) - |> MediaQuery.with_media_ids(media_ids) + |> where(^dynamic([mi], ^MediaQuery.for_source(source) and mi.media_id in ^media_ids)) |> Repo.all() end - defp create_media_item_from_url(source, url) do - {:ok, media_attrs} = YtDlpMedia.get_media_attributes(url) + defp create_media_item_from_media_id(source, media_id) do + url = "https://www.youtube.com/watch?v=#{media_id}" + # This is set to :metadata instead of :indexing since this happens _after_ the + # actual indexing process. In reality, slow indexing is the only thing that + # should be using :indexing. + should_use_cookies = Sources.use_cookies?(source, :metadata) - Media.create_media_item_from_backend_attrs(source, media_attrs) + command_opts = + [output: DownloadOptionBuilder.build_output_path_for(source)] ++ + DownloadOptionBuilder.build_quality_options_for(source) + + case YtDlpMedia.get_media_attributes(url, command_opts, use_cookies: should_use_cookies) do + {:ok, media_attrs} -> + Media.create_media_item_from_backend_attrs(source, media_attrs) + + err -> + err + end end end diff --git a/lib/pinchflat/fast_indexing/fast_indexing_worker.ex b/lib/pinchflat/fast_indexing/fast_indexing_worker.ex index 1e0f700..ed83bf3 100644 --- a/lib/pinchflat/fast_indexing/fast_indexing_worker.ex +++ b/lib/pinchflat/fast_indexing/fast_indexing_worker.ex @@ -4,15 +4,18 @@ defmodule Pinchflat.FastIndexing.FastIndexingWorker do use Oban.Worker, queue: :fast_indexing, unique: [period: :infinity, states: [:available, :scheduled, :retryable]], - tags: ["media_source", "fast_indexing"] + tags: ["media_source", "fast_indexing", "show_in_dashboard"] require Logger alias __MODULE__ alias Pinchflat.Tasks + 
+ alias Pinchflat.Media alias Pinchflat.Sources + alias Pinchflat.Settings alias Pinchflat.Sources.Source alias Pinchflat.FastIndexing.FastIndexingHelpers + alias Pinchflat.Lifecycle.Notifications.SourceNotifications @doc """ Starts the source fast indexing worker and creates a task for the source. @@ -26,9 +29,21 @@ defmodule Pinchflat.FastIndexing.FastIndexingWorker do end @doc """ - Kicks off the fast indexing process for a source, reschedules the job to run again - once complete. See `MediaCollectionIndexingWorker` and `MediaIndexingWorker` comments - for more + Similar to `MediaCollectionIndexingWorker`, but for working with RSS feeds. + `MediaCollectionIndexingWorker` should be preferred in general, but this is + useful for downloading small batches of media items via fast indexing. + + Only kicks off downloads for media that _should_ be downloaded + (ie: the source is set to download and the media matches the profile's format preferences) + + Order of operations: + 1. FastIndexingWorker (this module) periodically checks the YouTube RSS feed for new media + with `FastIndexingHelpers.index_and_kickoff_downloads` + 2. If the above `index_and_kickoff_downloads` finds new media items in the RSS feed, + it indexes them with a yt-dlp call to create the media item records then kicks off downloading + tasks (MediaDownloadWorker) for any new media items _that should be downloaded_. + 3. Once downloads are kicked off, this worker sends a notification to the apprise server if applicable + then reschedules itself to run again in the future. 
Returns :ok | {:ok, :job_exists} | {:ok, %Task{}} """ @@ -37,8 +52,7 @@ defmodule Pinchflat.FastIndexing.FastIndexingWorker do source = Sources.get_source!(source_id) if source.fast_index do - FastIndexingHelpers.kickoff_indexing_tasks_from_youtube_rss_feed(source) - + perform_indexing_and_send_notification(source) reschedule_indexing(source) else :ok @@ -48,6 +62,19 @@ defmodule Pinchflat.FastIndexing.FastIndexingWorker do Ecto.StaleEntryError -> Logger.info("#{__MODULE__} discarded: source #{source_id} stale") end + defp perform_indexing_and_send_notification(source) do + apprise_server = Settings.get!(:apprise_server) + + new_media_items = + source + |> FastIndexingHelpers.index_and_kickoff_downloads() + |> Enum.filter(&Media.pending_download?(&1)) + + if source.download_media do + SourceNotifications.send_new_media_notification(apprise_server, source, length(new_media_items)) + end + end + defp reschedule_indexing(source) do next_run_in = Source.fast_index_frequency() * 60 diff --git a/lib/pinchflat/fast_indexing/media_indexing_worker.ex b/lib/pinchflat/fast_indexing/media_indexing_worker.ex deleted file mode 100644 index c4da1e1..0000000 --- a/lib/pinchflat/fast_indexing/media_indexing_worker.ex +++ /dev/null @@ -1,69 +0,0 @@ -defmodule Pinchflat.FastIndexing.MediaIndexingWorker do - @moduledoc false - - use Oban.Worker, - queue: :media_indexing, - unique: [period: :infinity, states: [:available, :scheduled, :retryable]], - tags: ["media_source", "media_indexing"] - - require Logger - - alias __MODULE__ - alias Pinchflat.Tasks - alias Pinchflat.Sources - alias Pinchflat.FastIndexing.FastIndexingHelpers - - @doc """ - Starts the fast media indexing worker and creates a task for the source. 
- - Returns {:ok, %Task{}} | {:error, :duplicate_job} | {:error, %Ecto.Changeset{}} - """ - def kickoff_with_task(source, media_url, opts \\ []) do - %{id: source.id, media_url: media_url} - |> MediaIndexingWorker.new(opts) - |> Tasks.create_job_with_task(source) - end - - @doc """ - Similar to `MediaCollectionIndexingWorker`, but for individual media items. - Does not reschedule or check anything to do with a source's indexing - frequency - only collects initial metadata then kicks off a download. - `MediaCollectionIndexingWorker` should be preferred in general, but this is - useful for downloading one-off media items based on a URL (like for fast indexing). - - Only downloads media that _should_ be downloaded (ie: the source is set to download - and the media matches the profile's format preferences) - - Order of operations: - 1. FastIndexingHelpers.kickoff_indexing_tasks_from_youtube_rss_feed/1 (which is running - in its own worker) periodically checks the YouTube RSS feed for new media - 2. If new media is found, it enqueues a MediaIndexingWorker (this module) for each new media - item - 3. This worker fetches the media metadata and uses that to determine if it should be - downloaded. If so, it enqueues a MediaDownloadWorker - - Each is a worker because they all either need to be scheduled periodically or call out to - an external service and will be long-running. They're split into different jobs to separate - retry logic for each step and allow us to better optimize various queues (eg: the indexing - steps can keep running while the slow download steps are worked through). 
- - Returns :ok - """ - @impl Oban.Worker - def perform(%Oban.Job{args: %{"id" => source_id, "media_url" => media_url}}) do - source = Sources.get_source!(source_id) - - case FastIndexingHelpers.index_and_enqueue_download_for_media_item(source, media_url) do - {:ok, media_item} -> - Logger.debug("Indexed and enqueued download for url: #{media_url} (media item: #{media_item.id})") - - {:error, reason} -> - Logger.debug("Failed to index and enqueue download for url: #{media_url} (reason: #{inspect(reason)})") - end - - :ok - rescue - Ecto.NoResultsError -> Logger.info("#{__MODULE__} discarded: source #{source_id} not found") - Ecto.StaleEntryError -> Logger.info("#{__MODULE__} discarded: source #{source_id} stale") - end -end diff --git a/lib/pinchflat/fast_indexing/youtube_api.ex b/lib/pinchflat/fast_indexing/youtube_api.ex new file mode 100644 index 0000000..04c7326 --- /dev/null +++ b/lib/pinchflat/fast_indexing/youtube_api.ex @@ -0,0 +1,131 @@ +defmodule Pinchflat.FastIndexing.YoutubeApi do + @moduledoc """ + Methods for interacting with the YouTube API for fast indexing + """ + + require Logger + + alias Pinchflat.Settings + alias Pinchflat.Sources.Source + alias Pinchflat.Utils.FunctionUtils + alias Pinchflat.FastIndexing.YoutubeBehaviour + + @behaviour YoutubeBehaviour + + @agent_name {:global, __MODULE__.KeyIndex} + + @doc """ + Determines if the YouTube API is enabled for fast indexing by checking + if the user has an API key set + + Returns boolean() + """ + @impl YoutubeBehaviour + def enabled?, do: Enum.any?(api_keys()) + + @doc """ + Fetches the recent media IDs from the YouTube API for a given source. 
+ + Returns {:ok, [binary()]} | {:error, binary()} + """ + @impl YoutubeBehaviour + def get_recent_media_ids(%Source{} = source) do + api_response = + source + |> determine_playlist_id() + |> do_api_request() + + case api_response do + {:ok, parsed_json} -> get_media_ids_from_response(parsed_json) + {:error, reason} -> {:error, reason} + end + end + + # The UC prefix is for channels which won't work with this API endpoint. Swapping + # the prefix to UU will get us the playlist that represents the channel's uploads + defp determine_playlist_id(%{collection_id: c_id}) do + String.replace_prefix(c_id, "UC", "UU") + end + + defp do_api_request(playlist_id) do + Logger.debug("Fetching recent media IDs from YouTube API for playlist: #{playlist_id}") + + playlist_id + |> construct_api_endpoint() + |> http_client().get(accept: "application/json") + |> case do + {:ok, response} -> + Phoenix.json_library().decode(response) + + {:error, reason} -> + Logger.error("Failed to fetch YouTube API: #{inspect(reason)}") + {:error, reason} + end + end + + defp get_media_ids_from_response(parsed_json) do + parsed_json + |> Map.get("items", []) + |> Enum.map(fn item -> + item + |> Map.get("contentDetails", %{}) + |> Map.get("videoId", nil) + end) + |> Enum.reject(&is_nil/1) + |> Enum.uniq() + |> FunctionUtils.wrap_ok() + end + + defp api_keys do + case Settings.get!(:youtube_api_key) do + nil -> + [] + + keys -> + keys + |> String.split(",") + |> Enum.map(&String.trim/1) + |> Enum.reject(&(&1 == "")) + end + end + + defp get_or_start_api_key_agent do + case Agent.start(fn -> 0 end, name: @agent_name) do + {:ok, pid} -> pid + {:error, {:already_started, pid}} -> pid + end + end + + # Gets the next API key in round-robin fashion + defp next_api_key do + keys = api_keys() + + case keys do + [] -> + nil + + keys -> + pid = get_or_start_api_key_agent() + + current_index = + Agent.get_and_update(pid, fn current -> + {current, rem(current + 1, length(keys))} + end) + + Logger.debug("Using 
YouTube API key: #{Enum.at(keys, current_index)}") + Enum.at(keys, current_index) + end + end + + defp construct_api_endpoint(playlist_id) do + api_base = "https://youtube.googleapis.com/youtube/v3/playlistItems" + property_type = "contentDetails" + max_results = 50 + + "#{api_base}?part=#{property_type}&maxResults=#{max_results}&playlistId=#{playlist_id}&key=#{next_api_key()}" + end + + defp http_client do + Application.get_env(:pinchflat, :http_client, Pinchflat.HTTP.HTTPClient) + end +end diff --git a/lib/pinchflat/fast_indexing/youtube_behaviour.ex b/lib/pinchflat/fast_indexing/youtube_behaviour.ex new file mode 100644 index 0000000..8a6c390 --- /dev/null +++ b/lib/pinchflat/fast_indexing/youtube_behaviour.ex @@ -0,0 +1,11 @@ +defmodule Pinchflat.FastIndexing.YoutubeBehaviour do + @moduledoc """ + This module defines the behaviour for clients that interface with YouTube + for the purpose of fast indexing. + """ + + alias Pinchflat.Sources.Source + + @callback enabled?() :: boolean() + @callback get_recent_media_ids(%Source{}) :: {:ok, [String.t()]} | {:error, String.t()} +end diff --git a/lib/pinchflat/fast_indexing/youtube_rss.ex b/lib/pinchflat/fast_indexing/youtube_rss.ex index 5caaab8..cb83fa9 100644 --- a/lib/pinchflat/fast_indexing/youtube_rss.ex +++ b/lib/pinchflat/fast_indexing/youtube_rss.ex @@ -1,18 +1,31 @@ defmodule Pinchflat.FastIndexing.YoutubeRss do @moduledoc """ - Methods for interacting with YouTube RSS feeds + Methods for interacting with YouTube RSS feeds for fast indexing """ require Logger alias Pinchflat.Sources.Source + alias Pinchflat.FastIndexing.YoutubeBehaviour + + @behaviour YoutubeBehaviour + + @doc """ + Determines if the YouTube RSS feed is enabled for fast indexing. Used to satisfy + the `YoutubeBehaviour` behaviour. + + Returns true + """ + @impl YoutubeBehaviour + def enabled?(), do: true @doc """ Fetches the recent media IDs from a YouTube RSS feed for a given source. 
Returns {:ok, [binary()]} | {:error, binary()} """ - def get_recent_media_ids_from_rss(%Source{} = source) do + @impl YoutubeBehaviour + def get_recent_media_ids(%Source{} = source) do Logger.debug("Fetching recent media IDs from YouTube RSS feed for source: #{source.collection_id}") case http_client().get(rss_url_for_source(source)) do diff --git a/lib/pinchflat/http/http_client.ex b/lib/pinchflat/http/http_client.ex index 45abda4..c7d4550 100644 --- a/lib/pinchflat/http/http_client.ex +++ b/lib/pinchflat/http/http_client.ex @@ -21,9 +21,11 @@ defmodule Pinchflat.HTTP.HTTPClient do """ @impl HTTPBehaviour def get(url, headers \\ [], opts \\ []) do + headers = parse_headers(headers) + case :httpc.request(:get, {url, headers}, [], opts) do {:ok, {{_version, 200, _reason_phrase}, _headers, body}} -> - {:ok, body} + {:ok, to_string(body)} {:ok, {{_version, status_code, reason_phrase}, _headers, _body}} -> {:error, "HTTP request failed with status code #{status_code}: #{reason_phrase}"} @@ -32,4 +34,8 @@ defmodule Pinchflat.HTTP.HTTPClient do {:error, "HTTP request failed: #{reason}"} end end + + defp parse_headers(headers) do + Enum.map(headers, fn {k, v} -> {to_charlist(k), to_charlist(v)} end) + end end diff --git a/lib/pinchflat/lifecycle/notifications/apprise_command_runner.ex b/lib/pinchflat/lifecycle/notifications/apprise_command_runner.ex new file mode 100644 index 0000000..8251b95 --- /dev/null +++ b/lib/pinchflat/lifecycle/notifications/apprise_command_runner.ex @@ -0,0 +1,12 @@ +defmodule Pinchflat.Lifecycle.Notifications.AppriseCommandRunner do + @moduledoc """ + A behaviour for running CLI commands against a notification backend (apprise). + + Used so we can implement Mox for testing without actually running the + apprise command. 
+ """ + + @callback run(binary(), keyword()) :: :ok | {:error, binary()} + @callback run(List.t(), keyword()) :: :ok | {:error, binary()} + @callback version() :: {:ok, binary()} | {:error, binary()} +end diff --git a/lib/pinchflat/lifecycle/notifications/command_runner.ex b/lib/pinchflat/lifecycle/notifications/command_runner.ex new file mode 100644 index 0000000..77d43ff --- /dev/null +++ b/lib/pinchflat/lifecycle/notifications/command_runner.ex @@ -0,0 +1,63 @@ +defmodule Pinchflat.Lifecycle.Notifications.CommandRunner do + @moduledoc """ + Runs apprise commands using the `System.cmd/3` function + """ + + require Logger + + alias Pinchflat.Utils.CliUtils + alias Pinchflat.Utils.FunctionUtils + alias Pinchflat.Lifecycle.Notifications.AppriseCommandRunner + + @behaviour AppriseCommandRunner + + @doc """ + Runs an apprise command and returns the string output. + Can take a single server string or a list of servers as well as additional + arguments to pass to the command. + + Returns {:ok, binary()} | {:error, :no_servers} | {:error, binary()} + """ + @impl AppriseCommandRunner + def run(nil, _), do: {:error, :no_servers} + def run("", _), do: {:error, :no_servers} + def run([], _), do: {:error, :no_servers} + + def run(endpoints, command_opts) do + endpoints = List.wrap(endpoints) + default_opts = [:verbose] + parsed_opts = CliUtils.parse_options(default_opts ++ command_opts) + + {output, exit_code} = CliUtils.wrap_cmd(backend_executable(), parsed_opts ++ endpoints) + + case exit_code do + 0 -> {:ok, String.trim(output)} + _ -> {:error, String.trim(output)} + end + end + + @doc """ + Returns the version of apprise as a string. 
+ + Returns {:ok, binary()} | {:error, binary()} + """ + @impl AppriseCommandRunner + def version do + case CliUtils.wrap_cmd(backend_executable(), ["--version"]) do + {output, 0} -> + output + |> String.split(~r{\r?\n}) + |> List.first() + |> String.replace("Apprise", "") + |> String.trim() + |> FunctionUtils.wrap_ok() + + {output, _} -> + {:error, output} + end + end + + defp backend_executable do + Application.get_env(:pinchflat, :apprise_executable) + end +end diff --git a/lib/pinchflat/lifecycle/notifications/source_notifications.ex b/lib/pinchflat/lifecycle/notifications/source_notifications.ex new file mode 100644 index 0000000..e6e4c04 --- /dev/null +++ b/lib/pinchflat/lifecycle/notifications/source_notifications.ex @@ -0,0 +1,81 @@ +defmodule Pinchflat.Lifecycle.Notifications.SourceNotifications do + @moduledoc """ + Contains utilities for sending notifications about sources + """ + + require Logger + + use Pinchflat.Media.MediaQuery + + alias Pinchflat.Repo + + @doc """ + Wraps a function that may change the number of pending or downloaded + media items for a source, sending an apprise notification if + the count changes. + + Returns the return value of the provided function + """ + def wrap_new_media_notification(servers, source, func) do + before_count = relevant_media_item_count(source) + retval = func.() + after_count = relevant_media_item_count(source) + + send_new_media_notification(servers, source, after_count - before_count) + + retval + end + + @doc """ + Sends a notification if the count of new media items has changed + + Returns :ok + """ + def send_new_media_notification(_, _, count) when count <= 0, do: :ok + + def send_new_media_notification(servers, source, changed_count) do + opts = [ + title: "[Pinchflat] New media found", + body: "Found #{changed_count} new media item(s) for #{source.custom_name}. 
Downloading them now" + ] + + case backend_runner().run(servers, opts) do + {:ok, _} -> + Logger.info("Sent new media notification for source #{source.id}") + + {:error, :no_servers} -> + Logger.info("No notification servers provided for source #{source.id}") + + {:error, err} -> + Logger.error("Failed to send new media notification for source #{source.id}: #{err}") + end + + :ok + end + + defp relevant_media_item_count(source) do + if source.download_media do + pending_media_item_count(source) + downloaded_media_item_count(source) + else + 0 + end + end + + defp pending_media_item_count(source) do + MediaQuery.new() + |> MediaQuery.require_assoc(:media_profile) + |> where(^dynamic(^MediaQuery.for_source(source) and ^MediaQuery.pending())) + |> Repo.aggregate(:count) + end + + defp downloaded_media_item_count(source) do + MediaQuery.new() + |> where(^dynamic(^MediaQuery.for_source(source) and ^MediaQuery.downloaded())) + |> Repo.aggregate(:count) + end + + defp backend_runner do + # This approach lets us mock the command for testing + Application.get_env(:pinchflat, :apprise_runner) + end +end diff --git a/lib/pinchflat/lifecycle/user_scripts/command_runner.ex b/lib/pinchflat/lifecycle/user_scripts/command_runner.ex new file mode 100644 index 0000000..9a77ea5 --- /dev/null +++ b/lib/pinchflat/lifecycle/user_scripts/command_runner.ex @@ -0,0 +1,73 @@ +defmodule Pinchflat.Lifecycle.UserScripts.CommandRunner do + @moduledoc """ + Runs custom user commands using the `System.cmd/3` function + """ + + require Logger + + alias Pinchflat.Utils.CliUtils + alias Pinchflat.Utils.FilesystemUtils + alias Pinchflat.Lifecycle.UserScripts.UserScriptCommandRunner + + @behaviour UserScriptCommandRunner + + @event_types [ + :app_init, + :media_pre_download, + :media_downloaded, + :media_deleted + ] + + @doc """ + Runs the user script command for the given event type. Passes the event + and the encoded data to the user script command. 
+ + This function will succeed in almost all cases, even if the user script command + failed - this is because I don't want bad scripts to stop the whole process. + If something fails, it'll be logged and returned BUT the tuple will always + start with {:ok, ...}. + + The only things that can cause a true failure are passing in an invalid event + type or if the passed data cannot be encoded into JSON - both indicative of + failures in the development process. + + Returns {:ok, :no_executable} | {:ok, output, exit_code} + """ + @impl UserScriptCommandRunner + def run(event_type, encodable_data) when event_type in @event_types do + case backend_executable() do + {:ok, :no_executable} -> + {:ok, :no_executable} + + {:ok, executable_path} -> + {:ok, encoded_data} = Phoenix.json_library().encode(encodable_data) + + {output, exit_code} = + CliUtils.wrap_cmd( + executable_path, + [to_string(event_type), encoded_data], + [], + logging_arg_override: "[suppressed]" + ) + + {:ok, output, exit_code} + end + end + + def run(event_type, _encodable_data) do + raise ArgumentError, "Invalid event type: #{inspect(event_type)}" + end + + defp backend_executable do + base_dir = Application.get_env(:pinchflat, :extras_directory) + filepath = Path.join([base_dir, "user-scripts", "lifecycle"]) + + if FilesystemUtils.exists_and_nonempty?(filepath) do + {:ok, filepath} + else + Logger.info("User scripts lifecyle file either not present or is empty. Skipping.") + + {:ok, :no_executable} + end + end +end diff --git a/lib/pinchflat/lifecycle/user_scripts/user_script_command_runner.ex b/lib/pinchflat/lifecycle/user_scripts/user_script_command_runner.ex new file mode 100644 index 0000000..a0f6234 --- /dev/null +++ b/lib/pinchflat/lifecycle/user_scripts/user_script_command_runner.ex @@ -0,0 +1,10 @@ +defmodule Pinchflat.Lifecycle.UserScripts.UserScriptCommandRunner do + @moduledoc """ + A behaviour for running custom user scripts on certain events. 
+ + Used so we can implement Mox for testing without actually running the + user's command. + """ + + @callback run(atom(), map()) :: :ok | {:error, binary()} +end diff --git a/lib/pinchflat/media/file_syncing.ex b/lib/pinchflat/media/file_syncing.ex new file mode 100644 index 0000000..7fb5b65 --- /dev/null +++ b/lib/pinchflat/media/file_syncing.ex @@ -0,0 +1,93 @@ +defmodule Pinchflat.Media.FileSyncing do + @moduledoc """ + Functions for ensuring file state is accurately reflected in the database. + """ + + alias Pinchflat.Media + alias Pinchflat.Utils.MapUtils + alias Pinchflat.Media.MediaItem + alias Pinchflat.Utils.FilesystemUtils, as: FSUtils + + @doc """ + Deletes files that are no longer needed by a media item. + + This means that if a media item has been updated, the old and new versions + can be passed and any files that are no longer needed will be deleted. + + An example is a video that gets its quality upgraded and its name changes + between original download and re-download. The old file will exist on-disk + with the old name but the database entry will point to the new file. This + function can be used to delete the old file in this case. 
+ + Returns :ok + """ + def delete_outdated_files(old_media_item, new_media_item) do + non_subtitle_keys = MediaItem.filepath_attributes() -- [:subtitle_filepaths] + + old_non_subtitles = Map.take(old_media_item, non_subtitle_keys) + old_subtitles = MapUtils.from_nested_list(old_media_item.subtitle_filepaths) + new_non_subtitles = Map.take(new_media_item, non_subtitle_keys) + new_subtitles = MapUtils.from_nested_list(new_media_item.subtitle_filepaths) + + handle_file_deletion(old_non_subtitles, new_non_subtitles) + handle_file_deletion(old_subtitles, new_subtitles) + + :ok + end + + @doc """ + Nillifies any media item filepaths that don't exist on disk for a list of media items + + returns [%MediaItem{}] + """ + def sync_file_presence_on_disk(media_items) do + Enum.map(media_items, fn media_item -> + new_attributes = sync_media_item_files(media_item) + # Doing this one-by-one instead of batching since this process + # can take time and a batch could let MediaItem state get out of sync + {:ok, updated_media_item} = Media.update_media_item(media_item, new_attributes) + + updated_media_item + end) + end + + defp handle_file_deletion(old_attributes, new_attributes) do + # The logic: + # - A file should only be deleted if it exists and the new file is different + # - The new attributes are the ones we're interested in keeping + # - If the old attributes have a key that doesn't exist in the new attributes, don't touch it. + # This is good for archiving but may be unpopular for other users so this may change. 
+ + Enum.each(new_attributes, fn {key, new_filepath} -> + old_filepath = Map.get(old_attributes, key) + files_have_changed = old_filepath && new_filepath && old_filepath != new_filepath + files_exist_on_disk = files_have_changed && File.exists?(old_filepath) && File.exists?(new_filepath) + + if files_exist_on_disk && !FSUtils.filepaths_reference_same_file?(old_filepath, new_filepath) do + FSUtils.delete_file_and_remove_empty_directories(old_filepath) + end + end) + end + + defp sync_media_item_files(media_item) do + non_subtitle_keys = MediaItem.filepath_attributes() -- [:subtitle_filepaths] + subtitle_keys = MapUtils.from_nested_list(media_item.subtitle_filepaths) + non_subtitles = Map.take(media_item, non_subtitle_keys) + + # This one is checking for the negative (ie: only update if the file doesn't exist) + new_non_subtitle_attrs = + Enum.reduce(non_subtitles, %{}, fn {key, filepath}, acc -> + if filepath && File.exists?(filepath), do: acc, else: Map.put(acc, key, nil) + end) + + # This one is checking for the positive (ie: only update if the file exists) + # This is because subtitles, being an array type in the DB, are most easily updated + # by a full replacement rather than finding the actual diff + new_subtitle_attrs = + Enum.reduce(subtitle_keys, [], fn {key, filepath}, acc -> + if filepath && File.exists?(filepath), do: acc ++ [[key, filepath]], else: acc + end) + + Map.put(new_non_subtitle_attrs, :subtitle_filepaths, new_subtitle_attrs) + end +end diff --git a/lib/pinchflat/media/file_syncing_worker.ex b/lib/pinchflat/media/file_syncing_worker.ex new file mode 100644 index 0000000..fde7e73 --- /dev/null +++ b/lib/pinchflat/media/file_syncing_worker.ex @@ -0,0 +1,38 @@ +defmodule Pinchflat.Media.FileSyncingWorker do + @moduledoc false + + use Oban.Worker, + queue: :local_data, + tags: ["sources", "local_data"] + + alias __MODULE__ + alias Pinchflat.Repo + alias Pinchflat.Tasks + alias Pinchflat.Sources + alias Pinchflat.Media.FileSyncing + + @doc """ + 
+ Starts the source file syncing worker. + + Returns {:ok, %Task{}} | {:error, %Ecto.Changeset{}} + """ + def kickoff_with_task(source, opts \\ []) do + %{id: source.id} + |> FileSyncingWorker.new(opts) + |> Tasks.create_job_with_task(source) + end + + @doc """ + Syncs file presence on disk for all of a source's media items, nilifying any stored filepaths whose files no longer exist + + Returns :ok + """ + @impl Oban.Worker + def perform(%Oban.Job{args: %{"id" => source_id}}) do + source = Repo.preload(Sources.get_source!(source_id), :media_items) + + FileSyncing.sync_file_presence_on_disk(source.media_items) + + :ok + end +end diff --git a/lib/pinchflat/media/media.ex b/lib/pinchflat/media/media.ex index ee8531d..e1d8d7d 100644 --- a/lib/pinchflat/media/media.ex +++ b/lib/pinchflat/media/media.ex @@ -4,14 +4,19 @@ defmodule Pinchflat.Media do """ import Ecto.Query, warn: false + use Pinchflat.Media.MediaQuery alias Pinchflat.Repo alias Pinchflat.Tasks alias Pinchflat.Sources.Source alias Pinchflat.Media.MediaItem - alias Pinchflat.Media.MediaQuery + alias Pinchflat.Utils.FilesystemUtils alias Pinchflat.Metadata.MediaMetadata - alias Pinchflat.Filesystem.FilesystemHelpers + + alias Pinchflat.Lifecycle.UserScripts.CommandRunner, as: UserScriptRunner + + # Some fields should only be set on insert and not on update. + @fields_to_drop_on_update [:playlist_index] @doc """ Returns the list of media_items. @@ -23,23 +28,32 @@ defmodule Pinchflat.Media do end @doc """ - Returns a list of media_items that are cullable based on the retention period - of the source they belong to. + Returns a list of media_items that are upgradeable based on the redownload delay + of the media_profile their source belongs to. In this context, upgradeable means + that it's been long enough since upload that the video may be in a higher quality + or have better sponsorblock segments (or similar). 
+ + The logic is that a media_item is past_redownload_delay if the media_item's uploaded_at is + at least redownload_delay_days ago AND `media_downloaded_at` - `redownload_delay_days` + is before the media_item's `uploaded_at`. + + This logic grabs media that we've recently downloaded AND is recently uploaded, but + doesn't grab media that we've recently downloaded and was uploaded a long time ago. + This also makes things work as expected when downloading media from a source for the + first time. Returns [%MediaItem{}, ...] """ - def list_cullable_media_items do + def list_upgradeable_media_items do MediaQuery.new() - |> MediaQuery.with_media_filepath() - |> MediaQuery.with_passed_retention_period() - |> MediaQuery.with_no_culling_prevention() + |> MediaQuery.require_assoc(:media_profile) + |> where(^MediaQuery.upgradeable()) |> Repo.all() end @doc """ Returns a list of pending media_items for a given source, where - pending means the `media_filepath` is `nil` AND the media_item - matches satisfies `MediaQuery.with_media_pending_download`. You + pending means the media_item satisfies `MediaQuery.pending`. You should really check out that function if you need to know more because it has a lot going on. @@ -47,15 +61,14 @@ defmodule Pinchflat.Media do """ def list_pending_media_items_for(%Source{} = source) do MediaQuery.new() - |> MediaQuery.for_source(source) - |> MediaQuery.with_media_pending_download() + |> MediaQuery.require_assoc(:media_profile) + |> where(^dynamic(^MediaQuery.for_source(source) and ^MediaQuery.pending())) |> Repo.all() end @doc """ For a given media_item, tells you if it is pending download. This is defined as - the media_item having a `media_filepath` of `nil` and matching the format selection - rules of the parent media_profile. + the media_item satisfying `MediaQuery.pending` which you should really check out. Intentionally does not take the `download_media` setting of the source into account. 
@@ -65,8 +78,8 @@ defmodule Pinchflat.Media do media_item = Repo.preload(media_item, source: :media_profile) MediaQuery.new() - |> MediaQuery.with_id(media_item.id) - |> MediaQuery.with_media_pending_download() + |> MediaQuery.require_assoc(:media_profile) + |> where(^dynamic([m, s, mp], m.id == ^media_item.id and ^MediaQuery.pending())) |> Repo.exists?() end @@ -115,7 +128,7 @@ defmodule Pinchflat.Media do Unlike `create_media_item`, this will attempt an update if the media_item already exists. This is so that future indexing can pick up attributes that - we may not have asked for in the past (eg: upload_date) + we may not have asked for in the past (eg: uploaded_at) Returns {:ok, %MediaItem{}} | {:error, %Ecto.Changeset{}} """ @@ -126,7 +139,10 @@ defmodule Pinchflat.Media do |> MediaItem.changeset(attrs) |> Repo.insert( on_conflict: [ - set: Map.to_list(attrs) + set: + attrs + |> Map.drop(@fields_to_drop_on_update) + |> Map.to_list() ], conflict_target: [:source_id, :media_id] ) @@ -138,8 +154,10 @@ defmodule Pinchflat.Media do Returns {:ok, %MediaItem{}} | {:error, %Ecto.Changeset{}} """ def update_media_item(%MediaItem{} = media_item, attrs) do + update_attrs = Map.drop(attrs, @fields_to_drop_on_update) + media_item - |> MediaItem.changeset(attrs) + |> MediaItem.changeset(update_attrs) |> Repo.update() end @@ -156,6 +174,7 @@ defmodule Pinchflat.Media do if delete_files do {:ok, _} = do_delete_media_files(media_item) + run_user_script(:media_deleted, media_item) end # Should delete these no matter what @@ -178,6 +197,7 @@ defmodule Pinchflat.Media do Tasks.delete_tasks_for(media_item) {:ok, _} = do_delete_media_files(media_item) + run_user_script(:media_deleted, media_item) update_media_item(media_item, Map.merge(filepath_attrs, addl_attrs)) end @@ -199,7 +219,7 @@ defmodule Pinchflat.Media do end) |> List.flatten() |> Enum.filter(&is_binary/1) - |> Enum.each(&FilesystemHelpers.delete_file_and_remove_empty_directories/1) + |> 
Enum.each(&FilesystemUtils.delete_file_and_remove_empty_directories/1) {:ok, media_item} end @@ -211,6 +231,12 @@ defmodule Pinchflat.Media do MediaMetadata.filepath_attributes() |> Enum.map(fn field -> mapped_struct[field] end) |> Enum.filter(&is_binary/1) - |> Enum.each(&FilesystemHelpers.delete_file_and_remove_empty_directories/1) + |> Enum.each(&FilesystemUtils.delete_file_and_remove_empty_directories/1) + end + + defp run_user_script(event, media_item) do + runner = Application.get_env(:pinchflat, :user_script_runner, UserScriptRunner) + + runner.run(event, media_item) end end diff --git a/lib/pinchflat/media/media_item.ex b/lib/pinchflat/media/media_item.ex index 52633b2..ced8e8c 100644 --- a/lib/pinchflat/media/media_item.ex +++ b/lib/pinchflat/media/media_item.ex @@ -4,15 +4,22 @@ defmodule Pinchflat.Media.MediaItem do """ use Ecto.Schema + use Pinchflat.Media.MediaQuery + import Ecto.Changeset import Pinchflat.Utils.ChangesetUtils + alias __MODULE__ + alias Pinchflat.Repo + alias Pinchflat.Sources alias Pinchflat.Tasks.Task alias Pinchflat.Sources.Source alias Pinchflat.Metadata.MediaMetadata alias Pinchflat.Media.MediaItemsSearchIndex @allowed_fields [ + # these fields are only captured on index + :playlist_index, # these fields are captured on indexing (and again on download) :title, :media_id, @@ -21,8 +28,10 @@ defmodule Pinchflat.Media.MediaItem do :livestream, :source_id, :short_form_content, - :upload_date, + :uploaded_at, + :upload_date_index, :duration_seconds, + :predicted_media_filepath, # these fields are captured only on download :media_downloaded_at, :media_filepath, @@ -31,10 +40,12 @@ defmodule Pinchflat.Media.MediaItem do :thumbnail_filepath, :metadata_filepath, :nfo_filepath, + :last_error, # These are user or system controlled fields :prevent_download, :prevent_culling, - :culled_at + :culled_at, + :media_redownloaded_at ] # Pretty much all the fields captured at index are required. 
@required_fields ~w( @@ -44,7 +55,7 @@ defmodule Pinchflat.Media.MediaItem do livestream media_id source_id - upload_date + uploaded_at short_form_content )a @@ -61,9 +72,13 @@ defmodule Pinchflat.Media.MediaItem do field :livestream, :boolean, default: false field :short_form_content, :boolean, default: false field :media_downloaded_at, :utc_datetime - field :upload_date, :date + field :media_redownloaded_at, :utc_datetime + field :uploaded_at, :utc_datetime + field :upload_date_index, :integer, default: 0 field :duration_seconds, :integer + field :playlist_index, :integer, default: 0 + field :predicted_media_filepath, :string field :media_filepath, :string field :media_size_bytes, :integer field :thumbnail_filepath, :string @@ -74,6 +89,7 @@ defmodule Pinchflat.Media.MediaItem do # Will very likely revisit because I can't leave well-enough alone. field :subtitle_filepaths, {:array, {:array, :string}}, default: [] + field :last_error, :string field :prevent_download, :boolean, default: false field :prevent_culling, :boolean, default: false field :culled_at, :utc_datetime @@ -96,7 +112,11 @@ defmodule Pinchflat.Media.MediaItem do |> cast(attrs, @allowed_fields) |> cast_assoc(:metadata, with: &MediaMetadata.changeset/2, required: false) |> dynamic_default(:uuid, fn _ -> Ecto.UUID.generate() end) + |> update_upload_date_index() |> validate_required(@required_fields) + # Validate that the title does NOT start with "youtube video #" since that indicates a restriction by YouTube. + # See issue #549 for more information. + |> validate_format(:title, ~r/^(?!youtube video #)/) |> unique_constraint([:media_id, :source_id]) end @@ -114,4 +134,65 @@ defmodule Pinchflat.Media.MediaItem do end) |> Enum.into(%{}) end + + @doc false + def json_exluded_fields do + ~w(__meta__ __struct__ metadata tasks media_items_search_index)a + end + + # Run it on new records no matter what. 
The method we delegate to + # will handle the case where `uploaded_at` is `nil` + defp update_upload_date_index(%{data: %{id: nil}} = changeset) do + do_update_upload_date_index(changeset) + end + + # For the update case, we only want to recalculate if the day itself has changed. + # For instance, this is useful in the migration from `upload_date` to `uploaded_at` + defp update_upload_date_index(%{changes: changes} = changeset) when is_map_key(changes, :uploaded_at) do + old_uploaded_at = changeset.data.uploaded_at + new_uploaded_at = get_change(changeset, :uploaded_at) + upload_dates_match = DateTime.to_date(old_uploaded_at) == DateTime.to_date(new_uploaded_at) + + if upload_dates_match do + changeset + else + do_update_upload_date_index(changeset) + end + end + + # If the record is persisted and the `uploaded_at` field is not being changed, + # we don't need to recalculate the index. + defp update_upload_date_index(changeset), do: changeset + + defp do_update_upload_date_index(%{changes: changes} = changeset) when is_map_key(changes, :uploaded_at) do + source_id = get_field(changeset, :source_id) + source = Sources.get_source!(source_id) + # Channels should count down from 99, playlists should count up from 0 + # This reflects the fact that channels prepend new videos to the top of the list + # and playlists append new videos to the bottom of the list. 
+ default_index = if source.collection_type == :channel, do: 99, else: 0 + aggregator = if source.collection_type == :channel, do: :min, else: :max + change_direction = if source.collection_type == :channel, do: -1, else: 1 + + current_max = + MediaQuery.new() + |> where(^dynamic([mi], ^MediaQuery.upload_date_matches(changes.uploaded_at) and ^MediaQuery.for_source(source))) + |> Repo.aggregate(aggregator, :upload_date_index) + + case current_max do + nil -> put_change(changeset, :upload_date_index, default_index) + max -> put_change(changeset, :upload_date_index, max + change_direction) + end + end + + defp do_update_upload_date_index(changeset), do: changeset + + defimpl Jason.Encoder, for: MediaItem do + def encode(value, opts) do + value + |> Repo.preload(:source) + |> Map.drop(MediaItem.json_exluded_fields()) + |> Jason.Encode.map(opts) + end + end end diff --git a/lib/pinchflat/media/media_query.ex b/lib/pinchflat/media/media_query.ex index 6751c28..840e82c 100644 --- a/lib/pinchflat/media/media_query.ex +++ b/lib/pinchflat/media/media_query.ex @@ -12,102 +12,41 @@ defmodule Pinchflat.Media.MediaQuery do alias Pinchflat.Media.MediaItem - # Prefixes: - # - for_* - belonging to a certain record - # - join_* - for joining on a certain record - # - with_* - for filtering based on full, concrete attributes - # - matching_* - for filtering based on partial attributes (e.g. 
LIKE, regex, full-text search) - # - # Suffixes: - # - _for - the arg passed is an association record + # This allows the module to be aliased and query methods to be used + # all in one go + # usage: use Pinchflat.Media.MediaQuery + defmacro __using__(_opts) do + quote do + import Ecto.Query, warn: false + + alias unquote(__MODULE__) + end + end def new do MediaItem end - def for_source(query, source) do - where(query, [mi], mi.source_id == ^source.id) - end + def for_source(source_id) when is_integer(source_id), do: dynamic([mi], mi.source_id == ^source_id) + def for_source(source), do: dynamic([mi], mi.source_id == ^source.id) - def join_sources(query) do - from(mi in query, join: s in assoc(mi, :source), as: :sources) - end + def downloaded, do: dynamic([mi], not is_nil(mi.media_filepath)) + def download_prevented, do: dynamic([mi], mi.prevent_download == true) + def culling_prevented, do: dynamic([mi], mi.prevent_culling == true) + def redownloaded, do: dynamic([mi], not is_nil(mi.media_redownloaded_at)) + def upload_date_matches(other_date), do: dynamic([mi], fragment("date(?) = date(?)", mi.uploaded_at, ^other_date)) - def with_passed_retention_period(query) do - query - |> require_assoc(:source) - |> where( + def upload_date_after_source_cutoff do + dynamic( [mi, source], - fragment( - "IFNULL(?, 0) > 0 AND DATETIME('now', '-' || ? || ' day') > ?", - source.retention_period_days, - source.retention_period_days, - mi.media_downloaded_at - ) + is_nil(source.download_cutoff_date) or + fragment("date(?) 
>= ?", mi.uploaded_at, source.download_cutoff_date) ) end - def with_no_culling_prevention(query) do - where(query, [mi], mi.prevent_culling == false) - end - - def with_id(query, id) do - where(query, [mi], mi.id == ^id) - end - - def with_media_ids(query, media_ids) do - where(query, [mi], mi.media_id in ^media_ids) - end - - def with_media_filepath(query) do - where(query, [mi], not is_nil(mi.media_filepath)) - end - - def with_no_media_filepath(query) do - where(query, [mi], is_nil(mi.media_filepath)) - end - - def with_upload_date_after_source_cutoff(query) do - query - |> require_assoc(:source) - |> where([mi, source], is_nil(source.download_cutoff_date) or mi.upload_date >= source.download_cutoff_date) - end - - def with_no_prevented_download(query) do - where(query, [mi], mi.prevent_download == false) - end - - def matching_source_title_regex(query) do - query - |> require_assoc(:source) - |> where( - [mi, source], - is_nil(source.title_filter_regex) or fragment("regexp_like(?, ?)", mi.title, source.title_filter_regex) - ) - end - - def matching_search_term(query, nil), do: query - - def matching_search_term(query, term) do - from(mi in query, - join: mi_search_index in assoc(mi, :media_items_search_index), - where: fragment("media_items_search_index MATCH ?", ^term), - select_merge: %{ - matching_search_term: - fragment(""" - coalesce(snippet(media_items_search_index, 0, '[PF_HIGHLIGHT]', '[/PF_HIGHLIGHT]', '...', 20), '') || - ' ' || - coalesce(snippet(media_items_search_index, 1, '[PF_HIGHLIGHT]', '[/PF_HIGHLIGHT]', '...', 20), '') - """) - }, - order_by: [desc: fragment("rank")] - ) - end - - def with_format_matching_profile_preference(query) do - query - |> require_assoc(:media_profile) - |> where( + def format_matching_profile_preference do + dynamic( + [mi, source, media_profile], fragment(""" CASE WHEN shorts_behaviour = 'only' AND livestream_behaviour = 'only' THEN @@ -129,16 +68,97 @@ defmodule Pinchflat.Media.MediaQuery do ) end - def 
with_media_pending_download(query) do - query - |> with_no_prevented_download() - |> with_no_media_filepath() - |> with_upload_date_after_source_cutoff() - |> with_format_matching_profile_preference() - |> matching_source_title_regex() + def matches_source_title_regex do + dynamic( + [mi, source], + is_nil(source.title_filter_regex) or fragment("regexp_like(?, ?)", mi.title, source.title_filter_regex) + ) end - defp require_assoc(query, identifier) do + def meets_min_and_max_duration do + dynamic( + [mi, source], + (is_nil(source.min_duration_seconds) or fragment("duration_seconds >= ?", source.min_duration_seconds)) and + (is_nil(source.max_duration_seconds) or fragment("duration_seconds <= ?", source.max_duration_seconds)) + ) + end + + def past_retention_period do + dynamic( + [mi, source], + fragment(""" + IFNULL(retention_period_days, 0) > 0 AND + DATETIME(media_downloaded_at, '+' || retention_period_days || ' day') < DATETIME('now') + """) + ) + end + + def past_redownload_delay do + dynamic( + [mi, source, media_profile], + # Returns media items where the uploaded_at is at least redownload_delay_days ago AND + # downloaded_at minus the redownload_delay_days is before the upload date + fragment(""" + IFNULL(redownload_delay_days, 0) > 0 AND + DATE('now', '-' || redownload_delay_days || ' day') > DATE(uploaded_at) AND + DATE(media_downloaded_at, '-' || redownload_delay_days || ' day') < DATE(uploaded_at) + """) + ) + end + + def cullable do + dynamic( + [mi, source], + ^downloaded() and + ^past_retention_period() and + not (^culling_prevented()) + ) + end + + def deletable_based_on_source_cutoff do + dynamic( + [mi, source], + ^downloaded() and + not (^upload_date_after_source_cutoff()) and + not (^culling_prevented()) + ) + end + + def pending do + dynamic( + [mi], + not (^downloaded()) and + not (^download_prevented()) and + ^upload_date_after_source_cutoff() and + ^format_matching_profile_preference() and + ^matches_source_title_regex() and + 
^meets_min_and_max_duration() + ) + end + + def upgradeable do + dynamic( + [mi, source], + ^downloaded() and + not (^download_prevented()) and + not (^redownloaded()) and + ^past_redownload_delay() + ) + end + + def matches_search_term(nil), do: dynamic([mi], true) + + def matches_search_term(term) do + escaped_term = clean_search_term(term) + + # Matching on `term` instead of `escaped_term` because the latter can mangle empty strings + case String.trim(term) do + "" -> dynamic([mi], true) + _ -> dynamic([mi], fragment("media_items_search_index MATCH ?", ^escaped_term)) + end + end + + def require_assoc(query, identifier) do if has_named_binding?(query, identifier) do query else @@ -146,6 +166,10 @@ defmodule Pinchflat.Media.MediaQuery do end end + defp do_require_assoc(query, :media_items_search_index) do + from(mi in query, join: s in assoc(mi, :media_items_search_index), as: :media_items_search_index) + end + defp do_require_assoc(query, :source) do from(mi in query, join: s in assoc(mi, :source), as: :source) end @@ -155,4 +179,48 @@ defmodule Pinchflat.Media.MediaQuery do |> require_assoc(:source) |> join(:inner, [mi, source], mp in assoc(source, :media_profile), as: :media_profile) end + + # This needs to be a non-dynamic query because it alone should control things like + # ordering and `snippets` for full-text search + def matching_search_term(query, nil), do: query + + def matching_search_term(query, term) do + escaped_term = clean_search_term(term) + + from(mi in query, + join: mi_search_index in assoc(mi, :media_items_search_index), + where: fragment("media_items_search_index MATCH ?", ^escaped_term), + select_merge: %{ + matching_search_term: + fragment(""" + coalesce(snippet(media_items_search_index, 0, '[PF_HIGHLIGHT]', '[/PF_HIGHLIGHT]', '...', 20), '') || + ' ' || + coalesce(snippet(media_items_search_index, 1, '[PF_HIGHLIGHT]', '[/PF_HIGHLIGHT]', '...', 20), '') + """) + }, + order_by: [desc: fragment("rank")] + ) + end + + # SQLite's FTS5 is very 
picky about what it will accept as a search term. + # To that end, we need to clean up the search term before passing it to the + # MATCH clause. + # This method: + # - Trims leading and trailing whitespace + # - Collapses multiple spaces into a single space + # - Removes quote characters + # - Wraps any word in quotes (must happen after the double quote replacement) + # + # This allows for works with apostrophes and quotes to be searched for correctly + defp clean_search_term(nil), do: "" + defp clean_search_term(""), do: "" + + defp clean_search_term(term) do + term + |> String.trim() + |> String.replace(~r/\s+/, " ") + |> String.split(~r/\s+/) + |> Enum.map(fn str -> String.replace(str, ~s("), "") end) + |> Enum.map_join(" ", fn str -> ~s("#{str}") end) + end end diff --git a/lib/pinchflat/metadata/metadata_file_helpers.ex b/lib/pinchflat/metadata/metadata_file_helpers.ex index 8d4da24..842f6ef 100644 --- a/lib/pinchflat/metadata/metadata_file_helpers.ex +++ b/lib/pinchflat/metadata/metadata_file_helpers.ex @@ -9,7 +9,10 @@ defmodule Pinchflat.Metadata.MetadataFileHelpers do needed """ - alias Pinchflat.Filesystem.FilesystemHelpers + alias Pinchflat.Sources + alias Pinchflat.Utils.FilesystemUtils + + alias Pinchflat.YtDlp.Media, as: YtDlpMedia @doc """ Returns the directory where metadata for a database record should be stored. 
@@ -36,7 +39,7 @@ defmodule Pinchflat.Metadata.MetadataFileHelpers do filepath = generate_filepath_for(database_record, "metadata.json.gz") {:ok, json} = Phoenix.json_library().encode(metadata_map) - :ok = FilesystemHelpers.write_p!(filepath, json, [:compressed]) + :ok = FilesystemUtils.write_p!(filepath, json, [:compressed]) filepath end @@ -47,36 +50,44 @@ defmodule Pinchflat.Metadata.MetadataFileHelpers do Returns {:ok, map()} | {:error, any} """ def read_compressed_metadata(filepath) do - {:ok, json} = File.open(filepath, [:read, :compressed], &IO.read(&1, :all)) + {:ok, json} = File.open(filepath, [:read, :compressed], &IO.read(&1, :eof)) Phoenix.json_library().decode(json) end @doc """ Downloads and stores a thumbnail for a media item, returning the filepath. + Chooses the highest quality thumbnail available and converts it to a JPG - Returns binary() + Returns nil if no thumbnail is available or if yt-dlp encounters an error + + Returns binary() | nil """ - def download_and_store_thumbnail_for(database_record, metadata_map) do - thumbnail_url = metadata_map["thumbnail"] - filepath = generate_filepath_for(database_record, Path.basename(thumbnail_url)) - thumbnail_blob = fetch_thumbnail_from_url(thumbnail_url) + def download_and_store_thumbnail_for(media_item_with_preloads) do + yt_dlp_filepath = generate_filepath_for(media_item_with_preloads, "thumbnail.%(ext)s") + real_filepath = generate_filepath_for(media_item_with_preloads, "thumbnail.jpg") + command_opts = [output: yt_dlp_filepath] + addl_opts = [use_cookies: Sources.use_cookies?(media_item_with_preloads.source, :metadata)] - :ok = FilesystemHelpers.write_p!(filepath, thumbnail_blob) - - filepath + case YtDlpMedia.download_thumbnail(media_item_with_preloads.original_url, command_opts, addl_opts) do + {:ok, _} -> real_filepath + _ -> nil + end end @doc """ Parses an upload date from the YYYYMMDD string returned in yt-dlp metadata - and returns a Date struct. 
+ and returns a DateTime struct, appending a time of 00:00:00Z. - Returns Date.t() + Returns DateTime.t() """ def parse_upload_date(upload_date) do <> <> <> <> <> = upload_date - Date.from_iso8601!("#{year}-#{month}-#{day}") + case DateTime.from_iso8601("#{year}-#{month}-#{day}T00:00:00Z") do + {:ok, datetime, _} -> datetime + _ -> raise "Invalid upload date: #{upload_date}" + end end @doc """ @@ -116,11 +127,19 @@ defmodule Pinchflat.Metadata.MetadataFileHelpers do end end - defp fetch_thumbnail_from_url(url) do - http_client = Application.get_env(:pinchflat, :http_client, Pinchflat.HTTP.HTTPClient) - {:ok, body} = http_client.get(url, [], body_format: :binary) + @doc """ + Attempts to determine the season and episode number from a media filepath. - body + Returns {:ok, {binary(), binary()}} | {:error, :indeterminable} + """ + def season_and_episode_from_media_filepath(media_filepath) do + # matches s + 1 or more digits + e + 1 or more digits (case-insensitive) + season_episode_regex = ~r/s(\d+)e(\d+)/i + + case Regex.scan(season_episode_regex, media_filepath) do + [[_, season, episode] | _] -> {:ok, {season, episode}} + _ -> {:error, :indeterminable} + end end defp generate_filepath_for(database_record, filename) do diff --git a/lib/pinchflat/metadata/metadata_parser.ex b/lib/pinchflat/metadata/metadata_parser.ex index 1173001..60456d8 100644 --- a/lib/pinchflat/metadata/metadata_parser.ex +++ b/lib/pinchflat/metadata/metadata_parser.ex @@ -8,6 +8,8 @@ defmodule Pinchflat.Metadata.MetadataParser do and not have it, ya know? 
""" + alias Pinchflat.YtDlp.Media, as: YtDlpMedia + @doc """ Parses the given JSON response from yt-dlp and returns a map of the needful media_item attributes, along with anything needed for @@ -24,15 +26,12 @@ defmodule Pinchflat.Metadata.MetadataParser do end defp parse_media_metadata(metadata) do - %{ - media_id: metadata["id"], - title: metadata["title"], - original_url: metadata["original_url"], - description: metadata["description"], - media_filepath: metadata["filepath"], - livestream: metadata["was_live"], - duration_seconds: metadata["duration"] && round(metadata["duration"]) - } + Map.merge( + Map.from_struct(YtDlpMedia.response_to_struct(metadata)), + %{ + media_filepath: metadata["filepath"] + } + ) end defp parse_subtitle_metadata(metadata) do diff --git a/lib/pinchflat/metadata/nfo_builder.ex b/lib/pinchflat/metadata/nfo_builder.ex index ad42331..38a26dd 100644 --- a/lib/pinchflat/metadata/nfo_builder.ex +++ b/lib/pinchflat/metadata/nfo_builder.ex @@ -6,8 +6,8 @@ defmodule Pinchflat.Metadata.NfoBuilder do import Pinchflat.Utils.XmlUtils, only: [safe: 1] + alias Pinchflat.Utils.FilesystemUtils alias Pinchflat.Metadata.MetadataFileHelpers - alias Pinchflat.Filesystem.FilesystemHelpers @doc """ Builds an NFO file for a media item (read: single "episode") and @@ -15,12 +15,12 @@ defmodule Pinchflat.Metadata.NfoBuilder do Returns the filepath of the NFO file. 
""" - def build_and_store_for_media_item(filepath, metadata) do - nfo = build_for_media_item(metadata) + def build_and_store_for_media_item(nfo_filepath, metadata) do + nfo = build_for_media_item(nfo_filepath, metadata) - FilesystemHelpers.write_p!(filepath, nfo) + FilesystemUtils.write_p!(nfo_filepath, nfo) - filepath + nfo_filepath end @doc """ @@ -32,15 +32,20 @@ defmodule Pinchflat.Metadata.NfoBuilder do def build_and_store_for_source(filepath, metadata) do nfo = build_for_source(metadata) - FilesystemHelpers.write_p!(filepath, nfo) + FilesystemUtils.write_p!(filepath, nfo) filepath end - defp build_for_media_item(metadata) do + defp build_for_media_item(nfo_filepath, metadata) do upload_date = MetadataFileHelpers.parse_upload_date(metadata["upload_date"]) + # NOTE: the filepath here isn't the path of the media item, it's the path that + # the NFO should be saved to. This works because the NFO's path is the same as + # the media's path, just with a different extension. If this ever changes I'll + # need to pass in the media item's path as well. + {season, episode} = determine_season_and_episode_number(nfo_filepath, upload_date) + # Cribbed from a combination of the Kodi wiki, ytdl-nfo, and ytdl-sub. - # WHO NEEDS A FANCY XML PARSER ANYWAY?! 
""" @@ -49,8 +54,8 @@ defmodule Pinchflat.Metadata.NfoBuilder do #{safe(metadata["id"])} #{safe(metadata["description"])} #{safe(upload_date)} - #{safe(upload_date.year)} - #{Calendar.strftime(upload_date, "%m%d")} + #{safe(season)} + #{episode} YouTube """ @@ -67,4 +72,11 @@ defmodule Pinchflat.Metadata.NfoBuilder do """ end + + defp determine_season_and_episode_number(filepath, upload_date) do + case MetadataFileHelpers.season_and_episode_from_media_filepath(filepath) do + {:ok, {season, episode}} -> {season, episode} + {:error, _} -> {upload_date.year, Calendar.strftime(upload_date, "%m%d")} + end + end end diff --git a/lib/pinchflat/metadata/source_image_parser.ex b/lib/pinchflat/metadata/source_image_parser.ex index a207462..26bacd7 100644 --- a/lib/pinchflat/metadata/source_image_parser.ex +++ b/lib/pinchflat/metadata/source_image_parser.ex @@ -2,7 +2,7 @@ defmodule Pinchflat.Metadata.SourceImageParser do @moduledoc """ Functions for parsing and storing source images. """ - alias Pinchflat.Filesystem.FilesystemHelpers + alias Pinchflat.Utils.FilesystemUtils @doc """ Given a base directory and source metadata, look for the appropriate images @@ -20,29 +20,49 @@ defmodule Pinchflat.Metadata.SourceImageParser do def store_source_images(base_directory, source_metadata) do (source_metadata["thumbnails"] || []) |> Enum.filter(&(&1["filepath"] != nil)) - |> select_useful_images() + |> select_useful_images(source_metadata) |> Enum.map(&move_image(&1, base_directory)) |> Enum.into(%{}) end - defp select_useful_images(images) do + defp select_useful_images(images, source_metadata) do labelled_images = - Enum.reduce(images, [], fn image_map, acc -> + Enum.reduce(images, %{}, fn image_map, acc -> case image_map do - %{"id" => "avatar_uncropped"} -> - acc ++ [{:poster, :poster_filepath, image_map["filepath"]}] - - %{"id" => "banner_uncropped"} -> - acc ++ [{:fanart, :fanart_filepath, image_map["filepath"]}] - - _ -> - acc + %{"id" => "avatar_uncropped"} -> 
put_image_key(acc, :poster, image_map["filepath"]) + %{"id" => "banner_uncropped"} -> put_image_key(acc, :fanart, image_map["filepath"]) + _ -> acc end end) labelled_images - |> Enum.concat([{:banner, :banner_filepath, determine_best_banner(images)}]) - |> Enum.filter(fn {_, _, tmp_filepath} -> tmp_filepath end) + |> add_fallback_poster(source_metadata) + |> put_image_key(:banner, determine_best_banner(images)) + |> Enum.filter(fn {_key, attrs} -> attrs.current_filepath end) + end + + # If a poster is set, short-circuit and return the images as-is + defp add_fallback_poster(%{poster: _} = images, _), do: images + + # If a poster is NOT set, see if we can find a suitable image to use as a fallback + defp add_fallback_poster(images, source_metadata) do + case source_metadata["entries"] do + nil -> images + [] -> images + [first_entry | _] -> add_poster_from_entry_thumbnail(images, first_entry) + end + end + + defp add_poster_from_entry_thumbnail(images, entry) do + thumbnail = + (entry["thumbnails"] || []) + |> Enum.reverse() + |> Enum.find(& &1["filepath"]) + + case thumbnail do + nil -> images + _ -> put_image_key(images, :poster, thumbnail["filepath"]) + end end defp determine_best_banner(images) do @@ -58,12 +78,22 @@ defmodule Pinchflat.Metadata.SourceImageParser do Map.get(best_candidate || %{}, "filepath") end - defp move_image({filename, source_attr_name, tmp_filepath}, base_directory) do - extension = Path.extname(tmp_filepath) - final_filepath = Path.join([base_directory, "#{filename}#{extension}"]) + defp move_image({_key, attrs}, base_directory) do + extension = Path.extname(attrs.current_filepath) + final_filepath = Path.join([base_directory, "#{attrs.final_filename}#{extension}"]) - FilesystemHelpers.cp_p!(tmp_filepath, final_filepath) + FilesystemUtils.cp_p!(attrs.current_filepath, final_filepath) - {source_attr_name, final_filepath} + {attrs.attribute_name, final_filepath} + end + + defp put_image_key(map, key, image) do + attribute_atom = 
String.to_existing_atom("#{key}_filepath") + + Map.put(map, key, %{ + attribute_name: attribute_atom, + final_filename: to_string(key), + current_filepath: image + }) end end diff --git a/lib/pinchflat/metadata/source_metadata_storage_worker.ex b/lib/pinchflat/metadata/source_metadata_storage_worker.ex index eda6297..feb793b 100644 --- a/lib/pinchflat/metadata/source_metadata_storage_worker.ex +++ b/lib/pinchflat/metadata/source_metadata_storage_worker.ex @@ -3,7 +3,7 @@ defmodule Pinchflat.Metadata.SourceMetadataStorageWorker do use Oban.Worker, queue: :remote_metadata, - tags: ["media_source", "source_metadata", "remote_metadata"], + tags: ["media_source", "source_metadata", "remote_metadata", "show_in_dashboard"], max_attempts: 3 require Logger @@ -38,9 +38,9 @@ defmodule Pinchflat.Metadata.SourceMetadataStorageWorker do - The NFO file for the source (if specified) - Downloads and stores source images (if specified) - The worker is kicked off after a source is inserted/updated - this can - take an unknown amount of time so don't rely on this data being here - before, say, the first indexing or downloading task is complete. + The worker is kicked off after a source is inserted or it's original_url + is updated - this can take an unknown amount of time so don't rely on this + data being here before, say, the first indexing or downloading task is complete. 
Returns :ok """ @@ -77,10 +77,8 @@ defmodule Pinchflat.Metadata.SourceMetadataStorageWorker do defp fetch_source_metadata_and_images(series_directory, source) do metadata_directory = MetadataFileHelpers.metadata_directory_for(source) - tmp_output_path = "#{tmp_directory()}/#{StringUtils.random_string(16)}/source_image.%(ext)S" - opts = [:write_all_thumbnails, convert_thumbnails: "jpg", output: tmp_output_path] - {:ok, metadata} = MediaCollection.get_source_metadata(source.original_url, opts) + {:ok, metadata} = fetch_metadata_for_source(source) metadata_image_attrs = SourceImageParser.store_source_images(metadata_directory, metadata) if source.media_profile.download_source_images && series_directory do @@ -94,7 +92,9 @@ defmodule Pinchflat.Metadata.SourceMetadataStorageWorker do defp determine_series_directory(source) do output_path = DownloadOptionBuilder.build_output_path_for(source) - {:ok, %{filepath: filepath}} = MediaCollection.get_source_details(source.original_url, output: output_path) + runner_opts = [output: output_path] + addl_opts = [use_cookies: Sources.use_cookies?(source, :metadata)] + {:ok, %{filepath: filepath}} = MediaCollection.get_source_details(source.original_url, runner_opts, addl_opts) case MetadataFileHelpers.series_directory_from_media_filepath(filepath) do {:ok, series_directory} -> series_directory @@ -110,6 +110,21 @@ defmodule Pinchflat.Metadata.SourceMetadataStorageWorker do end end + defp fetch_metadata_for_source(source) do + tmp_output_path = "#{tmp_directory()}/#{StringUtils.random_string(16)}/source_image.%(ext)S" + base_opts = [convert_thumbnails: "jpg", output: tmp_output_path] + should_use_cookies = Sources.use_cookies?(source, :metadata) + + opts = + if source.collection_type == :channel do + base_opts ++ [:write_all_thumbnails, playlist_items: 0] + else + base_opts ++ [:write_thumbnail, playlist_items: 1] + end + + MediaCollection.get_source_metadata(source.original_url, opts, use_cookies: should_use_cookies) + end + defp 
tmp_directory do Application.get_env(:pinchflat, :tmpfile_directory) end diff --git a/lib/pinchflat/podcasts/opml_feed_builder.ex b/lib/pinchflat/podcasts/opml_feed_builder.ex new file mode 100644 index 0000000..c764a66 --- /dev/null +++ b/lib/pinchflat/podcasts/opml_feed_builder.ex @@ -0,0 +1,40 @@ +defmodule Pinchflat.Podcasts.OpmlFeedBuilder do + @moduledoc """ + Methods for building an OPML feed for a list of sources. + """ + + import Pinchflat.Utils.XmlUtils, only: [safe: 1] + + alias PinchflatWeb.Router.Helpers, as: Routes + + @doc """ + Builds an OPML feed for a given list of sources. + + Returns an XML document as a string. + """ + def build(url_base, sources) do + sources_xml = + Enum.map( + sources, + &""" + + """ + ) + + """ + + + + All Sources + + + #{Enum.join(sources_xml, "\n")} + + + """ + end + + defp source_route(url_base, source) do + Path.join(url_base, "#{Routes.podcast_path(PinchflatWeb.Endpoint, :rss_feed, source.uuid)}.xml") + end +end diff --git a/lib/pinchflat/podcasts/podcast_helpers.ex b/lib/pinchflat/podcasts/podcast_helpers.ex index 3353070..30ba17a 100644 --- a/lib/pinchflat/podcasts/podcast_helpers.ex +++ b/lib/pinchflat/podcasts/podcast_helpers.ex @@ -4,11 +4,26 @@ defmodule Pinchflat.Podcasts.PodcastHelpers do or its media items """ + use Pinchflat.Media.MediaQuery + use Pinchflat.Sources.SourcesQuery + alias Pinchflat.Repo - alias Pinchflat.Media.MediaQuery alias Pinchflat.Metadata.MediaMetadata alias Pinchflat.Metadata.SourceMetadata + @doc """ + Returns a list of sources that are not marked for deletion. + + Returns: [%Source{}] + """ + def opml_sources() do + SourcesQuery.new() + |> select([s], %{custom_name: s.custom_name, uuid: s.uuid}) + |> where([s], is_nil(s.marked_for_deletion_at)) + |> order_by(asc: :custom_name) + |> Repo.all() + end + @doc """ Returns a list of media items that have been downloaded to disk and have been proven to still exist there. 
@@ -23,11 +38,11 @@ defmodule Pinchflat.Podcasts.PodcastHelpers do Returns: [%MediaItem{}] """ def persisted_media_items_for(source, opts \\ []) do - limit = Keyword.get(opts, :limit, 500) + limit = Keyword.get(opts, :limit, 1_000) MediaQuery.new() - |> MediaQuery.for_source(source) - |> MediaQuery.with_media_filepath() + |> where(^dynamic(^MediaQuery.for_source(source) and ^MediaQuery.downloaded())) + |> order_by(desc: :uploaded_at) |> Repo.maybe_limit(limit) |> Repo.all() |> Enum.filter(fn media_item -> File.exists?(media_item.media_filepath) end) diff --git a/lib/pinchflat/podcasts/rss_feed_builder.ex b/lib/pinchflat/podcasts/rss_feed_builder.ex index 5e23edd..d5a5aea 100644 --- a/lib/pinchflat/podcasts/rss_feed_builder.ex +++ b/lib/pinchflat/podcasts/rss_feed_builder.ex @@ -7,7 +7,6 @@ defmodule Pinchflat.Podcasts.RssFeedBuilder do import Pinchflat.Utils.XmlUtils, only: [safe: 1] - alias Pinchflat.Utils.DatetimeUtils alias Pinchflat.Podcasts.PodcastHelpers alias PinchflatWeb.Router.Helpers, as: Routes @@ -16,12 +15,12 @@ defmodule Pinchflat.Podcasts.RssFeedBuilder do Only MediaItems that have been persisted will be included in the feed. ## Options: - - `:limit` - The maximum number of media items to include in the feed. Defaults to 300. + - `:limit` - The maximum number of media items to include in the feed. Defaults to 2,000. Returns an XML document as a string. 
""" def build(source, opts \\ []) do - limit = Keyword.get(opts, :limit, 300) + limit = Keyword.get(opts, :limit, 2_000) url_base = Keyword.get(opts, :url_base, PinchflatWeb.Endpoint.url()) media_items = PodcastHelpers.persisted_media_items_for(source, limit: limit) @@ -75,13 +74,15 @@ defmodule Pinchflat.Podcasts.RssFeedBuilder do end defp build_media_item_xml(source, media_item, url_base) do + item_image_path = item_image_path(url_base, media_item) + """ #{media_item.uuid} #{safe(media_item.title)} #{safe(media_item.original_url)} #{safe(media_item.description)} - #{generate_upload_date(media_item)} + #{Calendar.strftime(media_item.uploaded_at, @datetime_format)} #{media_item.duration_seconds} #{safe(source.custom_name)} #{safe(media_item.title)} + + #{item_image_path && ~s()} + #{item_image_path && ~s()} + false """ @@ -117,10 +122,14 @@ defmodule Pinchflat.Podcasts.RssFeedBuilder do end end - defp generate_upload_date(media_item) do - media_item.upload_date - |> DatetimeUtils.date_to_datetime() - |> Calendar.strftime(@datetime_format) + def item_image_path(url_base, media_item) do + if media_item.thumbnail_filepath && File.exists?(media_item.thumbnail_filepath) do + extension = Path.extname(media_item.thumbnail_filepath) + + Path.join(url_base, "#{podcast_route(:episode_image, media_item.uuid)}#{extension}") + else + nil + end end defp podcast_route(action, params) do diff --git a/lib/pinchflat/profiles/media_profile.ex b/lib/pinchflat/profiles/media_profile.ex index 57a3b7f..90c93d6 100644 --- a/lib/pinchflat/profiles/media_profile.ex +++ b/lib/pinchflat/profiles/media_profile.ex @@ -6,6 +6,7 @@ defmodule Pinchflat.Profiles.MediaProfile do use Ecto.Schema import Ecto.Changeset + alias __MODULE__ alias Pinchflat.Sources.Source @allowed_fields ~w( @@ -25,13 +26,18 @@ defmodule Pinchflat.Profiles.MediaProfile do sponsorblock_categories shorts_behaviour livestream_behaviour + audio_track preferred_resolution + media_container + redownload_delay_days + 
marked_for_deletion_at )a @required_fields ~w(name output_path_template)a schema "media_profiles" do field :name, :string + field :redownload_delay_days, :integer field :output_path_template, :string, default: "/{{ source_custom_name }}/{{ upload_yyyy_mm_dd }} {{ title }}/{{ title }} [{{ id }}].{{ ext }}" @@ -49,7 +55,7 @@ defmodule Pinchflat.Profiles.MediaProfile do field :embed_metadata, :boolean, default: false field :download_nfo, :boolean, default: false - field :sponsorblock_behaviour, Ecto.Enum, values: [:disabled, :remove], default: :disabled + field :sponsorblock_behaviour, Ecto.Enum, values: [:disabled, :mark, :remove], default: :disabled field :sponsorblock_categories, {:array, :string}, default: [] # NOTE: these do NOT speed up indexing - the indexer still has to go # through the entire collection to determine if a media is a short or @@ -60,8 +66,11 @@ defmodule Pinchflat.Profiles.MediaProfile do # See `build_format_clauses` in the Media context for more. field :shorts_behaviour, Ecto.Enum, values: ~w(include exclude only)a, default: :include field :livestream_behaviour, Ecto.Enum, values: ~w(include exclude only)a, default: :include + field :audio_track, :string + field :preferred_resolution, Ecto.Enum, values: ~w(4320p 2160p 1440p 1080p 720p 480p 360p audio)a, default: :"1080p" + field :media_container, :string, default: nil - field :preferred_resolution, Ecto.Enum, values: ~w(2160p 1080p 720p 480p 360p audio)a, default: :"1080p" + field :marked_for_deletion_at, :utc_datetime has_many :sources, Source @@ -75,10 +84,25 @@ defmodule Pinchflat.Profiles.MediaProfile do |> validate_required(@required_fields) # Ensures it ends with `.{{ ext }}` or `.%(ext)s` or similar (with a little wiggle room) |> validate_format(:output_path_template, ext_regex(), message: "must end with .{{ ext }}") + |> validate_number(:redownload_delay_days, greater_than_or_equal_to: 0) |> unique_constraint(:name) end - defp ext_regex do + @doc false + def ext_regex do ~r/\.({{ ?ext 
?}}|%\( ?ext ?\)[sS])$/ end + + @doc false + def json_exluded_fields do + ~w(__meta__ __struct__ sources)a + end + + defimpl Jason.Encoder, for: MediaProfile do + def encode(value, opts) do + value + |> Map.drop(MediaProfile.json_exluded_fields()) + |> Jason.Encode.map(opts) + end + end end diff --git a/lib/pinchflat/profiles/media_profile_deletion_worker.ex b/lib/pinchflat/profiles/media_profile_deletion_worker.ex new file mode 100644 index 0000000..230a085 --- /dev/null +++ b/lib/pinchflat/profiles/media_profile_deletion_worker.ex @@ -0,0 +1,38 @@ +defmodule Pinchflat.Profiles.MediaProfileDeletionWorker do + @moduledoc false + + use Oban.Worker, + queue: :local_data, + tags: ["media_profiles", "local_data"] + + require Logger + + alias __MODULE__ + alias Pinchflat.Profiles + + @doc """ + Starts the profile deletion worker. Does not attach it to a task like `kickoff_with_task/2` + since deletion also cancels all tasks for the profile + + Returns {:ok, %Oban.Job{}} | {:error, %Ecto.Changeset{}} + """ + def kickoff(profile, job_args \\ %{}, job_opts \\ []) do + %{id: profile.id} + |> Map.merge(job_args) + |> MediaProfileDeletionWorker.new(job_opts) + |> Oban.insert() + end + + @doc """ + Deletes a profile and optionally deletes its files + + Returns :ok + """ + @impl Oban.Worker + def perform(%Oban.Job{args: %{"id" => profile_id} = args}) do + delete_files = Map.get(args, "delete_files", false) + profile = Profiles.get_media_profile!(profile_id) + + Profiles.delete_media_profile(profile, delete_files: delete_files) + end +end diff --git a/lib/pinchflat/profiles/profiles_query.ex b/lib/pinchflat/profiles/profiles_query.ex new file mode 100644 index 0000000..caa1315 --- /dev/null +++ b/lib/pinchflat/profiles/profiles_query.ex @@ -0,0 +1,29 @@ +defmodule Pinchflat.Profiles.ProfilesQuery do + @moduledoc """ + Query helpers for the Profiles context. + + These methods are made to be one-ish liners used + to compose queries. Each method should strive to do + _one_ thing. 
These don't need to be tested as + they are just building blocks for other functionality + which, itself, will be tested. + """ + import Ecto.Query, warn: false + + alias Pinchflat.Profiles.MediaProfile + + # This allows the module to be aliased and query methods to be used + # all in one go + # usage: use Pinchflat.Profiles.ProfilesQuery + defmacro __using__(_opts) do + quote do + import Ecto.Query, warn: false + + alias unquote(__MODULE__) + end + end + + def new do + MediaProfile + end +end diff --git a/lib/pinchflat/prom_ex.ex b/lib/pinchflat/prom_ex.ex new file mode 100644 index 0000000..a46347d --- /dev/null +++ b/lib/pinchflat/prom_ex.ex @@ -0,0 +1,40 @@ +defmodule Pinchflat.PromEx do + @moduledoc """ + Configuration for the PromEx library which provides Prometheus metrics + """ + + use PromEx, otp_app: :pinchflat + + alias PromEx.Plugins + + @impl true + def plugins do + [ + Plugins.Application, + Plugins.Beam, + {Plugins.Phoenix, router: PinchflatWeb.Router, endpoint: PinchflatWeb.Endpoint}, + Plugins.Ecto, + Plugins.Oban, + Plugins.PhoenixLiveView + ] + end + + @impl true + def dashboard_assigns do + [ + default_selected_interval: "30s" + ] + end + + @impl true + def dashboards do + [ + {:prom_ex, "application.json"}, + {:prom_ex, "beam.json"}, + {:prom_ex, "phoenix.json"}, + {:prom_ex, "ecto.json"}, + {:prom_ex, "oban.json"}, + {:prom_ex, "phoenix_live_view.json"} + ] + end +end diff --git a/lib/pinchflat/release.ex b/lib/pinchflat/release.ex index 7648efe..c6060c3 100644 --- a/lib/pinchflat/release.ex +++ b/lib/pinchflat/release.ex @@ -7,7 +7,7 @@ defmodule Pinchflat.Release do require Logger - alias Pinchflat.Filesystem.FilesystemHelpers + alias Pinchflat.Utils.FilesystemUtils def migrate do load_app() @@ -26,20 +26,25 @@ defmodule Pinchflat.Release do load_app() directories = - Enum.uniq([ + [ "/config", "/downloads", + "/etc/yt-dlp", + "/etc/yt-dlp/plugins", Application.get_env(:pinchflat, :media_directory), Application.get_env(:pinchflat, 
:tmpfile_directory), Application.get_env(:pinchflat, :extras_directory), - Application.get_env(:pinchflat, :metadata_directory) - ]) + Application.get_env(:pinchflat, :metadata_directory), + Application.get_env(:tzdata, :data_dir) + ] + |> Enum.uniq() + |> Enum.filter(&(&1 != nil)) Enum.each(directories, fn dir -> Logger.info("Checking permissions for #{dir}") filepath = Path.join([dir, ".keep"]) - case FilesystemHelpers.write_p(filepath, "") do + case FilesystemUtils.write_p(filepath, "") do :ok -> Logger.info("Permissions OK") diff --git a/lib/pinchflat/settings/setting.ex b/lib/pinchflat/settings/setting.ex index 5fb7cf6..f2a6b0a 100644 --- a/lib/pinchflat/settings/setting.ex +++ b/lib/pinchflat/settings/setting.ex @@ -9,18 +9,40 @@ defmodule Pinchflat.Settings.Setting do @allowed_fields [ :onboarding, :pro_enabled, - :yt_dlp_version + :yt_dlp_version, + :apprise_version, + :apprise_server, + :video_codec_preference, + :audio_codec_preference, + :youtube_api_key, + :extractor_sleep_interval_seconds, + :download_throughput_limit, + :restrict_filenames ] - @required_fields ~w( - onboarding - pro_enabled - )a + @required_fields [ + :onboarding, + :pro_enabled, + :video_codec_preference, + :audio_codec_preference, + :extractor_sleep_interval_seconds + ] schema "settings" do field :onboarding, :boolean, default: true field :pro_enabled, :boolean, default: false field :yt_dlp_version, :string + field :apprise_version, :string + field :apprise_server, :string + field :youtube_api_key, :string + field :route_token, :string + field :extractor_sleep_interval_seconds, :integer, default: 0 + # This is a string because it accepts values like "100K" or "4.2M" + field :download_throughput_limit, :string + field :restrict_filenames, :boolean, default: false + + field :video_codec_preference, :string + field :audio_codec_preference, :string end @doc false @@ -28,5 +50,6 @@ defmodule Pinchflat.Settings.Setting do setting |> cast(attrs, @allowed_fields) |> 
validate_required(@required_fields) + |> validate_number(:extractor_sleep_interval_seconds, greater_than_or_equal_to: 0) end end diff --git a/lib/pinchflat/settings/settings.ex b/lib/pinchflat/settings/settings.ex index 4b133c8..25a8d68 100644 --- a/lib/pinchflat/settings/settings.ex +++ b/lib/pinchflat/settings/settings.ex @@ -20,6 +20,17 @@ defmodule Pinchflat.Settings do |> Repo.one() end + @doc """ + Updates the setting record. + + Returns {:ok, %Setting{}} | {:error, %Ecto.Changeset{}} + """ + def update_setting(%Setting{} = setting, attrs) do + setting + |> Setting.changeset(attrs) + |> Repo.update() + end + @doc """ Updates a setting, returning the new value. Is setup to take a keyword list argument so you @@ -29,8 +40,7 @@ defmodule Pinchflat.Settings do """ def set([{attr, value}]) do record() - |> Setting.changeset(%{attr => value}) - |> Repo.update() + |> update_setting(%{attr => value}) |> case do {:ok, %{^attr => _}} -> {:ok, value} {:ok, _} -> {:error, :invalid_key} @@ -61,4 +71,11 @@ defmodule Pinchflat.Settings do {:error, _} -> raise "Setting `#{name}` not found" end end + + @doc """ + Returns `%Ecto.Changeset{}` + """ + def change_setting(%Setting{} = setting, attrs \\ %{}) do + Setting.changeset(setting, attrs) + end end diff --git a/lib/pinchflat/slow_indexing/file_follower_server.ex b/lib/pinchflat/slow_indexing/file_follower_server.ex index 91c514d..655a6da 100644 --- a/lib/pinchflat/slow_indexing/file_follower_server.ex +++ b/lib/pinchflat/slow_indexing/file_follower_server.ex @@ -106,7 +106,7 @@ defmodule Pinchflat.SlowIndexing.FileFollowerServer do {:noreply, %{state | last_activity: DateTime.utc_now()}} :eof -> - Logger.debug("EOF reached, waiting before trying to read new lines") + Logger.debug("Current batch of media processed. 
Will check again in #{@poll_interval_ms}ms") Process.send_after(self(), :read_new_lines, @poll_interval_ms) {:noreply, state} diff --git a/lib/pinchflat/slow_indexing/media_collection_indexing_worker.ex b/lib/pinchflat/slow_indexing/media_collection_indexing_worker.ex index 70f9c2e..ae555ff 100644 --- a/lib/pinchflat/slow_indexing/media_collection_indexing_worker.ex +++ b/lib/pinchflat/slow_indexing/media_collection_indexing_worker.ex @@ -4,16 +4,18 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorker do use Oban.Worker, queue: :media_collection_indexing, unique: [period: :infinity, states: [:available, :scheduled, :retryable]], - tags: ["media_source", "media_collection_indexing"] + tags: ["media_source", "media_collection_indexing", "show_in_dashboard"] require Logger alias __MODULE__ alias Pinchflat.Tasks alias Pinchflat.Sources + alias Pinchflat.Settings alias Pinchflat.Sources.Source alias Pinchflat.FastIndexing.FastIndexingWorker alias Pinchflat.SlowIndexing.SlowIndexingHelpers + alias Pinchflat.Lifecycle.Notifications.SourceNotifications @doc """ Starts the source slow indexing worker and creates a task for the source. @@ -59,9 +61,8 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorker do 5. If the source uses fast indexing, that job is kicked off as well. It uses RSS to run a smaller, faster, and more frequent index. That job handles rescheduling itself but largely has a similar behaviour to this - job in that it kicks off index and maybe download jobs. The biggest difference - is that an index job is kicked off _for each new media item_ as opposed - to one larger index job. Check out `MediaIndexingWorker` comments for more. + job in that it runs an index and maybe kicks off media download jobs. + Check out `FastIndexingWorker` comments for more. 6. If the job reschedules, the cycle from step 3 repeats until the heat death of the universe.
The user changing things like the index frequency can dequeue or reschedule jobs as well @@ -78,21 +79,21 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorker do case {source.index_frequency_minutes, source.last_indexed_at} do {index_freq, _} when index_freq > 0 -> # If the indexing is on a schedule simply run indexing and reschedule - SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source) + perform_indexing_and_notification(source, was_forced: args["force"]) maybe_enqueue_fast_indexing_task(source) reschedule_indexing(source) {_, nil} -> # If the source has never been indexed, index it once # even if it's not meant to reschedule - SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source) + perform_indexing_and_notification(source, was_forced: args["force"]) :ok _ -> # If the source HAS been indexed and is not meant to reschedule, # perform a no-op (unless forced) if args["force"] do - SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source) + perform_indexing_and_notification(source, was_forced: true) end :ok @@ -102,6 +103,14 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorker do Ecto.StaleEntryError -> Logger.info("#{__MODULE__} discarded: source #{source_id} stale") end + defp perform_indexing_and_notification(source, indexing_opts) do + apprise_server = Settings.get!(:apprise_server) + + SourceNotifications.wrap_new_media_notification(apprise_server, source, fn -> + SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source, indexing_opts) + end) + end + defp reschedule_indexing(source) do next_run_in = source.index_frequency_minutes * 60 diff --git a/lib/pinchflat/slow_indexing/slow_indexing_helpers.ex b/lib/pinchflat/slow_indexing/slow_indexing_helpers.ex index c95306c..8721b21 100644 --- a/lib/pinchflat/slow_indexing/slow_indexing_helpers.ex +++ b/lib/pinchflat/slow_indexing/slow_indexing_helpers.ex @@ -5,6 +5,8 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do Many of 
these methods are made to be kickoff or be consumed by workers. """ + use Pinchflat.Media.MediaQuery + require Logger alias Pinchflat.Repo @@ -14,32 +16,52 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do alias Pinchflat.Sources.Source alias Pinchflat.Media.MediaItem alias Pinchflat.YtDlp.MediaCollection + alias Pinchflat.Utils.FilesystemUtils alias Pinchflat.Downloading.DownloadingHelpers alias Pinchflat.SlowIndexing.FileFollowerServer - alias Pinchflat.Downloading.MediaDownloadWorker + alias Pinchflat.Downloading.DownloadOptionBuilder alias Pinchflat.SlowIndexing.MediaCollectionIndexingWorker alias Pinchflat.YtDlp.Media, as: YtDlpMedia @doc """ - Starts tasks for indexing a source's media regardless of the source's indexing - frequency. It's assumed the caller will check for indexing frequency. + Kills old indexing tasks and starts a new task to index the media collection. - Returns {:ok, %Task{}}. + The job is delayed based on the source's `index_frequency_minutes` setting unless + one of the following is true: + - The `force` option is set to true + - The source has never been indexed before + - The source has been indexed before, but the last indexing job was more than + `index_frequency_minutes` ago + + Returns {:ok, %Task{}} """ def kickoff_indexing_task(%Source{} = source, job_args \\ %{}, job_opts \\ []) do - Tasks.delete_pending_tasks_for(source, "FastIndexingWorker") - Tasks.delete_pending_tasks_for(source, "MediaIndexingWorker") - Tasks.delete_pending_tasks_for(source, "MediaCollectionIndexingWorker") + job_offset_seconds = if job_args[:force], do: 0, else: calculate_job_offset_seconds(source) - MediaCollectionIndexingWorker.kickoff_with_task(source, job_args, job_opts) + Tasks.delete_pending_tasks_for(source, "MediaCollectionIndexingWorker", include_executing: true) + + MediaCollectionIndexingWorker.kickoff_with_task(source, job_args, job_opts ++ [schedule_in: job_offset_seconds]) + end + + @doc """ + A helper method to delete all 
indexing-related tasks for a source. + Optionally, you can include executing tasks in the deletion process. + + Returns :ok + """ + def delete_indexing_tasks(%Source{} = source, opts \\ []) do + include_executing = Keyword.get(opts, :include_executing, false) + + Tasks.delete_pending_tasks_for(source, "FastIndexingWorker", include_executing: include_executing) + Tasks.delete_pending_tasks_for(source, "MediaCollectionIndexingWorker", include_executing: include_executing) end @doc """ Given a media source, creates (indexes) the media by creating media_items for each media ID in the source. Afterward, kicks off a download task for each pending media - item belonging to the source. You can't tell me the method name isn't descriptive! - Returns a list of media items or changesets (if the media item couldn't be created). + item belonging to the source. Returns a list of media items or changesets + (if the media item couldn't be created). Indexing is slow and usually returns a list of all media data at once for record creation. To help with this, we use a file follower to watch the file that yt-dlp writes to @@ -47,20 +69,33 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do clarity to the user experience. This has a few things to be aware of which are documented below in the file watcher setup method. + Additionally, in the case of a repeat index we create a download archive file that + contains some media IDs that we've indexed in the past. Note that this archive doesn't + contain the most recent IDs but rather a subset of IDs that are offset by some amount. + Practically, this means that we'll re-index a small handful of media that we've recently + indexed, but this is a good thing since it'll let us pick up on any recent changes to the + most recent media items. + + We don't create a download archive for playlists (only channels), nor do we create one if + the indexing was forced by the user. 
+ NOTE: downloads are only enqueued if the source is set to download media. Downloads are also enqueued for ALL pending media items, not just the ones that were indexed in this job run. This should ensure that any stragglers are caught if, for some reason, they weren't enqueued or somehow got de-queued. - Since indexing returns all media data EVERY TIME, we that that opportunity to update - indexing metadata for media items that have already been created. + Available options: + - `was_forced`: Whether the indexing was forced by the user Returns [%MediaItem{} | %Ecto.Changeset{}] """ - def index_and_enqueue_download_for_media_items(%Source{} = source) do + def index_and_enqueue_download_for_media_items(%Source{} = source, opts \\ []) do + # The media_profile is needed to determine the quality options to _then_ determine a more + # accurate predicted filepath + source = Repo.preload(source, [:media_profile]) # See the method definition below for more info on how file watchers work # (important reading if you're not familiar with it) - {:ok, media_attributes} = get_media_attributes_for_collection_and_setup_file_watcher(source) + {:ok, media_attributes} = setup_file_watcher_and_kickoff_indexing(source, opts) # Reload because the source may have been updated during the (long-running) indexing process # and important settings like `download_media` may have changed. source = Repo.reload!(source) @@ -84,19 +119,28 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do # lines (ie: you should gracefully fail if you can't parse a line). # # This works in-tandem with the normal (blocking) media indexing behaviour. When - # the `get_media_attributes_for_collection` method completes it'll return the FULL result to - # the caller for parsing. Ideally, every item in the list will have already + # the `setup_file_watcher_and_kickoff_indexing` method completes it'll return the + # FULL result to the caller for parsing. 
Ideally, every item in the list will have already # been processed by the file follower, but if not, the caller handles creation # of any media items that were missed/initially failed. # # It attempts a graceful shutdown of the file follower after the indexing is done, # but the FileFollowerServer will also stop itself if it doesn't see any activity # for a sufficiently long time. - defp get_media_attributes_for_collection_and_setup_file_watcher(source) do + defp setup_file_watcher_and_kickoff_indexing(source, opts) do + was_forced = Keyword.get(opts, :was_forced, false) {:ok, pid} = FileFollowerServer.start_link() handler = fn filepath -> setup_file_follower_watcher(pid, filepath, source) end - result = MediaCollection.get_media_attributes_for_collection(source.original_url, file_listener_handler: handler) + should_use_cookies = Sources.use_cookies?(source, :indexing) + + command_opts = + [output: DownloadOptionBuilder.build_output_path_for(source)] ++ + DownloadOptionBuilder.build_quality_options_for(source) ++ + build_download_archive_options(source, was_forced) + + runner_opts = [file_listener_handler: handler, use_cookies: should_use_cookies] + result = MediaCollection.get_media_attributes_for_collection(source.original_url, command_opts, runner_opts) FileFollowerServer.stop(pid) @@ -127,14 +171,74 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do case Media.create_media_item_from_backend_attrs(source, media_attrs) do {:ok, %MediaItem{} = media_item} -> - if source.download_media && Media.pending_download?(media_item) do - Logger.debug("FileFollowerServer Handler: Enqueuing download task for #{inspect(media_attrs)}") - - MediaDownloadWorker.kickoff_with_task(media_item) - end + DownloadingHelpers.kickoff_download_if_pending(media_item) {:error, changeset} -> changeset end end + + # Find the difference between the current time and the last time the source was indexed + defp calculate_job_offset_seconds(%Source{last_indexed_at: nil}), do: 0 + + defp 
calculate_job_offset_seconds(source) do + offset_seconds = DateTime.diff(DateTime.utc_now(), source.last_indexed_at, :second) + index_frequency_seconds = source.index_frequency_minutes * 60 + + max(0, index_frequency_seconds - offset_seconds) + end + + # The download archive file works in tandem with --break-on-existing to stop + # yt-dlp once we've hit media items we've already indexed. But we generate + # this list with a bit of an offset so we do intentionally re-scan some media + # items to pick up any recent changes (see `get_media_items_for_download_archive`). + # + # From there, we format the media IDs in the way that yt-dlp expects (ie: " ") + # and return the filepath to the caller. + defp create_download_archive_file(source) do + tmpfile = FilesystemUtils.generate_metadata_tmpfile(:txt) + + archive_contents = + source + |> get_media_items_for_download_archive() + |> Enum.map_join("\n", fn media_item -> "youtube #{media_item.media_id}" end) + + case File.write(tmpfile, archive_contents) do + :ok -> tmpfile + err -> err + end + end + + # Sorting by `uploaded_at` is important because we want to re-index the most recent + # media items first but there is no guarantee of any correlation between ID and uploaded_at. + # + # The offset is important because we want to re-index some media items that we've + # recently indexed to pick up on any changes. The limit is because we want this mechanism + # to work even if, for example, the video we were using as a stopping point was deleted. + # It's not a perfect system, but it should do well enough. + # + # The chosen limit and offset are arbitrary, independent, and vibes-based.
Feel free to + # tweak as-needed + defp get_media_items_for_download_archive(source) do + MediaQuery.new() + |> where(^MediaQuery.for_source(source)) + |> order_by(desc: :uploaded_at) + |> limit(50) + |> offset(20) + |> Repo.all() + end + + # The download archive isn't useful for playlists (since those are ordered arbitrarily) + # and we don't want to use it if the indexing was forced by the user. In other words, + # only create an archive for channels that are being indexed as part of their regular + # indexing schedule. The first indexing pass should also not create an archive. + defp build_download_archive_options(%Source{collection_type: :playlist}, _was_forced), do: [] + defp build_download_archive_options(%Source{last_indexed_at: nil}, _was_forced), do: [] + defp build_download_archive_options(_source, true), do: [] + + defp build_download_archive_options(source, _was_forced) do + archive_file = create_download_archive_file(source) + + [:break_on_existing, download_archive: archive_file] + end end diff --git a/lib/pinchflat/sources/source.ex b/lib/pinchflat/sources/source.ex index c2ef216..00b4776 100644 --- a/lib/pinchflat/sources/source.ex +++ b/lib/pinchflat/sources/source.ex @@ -7,12 +7,15 @@ defmodule Pinchflat.Sources.Source do import Ecto.Changeset import Pinchflat.Utils.ChangesetUtils + alias __MODULE__ + alias Pinchflat.Repo alias Pinchflat.Tasks.Task alias Pinchflat.Media.MediaItem alias Pinchflat.Profiles.MediaProfile alias Pinchflat.Metadata.SourceMetadata @allowed_fields ~w( + enabled collection_name collection_id collection_type @@ -25,6 +28,7 @@ defmodule Pinchflat.Sources.Source do series_directory index_frequency_minutes fast_index + cookie_behaviour download_media last_indexed_at original_url @@ -32,6 +36,10 @@ defmodule Pinchflat.Sources.Source do retention_period_days title_filter_regex media_profile_id + output_path_template_override + marked_for_deletion_at + min_duration_seconds + max_duration_seconds )a # Expensive API calls are made 
when a source is inserted/updated so @@ -57,6 +65,7 @@ defmodule Pinchflat.Sources.Source do )a schema "sources" do + field :enabled, :boolean, default: true # This is _not_ used as the primary key or internally in the database # relations. This is only used to prevent an enumeration attack on the streaming # and RSS feed endpoints since those _must_ be public (ie: no basic auth) @@ -69,6 +78,7 @@ defmodule Pinchflat.Sources.Source do field :collection_type, Ecto.Enum, values: [:channel, :playlist] field :index_frequency_minutes, :integer, default: 60 * 24 field :fast_index, :boolean, default: false + field :cookie_behaviour, Ecto.Enum, values: [:disabled, :when_needed, :all_operations], default: :disabled field :download_media, :boolean, default: true field :last_indexed_at, :utc_datetime # Only download media items that were published after this date @@ -76,6 +86,10 @@ defmodule Pinchflat.Sources.Source do field :retention_period_days, :integer field :original_url, :string field :title_filter_regex, :string + field :output_path_template_override, :string + + field :min_duration_seconds, :integer + field :max_duration_seconds, :integer field :series_directory, :string field :nfo_filepath, :string @@ -83,6 +97,8 @@ defmodule Pinchflat.Sources.Source do field :fanart_filepath, :string field :banner_filepath, :string + field :marked_for_deletion_at, :utc_datetime + belongs_to :media_profile, MediaProfile has_one :metadata, SourceMetadata, on_replace: :update @@ -108,7 +124,12 @@ defmodule Pinchflat.Sources.Source do |> dynamic_default(:custom_name, fn cs -> get_field(cs, :collection_name) end) |> dynamic_default(:uuid, fn _ -> Ecto.UUID.generate() end) |> validate_required(required_fields) + |> validate_title_regex() + |> validate_min_and_max_durations() |> validate_number(:retention_period_days, greater_than_or_equal_to: 0) + # Ensures it ends with `.{{ ext }}` or `.%(ext)s` or similar (with a little wiggle room) + |> validate_format(:output_path_template_override, 
MediaProfile.ext_regex(), message: "must end with .{{ ext }}") + |> validate_format(:original_url, youtube_channel_or_playlist_regex(), message: "must be a channel or playlist URL") |> cast_assoc(:metadata, with: &SourceMetadata.changeset/2, required: false) |> unique_constraint([:collection_id, :media_profile_id, :title_filter_regex], error_key: :original_url) end @@ -122,11 +143,52 @@ defmodule Pinchflat.Sources.Source do @doc false def fast_index_frequency do # minutes - 15 + 10 end @doc false def filepath_attributes do ~w(nfo_filepath fanart_filepath poster_filepath banner_filepath)a end + + @doc false + def json_exluded_fields do + ~w(__meta__ __struct__ metadata tasks media_items)a + end + + def youtube_channel_or_playlist_regex do + # Validate that the original URL is not a video URL + # Also matches if the string does NOT contain youtube.com or youtu.be. This preserves my tenuous support + # for non-youtube sources. + ~r<^(?:(?!youtube\.com/(watch|shorts|embed)|youtu\.be).)*$> + end + + defp validate_title_regex(%{changes: %{title_filter_regex: regex}} = changeset) when is_binary(regex) do + case Ecto.Adapters.SQL.query(Repo, "SELECT regexp_like('', ?)", [regex]) do + {:ok, _} -> changeset + _ -> add_error(changeset, :title_filter_regex, "is invalid") + end + end + + defp validate_title_regex(changeset), do: changeset + + defp validate_min_and_max_durations(changeset) do + min_duration = get_change(changeset, :min_duration_seconds) + max_duration = get_change(changeset, :max_duration_seconds) + + case {min_duration, max_duration} do + {min, max} when is_nil(min) or is_nil(max) -> changeset + {min, max} when min >= max -> add_error(changeset, :max_duration_seconds, "must be greater than minumum duration") + _ -> changeset + end + end + + defimpl Jason.Encoder, for: Source do + def encode(value, opts) do + value + |> Repo.preload(:media_profile) + |> Map.drop(Source.json_exluded_fields()) + |> Jason.Encode.map(opts) + end + end end diff --git 
a/lib/pinchflat/sources/source_deletion_worker.ex b/lib/pinchflat/sources/source_deletion_worker.ex new file mode 100644 index 0000000..9c36837 --- /dev/null +++ b/lib/pinchflat/sources/source_deletion_worker.ex @@ -0,0 +1,38 @@ +defmodule Pinchflat.Sources.SourceDeletionWorker do + @moduledoc false + + use Oban.Worker, + queue: :local_data, + tags: ["sources", "local_data"] + + require Logger + + alias __MODULE__ + alias Pinchflat.Sources + + @doc """ + Starts the source deletion worker. Does not attach it to a task like `kickoff_with_task/2` + since deletion also cancels all tasks for the source + + Returns {:ok, %Task{}} | {:error, %Ecto.Changeset{}} + """ + def kickoff(source, job_args \\ %{}, job_opts \\ []) do + %{id: source.id} + |> Map.merge(job_args) + |> SourceDeletionWorker.new(job_opts) + |> Oban.insert() + end + + @doc """ + Deletes a source and optionally deletes its files + + Returns :ok + """ + @impl Oban.Worker + def perform(%Oban.Job{args: %{"id" => source_id} = args}) do + delete_files = Map.get(args, "delete_files", false) + source = Sources.get_source!(source_id) + + Sources.delete_source(source, delete_files: delete_files) + end +end diff --git a/lib/pinchflat/sources/sources.ex b/lib/pinchflat/sources/sources.ex index a65d75b..edd37f1 100644 --- a/lib/pinchflat/sources/sources.ex +++ b/lib/pinchflat/sources/sources.ex @@ -4,21 +4,47 @@ defmodule Pinchflat.Sources do """ import Ecto.Query, warn: false - alias Pinchflat.Repo + use Pinchflat.Media.MediaQuery + alias Pinchflat.Repo alias Pinchflat.Media alias Pinchflat.Tasks alias Pinchflat.Sources.Source - alias Pinchflat.Media.MediaQuery alias Pinchflat.Profiles.MediaProfile alias Pinchflat.YtDlp.MediaCollection alias Pinchflat.Metadata.SourceMetadata - alias Pinchflat.Filesystem.FilesystemHelpers + alias Pinchflat.Utils.FilesystemUtils alias Pinchflat.Downloading.DownloadingHelpers - alias Pinchflat.FastIndexing.FastIndexingWorker alias Pinchflat.SlowIndexing.SlowIndexingHelpers + alias 
Pinchflat.FastIndexing.FastIndexingHelpers alias Pinchflat.Metadata.SourceMetadataStorageWorker + @doc """ + Returns the relevant output path template for a source. + Pulls from the source's override if present, otherwise uses the media profile's. + + Returns binary() + """ + def output_path_template(source) do + source = Repo.preload(source, :media_profile) + media_profile = source.media_profile + + source.output_path_template_override || media_profile.output_path_template + end + + @doc """ + Returns a boolean indicating whether or not cookies should be used for a given operation. + + Returns boolean() + """ + def use_cookies?(source, operation) when operation in [:indexing, :downloading, :metadata, :error_recovery] do + case source.cookie_behaviour do + :disabled -> false + :all_operations -> true + :when_needed -> operation in [:indexing, :error_recovery] + end + end + @doc """ Returns the list of sources. Returns [%Source{}, ...] """ @@ -114,7 +140,7 @@ defmodule Pinchflat.Sources do Tasks.delete_tasks_for(source) MediaQuery.new() - |> MediaQuery.for_source(source) + |> where(^MediaQuery.for_source(source)) |> Repo.all() |> Enum.each(fn media_item -> Media.delete_media_item(media_item, delete_files: delete_files) @@ -153,7 +179,7 @@ defmodule Pinchflat.Sources do Source.filepath_attributes() |> Enum.map(fn field -> mapped_struct[field] end) |> Enum.filter(&is_binary/1) - |> Enum.each(&FilesystemHelpers.delete_file_and_remove_empty_directories/1) + |> Enum.each(&FilesystemUtils.delete_file_and_remove_empty_directories/1) end defp delete_internal_metadata_files(source) do @@ -163,15 +189,26 @@ defmodule Pinchflat.Sources do SourceMetadata.filepath_attributes() |> Enum.map(fn field -> mapped_struct[field] end) |> Enum.filter(&is_binary/1) - |> Enum.each(&FilesystemHelpers.delete_file_and_remove_empty_directories/1) + |> Enum.each(&FilesystemUtils.delete_file_and_remove_empty_directories/1) end defp add_source_details_to_changeset(source, changeset) do - case 
MediaCollection.get_source_details(changeset.changes.original_url) do + original_url = changeset.changes.original_url + should_use_cookies = Ecto.Changeset.get_field(changeset, :cookie_behaviour) == :all_operations + # Skipping sleep interval since this is UI blocking and we want to keep this as fast as possible + addl_opts = [use_cookies: should_use_cookies, skip_sleep_interval: true] + + case MediaCollection.get_source_details(original_url, [], addl_opts) do {:ok, source_details} -> add_source_details_by_collection_type(source, changeset, source_details) - {:error, runner_error, _status_code} -> + err -> + runner_error = + case err do + {:error, error_msg, _status_code} -> error_msg + {:error, error_msg} -> error_msg + end + Ecto.Changeset.add_error( changeset, :original_url, @@ -224,7 +261,7 @@ defmodule Pinchflat.Sources do if run_post_commit_tasks do maybe_handle_media_tasks(changeset, source) maybe_run_indexing_task(changeset, source) - run_metadata_storage_task(source) + maybe_run_metadata_storage_task(changeset, source) end {:ok, source} @@ -234,19 +271,40 @@ defmodule Pinchflat.Sources do end end - # If the source is NOT new (ie: updated) and the download_media flag has changed, + # If the source is new (ie: not persisted), do nothing + defp maybe_handle_media_tasks(%{data: %{__meta__: %{state: state}}}, _source) when state != :loaded do + :ok + end + + # If the source is NOT new (ie: updated), # enqueue or dequeue media download tasks as necessary. defp maybe_handle_media_tasks(changeset, source) do - case {changeset.data, changeset.changes} do - {%{__meta__: %{state: :loaded}}, %{download_media: true}} -> + current_changes = changeset.changes + applied_changes = Ecto.Changeset.apply_changes(changeset) + + # We need both current_changes and applied_changes to determine + # the course of action to take. 
For example, we only care if a source is supposed + # to be `enabled` or not - we don't care if that information comes from the + # current changes or if that's how it already was in the database. + # Rephrased, we're essentially using it in place of `get_field/2` + case {current_changes, applied_changes} do + {%{download_media: true}, %{enabled: true}} -> DownloadingHelpers.enqueue_pending_download_tasks(source) - {%{__meta__: %{state: :loaded}}, %{download_media: false}} -> + {%{enabled: true}, %{download_media: true}} -> + DownloadingHelpers.enqueue_pending_download_tasks(source) + + {%{download_media: false}, _} -> + DownloadingHelpers.dequeue_pending_download_tasks(source) + + {%{enabled: false}, _} -> DownloadingHelpers.dequeue_pending_download_tasks(source) _ -> - :ok + nil end + + :ok end defp maybe_run_indexing_task(changeset, source) do @@ -255,6 +313,10 @@ defmodule Pinchflat.Sources do %{__meta__: %{state: :built}} -> SlowIndexingHelpers.kickoff_indexing_task(source) + if Ecto.Changeset.get_field(changeset, :fast_index) do + FastIndexingHelpers.kickoff_indexing_task(source) + end + # If the record has been persisted, only run indexing if the # indexing frequency has been changed and is now greater than 0 %{__meta__: %{state: :loaded}} -> @@ -263,20 +325,39 @@ defmodule Pinchflat.Sources do end end - # This runs every time to pick up any changes to the metadata - defp run_metadata_storage_task(source) do - SourceMetadataStorageWorker.kickoff_with_task(source) + defp maybe_run_metadata_storage_task(changeset, source) do + case {changeset.data, changeset.changes} do + # If the changeset is new (not persisted), fetch metadata no matter what + {%{__meta__: %{state: :built}}, _} -> + SourceMetadataStorageWorker.kickoff_with_task(source) + + # If the record has been persisted, only fetch metadata if the + # original_url has changed + {_, %{original_url: _}} -> + SourceMetadataStorageWorker.kickoff_with_task(source) + + _ -> + :ok + end end defp 
maybe_update_slow_indexing_task(changeset, source) do - case changeset.changes do - %{index_frequency_minutes: mins} when mins > 0 -> + # See comment in `maybe_handle_media_tasks` as to why we need these + current_changes = changeset.changes + applied_changes = Ecto.Changeset.apply_changes(changeset) + + case {current_changes, applied_changes} do + {%{index_frequency_minutes: mins}, %{enabled: true}} when mins > 0 -> SlowIndexingHelpers.kickoff_indexing_task(source) - %{index_frequency_minutes: _} -> - Tasks.delete_pending_tasks_for(source, "FastIndexingWorker") - Tasks.delete_pending_tasks_for(source, "MediaIndexingWorker") - Tasks.delete_pending_tasks_for(source, "MediaCollectionIndexingWorker") + {%{enabled: true}, %{index_frequency_minutes: mins}} when mins > 0 -> + SlowIndexingHelpers.kickoff_indexing_task(source) + + {%{index_frequency_minutes: _}, _} -> + SlowIndexingHelpers.delete_indexing_tasks(source, include_executing: true) + + {%{enabled: false}, _} -> + SlowIndexingHelpers.delete_indexing_tasks(source, include_executing: true) _ -> :ok @@ -284,13 +365,25 @@ defmodule Pinchflat.Sources do end defp maybe_update_fast_indexing_task(changeset, source) do - case changeset.changes do - %{fast_index: true} -> - Tasks.delete_pending_tasks_for(source, "FastIndexingWorker") - FastIndexingWorker.kickoff_with_task(source) + # See comment in `maybe_handle_media_tasks` as to why we need these + current_changes = changeset.changes + applied_changes = Ecto.Changeset.apply_changes(changeset) - %{fast_index: false} -> - Tasks.delete_pending_tasks_for(source, "FastIndexingWorker") + # This technically could be simplified since `maybe_update_slow_indexing_task` + # has some overlap re: deleting pending tasks, but I'm keeping it separate + # for clarity and explicitness. 
+ case {current_changes, applied_changes} do + {%{fast_index: true}, %{enabled: true}} -> + FastIndexingHelpers.kickoff_indexing_task(source) + + {%{enabled: true}, %{fast_index: true}} -> + FastIndexingHelpers.kickoff_indexing_task(source) + + {%{fast_index: false}, _} -> + Tasks.delete_pending_tasks_for(source, "FastIndexingWorker", include_executing: true) + + {%{enabled: false}, _} -> + Tasks.delete_pending_tasks_for(source, "FastIndexingWorker", include_executing: true) _ -> :ok diff --git a/lib/pinchflat/sources/sources_query.ex b/lib/pinchflat/sources/sources_query.ex index 81bd449..850eef6 100644 --- a/lib/pinchflat/sources/sources_query.ex +++ b/lib/pinchflat/sources/sources_query.ex @@ -12,20 +12,26 @@ defmodule Pinchflat.Sources.SourcesQuery do alias Pinchflat.Sources.Source - # Prefixes: - # - for_* - belonging to a certain record - # - join_* - for joining on a certain record - # - with_* - for filtering based on full, concrete attributes - # - matching_* - for filtering based on partial attributes (e.g. 
LIKE, regex, full-text search) - # - # Suffixes: - # - _for - the arg passed is an association record + # This allows the module to be aliased and query methods to be used + # all in one go + # usage: use Pinchflat.Sources.SourcesQuery + defmacro __using__(_opts) do + quote do + import Ecto.Query, warn: false + + alias unquote(__MODULE__) + end + end def new do Source end - def for_media_profile(query, media_profile) do - where(query, [s], s.media_profile_id == ^media_profile.id) + def for_media_profile(media_profile_id) when is_integer(media_profile_id) do + dynamic([s], s.media_profile_id == ^media_profile_id) + end + + def for_media_profile(media_profile) do + dynamic([s], s.media_profile_id == ^media_profile.id) end end diff --git a/lib/pinchflat/tasks/tasks.ex b/lib/pinchflat/tasks/tasks.ex index 7b94e3c..2dfef0a 100644 --- a/lib/pinchflat/tasks/tasks.ex +++ b/lib/pinchflat/tasks/tasks.ex @@ -53,20 +53,6 @@ defmodule Pinchflat.Tasks do ) end - @doc """ - Returns the list of pending tasks for a given record type and ID. Optionally allows you to specify - which worker to include. - - Returns [%Task{}, ...] - """ - def list_pending_tasks_for(record, worker_name \\ nil) do - list_tasks_for( - record, - worker_name, - [:available, :scheduled, :retryable] - ) - end - @doc """ Gets a single task. @@ -127,13 +113,13 @@ defmodule Pinchflat.Tasks do @doc """ Deletes all tasks attached to a given record, cancelling any attached jobs. - Optionally allows you to specify which worker to include. + Optionally allows you to specify which worker and job states to include. 
Returns :ok """ - def delete_tasks_for(record, worker_name \\ nil) do + def delete_tasks_for(record, worker_name \\ nil, job_states \\ Oban.Job.states()) do record - |> list_tasks_for(worker_name) + |> list_tasks_for(worker_name, job_states) |> Enum.each(&delete_task/1) end @@ -143,10 +129,12 @@ defmodule Pinchflat.Tasks do Returns :ok """ - def delete_pending_tasks_for(record, worker_name \\ nil) do - record - |> list_pending_tasks_for(worker_name) - |> Enum.each(&delete_task/1) + def delete_pending_tasks_for(record, worker_name \\ nil, opts \\ []) do + include_executing = Keyword.get(opts, :include_executing, false) + base_job_states = [:available, :scheduled, :retryable] + job_states = if include_executing, do: base_job_states ++ [:executing], else: base_job_states + + delete_tasks_for(record, worker_name, job_states) end @doc """ diff --git a/lib/pinchflat/tasks/tasks_query.ex b/lib/pinchflat/tasks/tasks_query.ex new file mode 100644 index 0000000..64d35fb --- /dev/null +++ b/lib/pinchflat/tasks/tasks_query.ex @@ -0,0 +1,45 @@ +defmodule Pinchflat.Tasks.TasksQuery do + @moduledoc """ + Query helpers for the Tasks context. + + These methods are made to be one-ish liners used + to compose queries. Each method should strive to do + _one_ thing. These don't need to be tested as + they are just building blocks for other functionality + which, itself, will be tested. 
+ """ + import Ecto.Query, warn: false + + alias Pinchflat.Tasks.Task + + # This allows the module to be aliased and query methods to be used + # all in one go + # usage: use Pinchflat.Tasks.TasksQuery + defmacro __using__(_opts) do + quote do + import Ecto.Query, warn: false + + alias unquote(__MODULE__) + end + end + + def new do + Task + end + + def join_job(query) do + query + |> join(:left, [t], j in assoc(t, :job)) + |> preload([t, j], job: j) + end + + def in_state(states) when is_list(states) do + dynamic([t, j], j.state in ^states) + end + + def in_state(state), do: in_state([state]) + + def has_tag(tag) do + dynamic([t, j], ^tag in j.tags) + end +end diff --git a/lib/pinchflat/utils/cli_utils.ex b/lib/pinchflat/utils/cli_utils.ex new file mode 100644 index 0000000..2a82423 --- /dev/null +++ b/lib/pinchflat/utils/cli_utils.ex @@ -0,0 +1,94 @@ +defmodule Pinchflat.Utils.CliUtils do + @moduledoc """ + Utility methods for working with CLI executables + """ + + require Logger + + alias Pinchflat.Utils.StringUtils + + @doc """ + Wraps a command in a shell script that will terminate + the command if stdin is closed. Useful for stopping + commands if the job runner is cancelled. + + Delegates to `System.cmd/3` and any options/output + are passed through. Custom options can be passed in. 
+ + Custom options: + - logging_arg_override: if set, the passed value will be logged in place of + the actual arguments passed to the command + + Returns {binary(), integer()} + """ + def wrap_cmd(command, args, passthrough_opts \\ [], opts \\ []) do + wrapper_command = Path.join(:code.priv_dir(:pinchflat), "cmd_wrapper.sh") + actual_command = [command] ++ args + command_opts = set_command_opts() ++ passthrough_opts + logging_arg_override = Keyword.get(opts, :logging_arg_override, Enum.join(args, " ")) + + Logger.info("[command_wrapper]: #{command} called with: #{logging_arg_override}") + + {output, status} = System.cmd(wrapper_command, actual_command, command_opts) + log_cmd_result(command, logging_arg_override, status, output) + + {output, status} + end + + @doc """ + Parses a list of command options into a list of strings suitable for passing to + `System.cmd/3`. + + We want to satisfy the following behaviours: + 1. If the key is an atom, convert it to a string and convert it to kebab case (for convenience) + 2. If the key is a string, assume we want it as-is and don't convert it + 3. If the key is accompanied by a value, append the value to the list + 4. 
If the key is not accompanied by a value, assume it's a flag and PREpend it to the list + + Returns [binary()] + """ + def parse_options(command_opts) do + command_opts + |> List.wrap() + |> Enum.reduce([], &parse_option/2) + end + + defp parse_option({k, v}, acc) when is_atom(k) do + stringified_key = StringUtils.to_kebab_case(Atom.to_string(k)) + + parse_option({"--#{stringified_key}", v}, acc) + end + + defp parse_option({k, v}, acc) when is_binary(k) do + acc ++ [k, to_string(v)] + end + + defp parse_option(arg, acc) when is_atom(arg) do + stringified_arg = + arg + |> Atom.to_string() + |> StringUtils.to_kebab_case() + + parse_option("--#{stringified_arg}", acc) + end + + defp parse_option(arg, acc) when is_binary(arg) do + acc ++ [arg] + end + + defp log_cmd_result(command, logging_arg_override, status, output) do + log_message = "[command_wrapper]: #{command} called with: #{logging_arg_override} exited: #{status} with: #{output}" + log_level = if status == 0, do: :debug, else: :error + + Logger.log(log_level, log_message) + end + + defp set_command_opts do + # This resolves an issue where yt-dlp would attempt to write to a read-only directory + # if you scanned a new video with `--windows-filenames` enabled. Hopefully can be removed + # in the future. 
+ [ + cd: Application.get_env(:pinchflat, :tmpfile_directory) + ] + end +end diff --git a/lib/pinchflat/utils/datetime_utils.ex b/lib/pinchflat/utils/datetime_utils.ex deleted file mode 100644 index 7fce7c6..0000000 --- a/lib/pinchflat/utils/datetime_utils.ex +++ /dev/null @@ -1,17 +0,0 @@ -defmodule Pinchflat.Utils.DatetimeUtils do - @moduledoc """ - Utility methods for working with dates and datetimes - """ - - @doc """ - Converts a Date to a DateTime - - Returns %DateTime{} - """ - def date_to_datetime(date) do - date - |> Date.to_gregorian_days() - |> Kernel.*(86_400) - |> DateTime.from_gregorian_seconds() - end -end diff --git a/lib/pinchflat/filesystem/filesystem_helpers.ex b/lib/pinchflat/utils/filesystem_utils.ex similarity index 64% rename from lib/pinchflat/filesystem/filesystem_helpers.ex rename to lib/pinchflat/utils/filesystem_utils.ex index 349c4b7..e7acb86 100644 --- a/lib/pinchflat/filesystem/filesystem_helpers.ex +++ b/lib/pinchflat/utils/filesystem_utils.ex @@ -1,10 +1,43 @@ -defmodule Pinchflat.Filesystem.FilesystemHelpers do +defmodule Pinchflat.Utils.FilesystemUtils do @moduledoc """ Utility methods for working with the filesystem """ alias Pinchflat.Media alias Pinchflat.Utils.StringUtils + @doc """ + Checks if a file exists and has non-whitespace contents. + + Returns boolean() + """ + def exists_and_nonempty?(filepath) do + case File.read(filepath) do + {:ok, contents} -> + String.trim(contents) != "" + + _ -> + false + end + end + + @doc """ + Checks if two filepaths reference the same file. + + Useful if you have a relative and absolute filepath and want to be sure they're the same file. + Also works with symlinks. 
+ + Returns boolean() + """ + def filepaths_reference_same_file?(filepath_1, filepath_2) do + {:ok, stat_1} = File.stat(filepath_1) + {:ok, stat_2} = File.stat(filepath_2) + + identifier_1 = "#{stat_1.major_device}:#{stat_1.minor_device}:#{stat_1.inode}" + identifier_2 = "#{stat_2.major_device}:#{stat_2.minor_device}:#{stat_2.inode}" + + identifier_1 == identifier_2 + end + @doc """ Generates a temporary file and returns its path. The file is empty and has the given type. Generates all the directories in the path if they don't exist. @@ -12,8 +45,20 @@ defmodule Pinchflat.Filesystem.FilesystemHelpers do Returns binary() """ def generate_metadata_tmpfile(type) do + filename = StringUtils.random_string(64) + # This "namespacing" is more to help with development since things get + # weird in my editor when there are thousands of files in a single directory + first_two = String.slice(filename, 0..1) + second_two = String.slice(filename, 2..3) tmpfile_directory = Application.get_env(:pinchflat, :tmpfile_directory) - filepath = Path.join([tmpfile_directory, "#{StringUtils.random_string(64)}.#{type}"]) + + filepath = + Path.join([ + tmpfile_directory, + first_two, + second_two, + "#{filename}.#{type}" + ]) :ok = write_p!(filepath, "") diff --git a/lib/pinchflat/utils/map_utils.ex b/lib/pinchflat/utils/map_utils.ex new file mode 100644 index 0000000..41f03a1 --- /dev/null +++ b/lib/pinchflat/utils/map_utils.ex @@ -0,0 +1,17 @@ +defmodule Pinchflat.Utils.MapUtils do + @moduledoc """ + Utility methods for working with maps + """ + + @doc """ + Converts a nested list of 2-element tuples or lists into a map. 
+ + Returns map() + """ + def from_nested_list(list) do + Enum.reduce(list, %{}, fn + [key, value], acc -> Map.put(acc, key, value) + {key, value}, acc -> Map.put(acc, key, value) + end) + end +end diff --git a/lib/pinchflat/utils/number_utils.ex b/lib/pinchflat/utils/number_utils.ex new file mode 100644 index 0000000..d86002b --- /dev/null +++ b/lib/pinchflat/utils/number_utils.ex @@ -0,0 +1,53 @@ +defmodule Pinchflat.Utils.NumberUtils do + @moduledoc """ + Utility methods for working with numbers + """ + + @doc """ + Clamps a number between a minimum and maximum value + + Returns integer() | float() + """ + def clamp(num, minimum, maximum) do + num + |> max(minimum) + |> min(maximum) + end + + @doc """ + Converts a number to a human readable byte size. Can take a precision + option to specify the number of decimal places to round to. + + Returns {integer(), String.t()} + """ + def human_byte_size(number, opts \\ []) + def human_byte_size(nil, opts), do: human_byte_size(0, opts) + + def human_byte_size(number, opts) do + precision = Keyword.get(opts, :precision, 2) + suffixes = ["B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"] + base = 1024 + + Enum.reduce_while(suffixes, {number / 1.0, "B"}, fn suffix, {value, _} -> + if value < base do + {:halt, {Float.round(value, precision), suffix}} + else + {:cont, {value / base, suffix}} + end + end) + end + + @doc """ + Adds jitter to a number based on a percentage. Returns 0 if the number is less than or equal to 0. 
+ + Returns integer() + """ + def add_jitter(num, jitter_percentage \\ 0.5) + def add_jitter(num, _jitter_percentage) when num <= 0, do: 0 + + def add_jitter(num, jitter_percentage) do + jitter = :rand.uniform(round(num * jitter_percentage)) + + round(num + jitter) + end +end diff --git a/lib/pinchflat/utils/string_utils.ex b/lib/pinchflat/utils/string_utils.ex index d96d6c1..66efc5e 100644 --- a/lib/pinchflat/utils/string_utils.ex +++ b/lib/pinchflat/utils/string_utils.ex @@ -26,19 +26,22 @@ defmodule Pinchflat.Utils.StringUtils do end @doc """ - Truncates a string to the given length and adds `...` if the string is longer than the given length. - Will break on a word boundary. Nothing happens if the string is shorter than the given length. + Wraps a string in double braces. Useful as a UI helper now that + LiveView 1.0.0 allows `{}` for interpolation so now we can't use braces + directly in the view. Returns binary() """ - def truncate(string, length) do - if String.length(string) > length do - string - |> String.slice(0..(length - 1)) - |> String.replace(~r/\s+\S*$/, "") - |> Kernel.<>("...") - else - string - end + def double_brace(string) do + "{{ #{string} }}" end + + @doc """ + Wraps a string in quotes if it's not already a string. Useful for working with + error messages whose types can vary. + + Returns binary() + """ + def wrap_string(message) when is_binary(message), do: message + def wrap_string(message), do: "#{inspect(message)}" end diff --git a/lib/pinchflat/yt_dlp/backend_command_runner.ex b/lib/pinchflat/yt_dlp/backend_command_runner.ex deleted file mode 100644 index e09eaeb..0000000 --- a/lib/pinchflat/yt_dlp/backend_command_runner.ex +++ /dev/null @@ -1,12 +0,0 @@ -defmodule Pinchflat.YtDlp.BackendCommandRunner do - @moduledoc """ - A behaviour for running CLI commands against a downloader backend (yt-dlp). - - Used so we can implement Mox for testing without actually running the - yt-dlp command. 
- """ - - @callback run(binary(), keyword(), binary()) :: {:ok, binary()} | {:error, binary(), integer()} - @callback run(binary(), keyword(), binary(), keyword()) :: {:ok, binary()} | {:error, binary(), integer()} - @callback version() :: {:ok, binary()} | {:error, binary()} -end diff --git a/lib/pinchflat/yt_dlp/command_runner.ex b/lib/pinchflat/yt_dlp/command_runner.ex index 537dbd6..f574d30 100644 --- a/lib/pinchflat/yt_dlp/command_runner.ex +++ b/lib/pinchflat/yt_dlp/command_runner.ex @@ -5,11 +5,13 @@ defmodule Pinchflat.YtDlp.CommandRunner do require Logger - alias Pinchflat.Utils.StringUtils - alias Pinchflat.Filesystem.FilesystemHelpers, as: FSUtils - alias Pinchflat.YtDlp.BackendCommandRunner + alias Pinchflat.Settings + alias Pinchflat.Utils.CliUtils + alias Pinchflat.Utils.NumberUtils + alias Pinchflat.YtDlp.YtDlpCommandRunner + alias Pinchflat.Utils.FilesystemUtils, as: FSUtils - @behaviour BackendCommandRunner + @behaviour YtDlpCommandRunner @doc """ Runs a yt-dlp command and returns the string output. Saves the output to @@ -20,24 +22,32 @@ defmodule Pinchflat.YtDlp.CommandRunner do - :output_filepath - the path to save the output to. If not provided, a temporary file will be created and used. Useful for if you need a reference to the file for a file watcher. + - :use_cookies - if true, will add a cookie file to the command options. Will not + attach a cookie file if the user hasn't set one up. + - :skip_sleep_interval - if true, will not add the sleep interval options to the command. + Usually only used for commands that would be UI-blocking Returns {:ok, binary()} | {:error, output, status}. """ - @impl BackendCommandRunner - def run(url, command_opts, output_template, addl_opts \\ []) do - # This approach lets us mock the command for testing - command = backend_executable() - # These must stay in exactly this order, hence why I'm giving it its own variable. - # Also, can't use RAM file since yt-dlp needs a concrete filepath. 
+ @impl YtDlpCommandRunner + def run(url, action_name, command_opts, output_template, addl_opts \\ []) do + Logger.debug("Running yt-dlp command for action: #{action_name}") + output_filepath = generate_output_filepath(addl_opts) print_to_file_opts = [{:print_to_file, output_template}, output_filepath] - cookie_opts = build_cookie_options() - formatted_command_opts = [url] ++ parse_options(command_opts ++ print_to_file_opts ++ cookie_opts) + user_configured_opts = cookie_file_options(addl_opts) ++ rate_limit_options(addl_opts) ++ misc_options() + # These must stay in exactly this order, hence why I'm giving it its own variable. + all_opts = command_opts ++ print_to_file_opts ++ user_configured_opts ++ global_options() + formatted_command_opts = [url] ++ CliUtils.parse_options(all_opts) - Logger.info("[yt-dlp] called with: #{Enum.join(formatted_command_opts, " ")}") - - case System.cmd(command, formatted_command_opts, stderr_to_stdout: true) do - {_, 0} -> + case CliUtils.wrap_cmd(backend_executable(), formatted_command_opts, stderr_to_stdout: true) do + # yt-dlp exit codes: + # 0 = Everything is successful + # 100 = yt-dlp must restart for update to complete + # 101 = Download cancelled by --max-downloads etc + # 2 = Error in user-provided options + # 1 = Any other error + {_, status} when status in [0, 101] -> # IDEA: consider deleting the file after reading it. It's in the tmp dir, so it's not # a huge deal, but it's still a good idea to clean up after ourselves. # (even on error? especially on error?) 
@@ -48,11 +58,34 @@ defmodule Pinchflat.YtDlp.CommandRunner do end end - @impl BackendCommandRunner + @doc """ + Returns the version of yt-dlp as a string + + Returns {:ok, binary()} | {:error, binary()} + """ + @impl YtDlpCommandRunner def version do command = backend_executable() - case System.cmd(command, ["--version"]) do + case CliUtils.wrap_cmd(command, ["--version"]) do + {output, 0} -> + {:ok, String.trim(output)} + + {output, _} -> + {:error, output} + end + end + + @doc """ + Updates yt-dlp to the latest version + + Returns {:ok, binary()} | {:error, binary()} + """ + @impl YtDlpCommandRunner + def update do + command = backend_executable() + + case CliUtils.wrap_cmd(command, ["--update"]) do {output, 0} -> {:ok, String.trim(output)} @@ -68,47 +101,60 @@ defmodule Pinchflat.YtDlp.CommandRunner do end end - defp build_cookie_options do - base_dir = Application.get_env(:pinchflat, :extras_directory) - cookie_file = Path.join(base_dir, "cookies.txt") + defp global_options do + [ + :windows_filenames, + :quiet, + cache_dir: Path.join(Application.get_env(:pinchflat, :tmpfile_directory), "yt-dlp-cache") + ] + end - case File.read(cookie_file) do - {:ok, cookie_data} -> - if String.trim(cookie_data) != "", do: [cookies: cookie_file], else: [] - - {:error, _} -> - [] + defp cookie_file_options(addl_opts) do + case Keyword.get(addl_opts, :use_cookies) do + true -> add_cookie_file() + _ -> [] end end - # We want to satisfy the following behaviours: - # - # 1. If the key is an atom, convert it to a string and convert it to kebab case (for convenience) - # 2. If the key is a string, assume we want it as-is and don't convert it - # 3. If the key is accompanied by a value, append the value to the list - # 4. 
If the key is not accompanied by a value, assume it's a flag and PREpend it to the list - defp parse_options(command_opts) do - Enum.reduce(command_opts, [], &parse_option/2) + defp add_cookie_file do + base_dir = Application.get_env(:pinchflat, :extras_directory) + filename_options_map = %{cookies: "cookies.txt"} + + Enum.reduce(filename_options_map, [], fn {opt_name, filename}, acc -> + filepath = Path.join(base_dir, filename) + + if FSUtils.exists_and_nonempty?(filepath) do + [{opt_name, filepath} | acc] + else + acc + end + end) end - defp parse_option({k, v}, acc) when is_atom(k) do - stringified_key = StringUtils.to_kebab_case(Atom.to_string(k)) + defp rate_limit_options(addl_opts) do + throughput_limit = Settings.get!(:download_throughput_limit) + sleep_interval_opts = sleep_interval_opts(addl_opts) + throughput_option = if throughput_limit, do: [limit_rate: throughput_limit], else: [] - parse_option({"--#{stringified_key}", v}, acc) + throughput_option ++ sleep_interval_opts end - defp parse_option({k, v}, acc) when is_binary(k) do - acc ++ [k, to_string(v)] + defp sleep_interval_opts(addl_opts) do + sleep_interval = Settings.get!(:extractor_sleep_interval_seconds) + + if sleep_interval <= 0 || Keyword.get(addl_opts, :skip_sleep_interval) do + [] + else + [ + sleep_requests: NumberUtils.add_jitter(sleep_interval), + sleep_interval: NumberUtils.add_jitter(sleep_interval), + sleep_subtitles: NumberUtils.add_jitter(sleep_interval) + ] + end end - defp parse_option(arg, acc) when is_atom(arg) do - stringified_arg = StringUtils.to_kebab_case(Atom.to_string(arg)) - - parse_option("--#{stringified_arg}", acc) - end - - defp parse_option(arg, acc) when is_binary(arg) do - acc ++ [arg] + defp misc_options do + if Settings.get!(:restrict_filenames), do: [:restrict_filenames], else: [] end defp backend_executable do diff --git a/lib/pinchflat/yt_dlp/media.ex b/lib/pinchflat/yt_dlp/media.ex index c6b03fa..9abf8e5 100644 --- a/lib/pinchflat/yt_dlp/media.ex +++ 
b/lib/pinchflat/yt_dlp/media.ex @@ -10,8 +10,9 @@ defmodule Pinchflat.YtDlp.Media do :original_url, :livestream, :short_form_content, - :upload_date, - :duration_seconds + :uploaded_at, + :duration_seconds, + :predicted_media_filepath ] defstruct [ @@ -21,8 +22,10 @@ defmodule Pinchflat.YtDlp.Media do :original_url, :livestream, :short_form_content, - :upload_date, - :duration_seconds + :uploaded_at, + :duration_seconds, + :playlist_index, + :predicted_media_filepath ] alias __MODULE__ @@ -36,9 +39,9 @@ defmodule Pinchflat.YtDlp.Media do Returns {:ok, map()} | {:error, any, ...}. """ def download(url, command_opts \\ [], addl_opts \\ []) do - opts = [:no_simulate] ++ command_opts + all_command_opts = [:no_simulate] ++ command_opts - with {:ok, output} <- backend_runner().run(url, opts, "after_move:%()j", addl_opts), + with {:ok, output} <- backend_runner().run(url, :download, all_command_opts, "after_move:%()j", addl_opts), {:ok, parsed_json} <- Phoenix.json_library().decode(output) do {:ok, parsed_json} else @@ -47,32 +50,72 @@ defmodule Pinchflat.YtDlp.Media do end @doc """ - Returns a map representing the media at the given URL. + Determines if the media at the given URL is ready to be downloaded. + Common examples of non-downloadable media are upcoming or in-progress live streams. - Returns {:ok, [map()]} | {:error, any, ...}. + Returns {:ok, :downloadable | :ignorable} | {:error, any} """ - def get_media_attributes(url) do - runner = Application.get_env(:pinchflat, :yt_dlp_runner) + def get_downloadable_status(url, addl_opts \\ []) do + action = :get_downloadable_status command_opts = [:simulate, :skip_download] + + case backend_runner().run(url, action, command_opts, "%(.{live_status})j", addl_opts) do + {:ok, output} -> + output + |> Phoenix.json_library().decode!() + |> parse_downloadable_status() + + err -> + err + end + end + + @doc """ + Downloads a thumbnail for a single piece of media. 
Usually used for + downloading thumbnails for internal use + + Returns {:ok, ""} | {:error, any, ...}. + """ + def download_thumbnail(url, command_opts \\ [], addl_opts \\ []) do + all_command_opts = [:no_simulate, :skip_download, :write_thumbnail, convert_thumbnail: "jpg"] ++ command_opts + + # NOTE: it doesn't seem like this command actually returns anything in `after_move` since + # we aren't downloading the main media file + backend_runner().run(url, :download_thumbnail, all_command_opts, "after_move:%()j", addl_opts) + end + + @doc """ + Returns a map representing the media at the given URL. + Optionally takes a list of additional command options to pass to yt-dlp + or configuration-related options to pass to the runner. + + Returns {:ok, %Media{}} | {:error, any, ...}. + """ + def get_media_attributes(url, command_opts \\ [], addl_opts \\ []) do + all_command_opts = [:simulate, :skip_download] ++ command_opts output_template = indexing_output_template() - case runner.run(url, command_opts, output_template) do + case backend_runner().run(url, :get_media_attributes, all_command_opts, output_template, addl_opts) do {:ok, output} -> output |> Phoenix.json_library().decode!() |> response_to_struct() |> FunctionUtils.wrap_ok() - res -> - res + err -> + err end end @doc """ Returns the output template for yt-dlp's indexing command. + + NOTE: playlist_index is really only useful for playlists that will never change their order. 
+ NOTE: I've switched back to `original_url` (from `webpage_url`) since it's started indicating + if something is a short via the URL again """ def indexing_output_template do - "%(.{id,title,was_live,webpage_url,description,aspect_ratio,duration,upload_date})j" + "%(.{id,title,live_status,original_url,description,aspect_ratio,duration,upload_date,timestamp,playlist_index,filename})j" end @doc """ @@ -86,16 +129,18 @@ defmodule Pinchflat.YtDlp.Media do media_id: response["id"], title: response["title"], description: response["description"], - original_url: response["webpage_url"], - livestream: response["was_live"], + original_url: response["original_url"], + livestream: !!response["live_status"] && response["live_status"] != "not_live", duration_seconds: response["duration"] && round(response["duration"]), - short_form_content: response["webpage_url"] && short_form_content?(response), - upload_date: response["upload_date"] && MetadataFileHelpers.parse_upload_date(response["upload_date"]) + short_form_content: response["original_url"] && short_form_content?(response), + uploaded_at: response["upload_date"] && parse_uploaded_at(response), + playlist_index: response["playlist_index"] || 0, + predicted_media_filepath: response["filename"] } end defp short_form_content?(response) do - if String.contains?(response["webpage_url"], "/shorts/") do + if String.contains?(response["original_url"], "/shorts/") do true else # Sometimes shorts are returned without /shorts/ in the URL, @@ -106,7 +151,29 @@ defmodule Pinchflat.YtDlp.Media do # # These don't fail if duration or aspect_ratio are missing # due to Elixir's comparison semantics - response["duration"] <= 60 && response["aspect_ratio"] < 0.8 + response["duration"] <= 180 && response["aspect_ratio"] <= 0.85 + end + end + + defp parse_uploaded_at(%{"timestamp" => ts} = response) when is_number(ts) do + case DateTime.from_unix(ts) do + {:ok, datetime} -> datetime + _ -> 
MetadataFileHelpers.parse_upload_date(response["upload_date"]) + end + end + + # This field is needed before inserting into the database, but absence + # of this field should fail at insert-time rather than here + defp parse_uploaded_at(%{"upload_date" => nil}), do: nil + defp parse_uploaded_at(response), do: MetadataFileHelpers.parse_upload_date(response["upload_date"]) + + defp parse_downloadable_status(response) do + case response["live_status"] do + status when status in ["is_live", "is_upcoming", "post_live"] -> {:ok, :ignorable} + status when status in ["was_live", "not_live"] -> {:ok, :downloadable} + # This preserves my tenuous support for non-youtube sources. + nil -> {:ok, :downloadable} + _ -> {:error, "Unknown live status: #{response["live_status"]}"} end end diff --git a/lib/pinchflat/yt_dlp/media_collection.ex b/lib/pinchflat/yt_dlp/media_collection.ex index 249a703..aa4abb3 100644 --- a/lib/pinchflat/yt_dlp/media_collection.ex +++ b/lib/pinchflat/yt_dlp/media_collection.ex @@ -6,34 +6,39 @@ defmodule Pinchflat.YtDlp.MediaCollection do require Logger - alias Pinchflat.Filesystem.FilesystemHelpers + alias Pinchflat.Utils.FilesystemUtils alias Pinchflat.YtDlp.Media, as: YtDlpMedia @doc """ Returns a list of maps representing the media in the collection. + Optionally takes a list of additional command options to pass to yt-dlp + or configuration-related options to pass to the runner. - Options: + Runner Options: - :file_listener_handler - a function that will be called with the path to the file that will be written to when yt-dlp is done. This is useful for setting up a file watcher to know when the file is ready to be read. + - :use_cookies - whether or not to use user-provided cookies when fetching the media details Returns {:ok, [map()]} | {:error, any, ...}. 
""" - def get_media_attributes_for_collection(url, addl_opts \\ []) do - runner = Application.get_env(:pinchflat, :yt_dlp_runner) + def get_media_attributes_for_collection(url, command_opts \\ [], addl_opts \\ []) do # `ignore_no_formats_error` is necessary because yt-dlp will error out if # the first video has not released yet (ie: is a premier). We don't care about # available formats since we're just getting the media details - command_opts = [:simulate, :skip_download, :ignore_no_formats_error] + all_command_opts = [:simulate, :skip_download, :ignore_no_formats_error, :no_warnings] ++ command_opts + use_cookies = Keyword.get(addl_opts, :use_cookies, false) output_template = YtDlpMedia.indexing_output_template() - output_filepath = FilesystemHelpers.generate_metadata_tmpfile(:json) + output_filepath = FilesystemUtils.generate_metadata_tmpfile(:json) file_listener_handler = Keyword.get(addl_opts, :file_listener_handler, false) + runner_opts = [output_filepath: output_filepath, use_cookies: use_cookies] + action = :get_media_attributes_for_collection if file_listener_handler do file_listener_handler.(output_filepath) end - case runner.run(url, command_opts, output_template, output_filepath: output_filepath) do + case backend_runner().run(url, action, all_command_opts, output_template, runner_opts) do {:ok, output} -> parsed_lines = output @@ -50,8 +55,8 @@ defmodule Pinchflat.YtDlp.MediaCollection do {:ok, Enum.filter(parsed_lines, &(&1 != nil))} - res -> - res + err -> + err end end @@ -64,17 +69,26 @@ defmodule Pinchflat.YtDlp.MediaCollection do Returns {:ok, map()} | {:error, any, ...}. """ - def get_source_details(source_url, addl_opts \\ []) do + def get_source_details(source_url, command_opts \\ [], addl_opts \\ []) do # `ignore_no_formats_error` is necessary because yt-dlp will error out if # the first video has not released yet (ie: is a premier). 
We don't care about # available formats since we're just getting the source details - command_opts = [:simulate, :skip_download, :ignore_no_formats_error, playlist_end: 1] ++ addl_opts - output_template = "%(.{channel,channel_id,playlist_id,playlist_title,filename})j" + default_opts = [ + :simulate, + :skip_download, + :ignore_no_formats_error, + playlist_end: 1 + ] - with {:ok, output} <- backend_runner().run(source_url, command_opts, output_template), + all_command_opts = default_opts ++ command_opts + output_template = "%(.{channel,channel_id,playlist_id,playlist_title,filename})j" + action = :get_source_details + + with {:ok, output} <- backend_runner().run(source_url, action, all_command_opts, output_template, addl_opts), {:ok, parsed_json} <- Phoenix.json_library().decode(output) do {:ok, format_source_details(parsed_json)} else + {:error, %Jason.DecodeError{}} -> {:error, "Error decoding JSON response"} err -> err end end @@ -92,13 +106,25 @@ defmodule Pinchflat.YtDlp.MediaCollection do as a compressed blob for possible future use. That's why it's not getting formatted like `get_source_details/1` + ! IMPORTANT ! - you'll always want to set `playlist_items: int` in `addl_opts. + This is great if you want to also return details about the videos in the playlists, + but it should be set in all cases to not over-fetch data. + For channels you should usually set this to 0 since channels return all the + metadata we need without needing to fetch the videos. On the other hand, playlists + don't return very useful images so you can set this to 1 to get the first video's + images, for instance. + Returns {:ok, map()} | {:error, any, ...}. """ - def get_source_metadata(source_url, addl_opts \\ []) do - opts = [playlist_items: 0] ++ addl_opts - output_template = "playlist:%()j" + def get_source_metadata(source_url, command_opts, addl_opts \\ []) do + # This only validates that the `playlist_items` key is present. 
It's otherwise unused + _playlist_items = Keyword.fetch!(command_opts, :playlist_items) - with {:ok, output} <- backend_runner().run(source_url, opts, output_template), + all_command_opts = [:skip_download] ++ command_opts + output_template = "playlist:%()j" + action = :get_source_metadata + + with {:ok, output} <- backend_runner().run(source_url, action, all_command_opts, output_template, addl_opts), {:ok, parsed_json} <- Phoenix.json_library().decode(output) do {:ok, parsed_json} else diff --git a/lib/pinchflat/yt_dlp/update_worker.ex b/lib/pinchflat/yt_dlp/update_worker.ex new file mode 100644 index 0000000..2d9b43f --- /dev/null +++ b/lib/pinchflat/yt_dlp/update_worker.ex @@ -0,0 +1,44 @@ +defmodule Pinchflat.YtDlp.UpdateWorker do + @moduledoc false + + use Oban.Worker, + queue: :local_data, + tags: ["local_data"] + + require Logger + + alias __MODULE__ + alias Pinchflat.Settings + + @doc """ + Starts the yt-dlp update worker. Does not attach it to a task like `kickoff_with_task/2` + + Returns {:ok, %Oban.Job{}} | {:error, %Ecto.Changeset{}} + """ + def kickoff do + Oban.insert(UpdateWorker.new(%{})) + end + + @doc """ + Updates yt-dlp and saves the version to the settings. + + This worker is scheduled to run via the Oban Cron plugin as well as on app boot. + + Returns :ok + """ + @impl Oban.Worker + def perform(%Oban.Job{}) do + Logger.info("Updating yt-dlp") + + yt_dlp_runner().update() + + {:ok, yt_dlp_version} = yt_dlp_runner().version() + Settings.set(yt_dlp_version: yt_dlp_version) + + :ok + end + + defp yt_dlp_runner do + Application.get_env(:pinchflat, :yt_dlp_runner) + end +end diff --git a/lib/pinchflat/yt_dlp/yt_dlp_command_runner.ex b/lib/pinchflat/yt_dlp/yt_dlp_command_runner.ex new file mode 100644 index 0000000..e5c770e --- /dev/null +++ b/lib/pinchflat/yt_dlp/yt_dlp_command_runner.ex @@ -0,0 +1,13 @@ +defmodule Pinchflat.YtDlp.YtDlpCommandRunner do + @moduledoc """ + A behaviour for running CLI commands against a downloader backend (yt-dlp). 
+ + Used so we can implement Mox for testing without actually running the + yt-dlp command. + """ + + @callback run(binary(), atom(), keyword(), binary()) :: {:ok, binary()} | {:error, binary(), integer()} + @callback run(binary(), atom(), keyword(), binary(), keyword()) :: {:ok, binary()} | {:error, binary(), integer()} + @callback version() :: {:ok, binary()} | {:error, binary()} + @callback update() :: {:ok, binary()} | {:error, binary()} +end diff --git a/lib/pinchflat_web.ex b/lib/pinchflat_web.ex index ef58d00..9401e3b 100644 --- a/lib/pinchflat_web.ex +++ b/lib/pinchflat_web.ex @@ -43,7 +43,7 @@ defmodule PinchflatWeb do layouts: [html: PinchflatWeb.Layouts] import Plug.Conn - import PinchflatWeb.Gettext + use Gettext, backend: PinchflatWeb.Gettext alias Pinchflat.Settings alias PinchflatWeb.Layouts @@ -94,12 +94,13 @@ defmodule PinchflatWeb do # HTML escaping functionality import Phoenix.HTML # Core UI components and translation - import PinchflatWeb.Gettext + use Gettext, backend: PinchflatWeb.Gettext import PinchflatWeb.CoreComponents import PinchflatWeb.CustomComponents.TabComponents import PinchflatWeb.CustomComponents.TextComponents import PinchflatWeb.CustomComponents.TableComponents import PinchflatWeb.CustomComponents.ButtonComponents + import Pinchflat.Utils.StringUtils, only: [double_brace: 1] alias Pinchflat.Settings alias Pinchflat.Utils.StringUtils diff --git a/lib/pinchflat_web/components/core_components.ex b/lib/pinchflat_web/components/core_components.ex index f124b16..37af104 100644 --- a/lib/pinchflat_web/components/core_components.ex +++ b/lib/pinchflat_web/components/core_components.ex @@ -15,8 +15,7 @@ defmodule PinchflatWeb.CoreComponents do Icons are provided by [heroicons](https://heroicons.com). See `icon/1` for usage. 
""" use Phoenix.Component, global_prefixes: ~w(x-) - - import PinchflatWeb.Gettext + use Gettext, backend: PinchflatWeb.Gettext alias Phoenix.LiveView.JS alias PinchflatWeb.CustomComponents.TextComponents @@ -82,7 +81,7 @@ defmodule PinchflatWeb.CoreComponents do
- <%= render_slot(@inner_block) %> + {render_slot(@inner_block)}
@@ -126,9 +125,9 @@ defmodule PinchflatWeb.CoreComponents do ]}>
- <%= @title %> + {@title}
-

<%= msg %>

+

{msg}