diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..ff42f7d --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,12 @@ +{ + "name": "Pinchflat Dev", + "dockerComposeFile": "../docker-compose.yml", + "service": "phx", + "workspaceFolder": "/app", + "shutdownAction": "stopCompose", + "customizations": { + "vscode": { + "extensions": ["phoenixframework.phoenix", "JakeBecker.elixir-ls", "esbenp.prettier-vscode"] + } + } +} diff --git a/.github/workflows/docker_release.yml b/.github/workflows/docker_release.yml index f80cca4..2e35dbc 100644 --- a/.github/workflows/docker_release.yml +++ b/.github/workflows/docker_release.yml @@ -72,7 +72,7 @@ jobs: password: ${{ secrets.GITHUB_TOKEN }} - name: Build and Push - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 with: context: . file: ./docker/selfhosted.Dockerfile diff --git a/.github/workflows/lint_and_test.yml b/.github/workflows/lint_and_test.yml index 82e62a6..c174559 100644 --- a/.github/workflows/lint_and_test.yml +++ b/.github/workflows/lint_and_test.yml @@ -29,7 +29,7 @@ jobs: uses: docker/setup-buildx-action@v3 - name: Build docker image - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 with: context: . file: ./docker/dev.Dockerfile diff --git a/.iex.exs b/.iex.exs index ebe1a24..8533f83 100644 --- a/.iex.exs +++ b/.iex.exs @@ -23,3 +23,11 @@ alias Pinchflat.Metadata.MetadataFileHelpers alias Pinchflat.SlowIndexing.FileFollowerServer Pinchflat.Release.check_file_permissions() + +defmodule IexHelpers do + def restart do + :init.restart() + end +end + +import IexHelpers diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 0000000..bdc7a23 --- /dev/null +++ b/.prettierignore @@ -0,0 +1 @@ +assets/vendor/ diff --git a/tooling/.prettierrc.js b/.prettierrc.js similarity index 100% rename from tooling/.prettierrc.js rename to .prettierrc.js diff --git a/README.md b/README.md index a550c87..d99b196 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +> [!IMPORTANT] +> (2025-02-14) [zakkarry](https://github.com/sponsors/zakkarry), who is a collaborator on [cross-seed](https://github.com/cross-seed/cross-seed) and an extremely helpful community member in general, is facing hard times due to medical debt and family illness. If you're able, please consider [sponsoring him on GitHub](https://github.com/sponsors/zakkarry) or donating via [buymeacoffee](https://tip.ary.dev). Tell him I sent you! +

[![](https://img.shields.io/github/license/kieraneglin/pinchflat?style=for-the-badge&color=ee512b)](LICENSE) -[![](https://img.shields.io/github/v/release/kieraneglin/pinchflat?style=for-the-badge)](https://github.com/kieraneglin/pinchflat/releases) +[![](https://img.shields.io/github/v/release/kieraneglin/pinchflat?style=for-the-badge&color=purple)](https://github.com/kieraneglin/pinchflat/releases) +[![](https://img.shields.io/static/v1?style=for-the-badge&logo=discord&message=Chat&color=5865F2&label=Discord)](https://discord.gg/j7T6dCuwU4) [![](https://img.shields.io/github/actions/workflow/status/kieraneglin/pinchflat/lint_and_test.yml?style=for-the-badge)](#) +[![](https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode&style=for-the-badge)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/kieraneglin/pinchflat) @@ -32,6 +37,7 @@ - [Portainer](#portainer) - [Docker](#docker) - [Environment Variables](#environment-variables) + - [A note on reverse proxies](#reverse-proxies) - [Username and Password (authentication)](https://github.com/kieraneglin/pinchflat/wiki/Username-and-Password) - [Frequently asked questions](https://github.com/kieraneglin/pinchflat/wiki/Frequently-Asked-Questions) - [Documentation](https://github.com/kieraneglin/pinchflat/wiki) @@ -52,7 +58,7 @@ If it doesn't work for your use case, please make a feature request! You can als - Self-contained - just one Docker container with no external dependencies - Powerful naming system so content is stored where and how you want it - Easy-to-use web interface with presets to get you started right away -- First-class support for media center apps like Plex, Jellyfin, and Kodi +- First-class support for media center apps like Plex, Jellyfin, and Kodi ([docs](https://github.com/kieraneglin/pinchflat/wiki/Frequently-Asked-Questions#how-do-i-get-media-into-plexjellyfinkodi)) - Supports serving RSS feeds to your favourite podcast app ([docs](https://github.com/kieraneglin/pinchflat/wiki/Podcast-RSS-Feeds)) - Automatically downloads new content from channels and playlists - Uses a novel approach to download new content more quickly than other apps @@ -62,7 +68,7 @@ If it doesn't work for your use case, please make a feature request! You can als - Allows automatically redownloading new media after a set period - This can help improve the download quality of new content or improve SponsorBlock tags - Optionally automatically delete old content ([docs](https://github.com/kieraneglin/pinchflat/wiki/Automatically-Delete-Media)) -- Advanced options like setting cutoff dates and filtering by title +- Advanced options like setting cutoff dates and filtering by title ([docs](https://github.com/kieraneglin/pinchflat/wiki/Frequently-Asked-Questions#i-only-want-certain-videos-from-a-source---how-can-i-only-download-those)) - Reliable hands-off operation - Can pass cookies to YouTube to download your private playlists ([docs](https://github.com/kieraneglin/pinchflat/wiki/YouTube-Cookies)) - Sponsorblock integration @@ -123,6 +129,23 @@ docker run \ ghcr.io/kieraneglin/pinchflat:latest ``` +### Podman + +The Podman setup is similar to Docker but changes a few flags to run under a User Namespace instead of root. 
To run Pinchflat under Podman and use the current user's UID/GID for file access, run this: + +``` +podman run \ + --security-opt label=disable \ + --userns=keep-id --user=$UID \ + -e TZ=America/Los_Angeles \ + -p 8945:8945 \ + -v /host/path/to/config:/config:rw \ + -v /host/path/to/downloads/:/downloads:rw \ + ghcr.io/kieraneglin/pinchflat:latest +``` + +When using this setup, consider creating a new `pinchflat` user and giving that user ownership of the config and download directories. See the [Podman --userns](https://docs.podman.io/en/v4.6.1/markdown/options/userns.container.html) docs. + ### IMPORTANT: File permissions You _must_ ensure the host directories you've mounted are writable by the user running the Docker container. If you get a permission error, follow the steps it suggests. See [#106](https://github.com/kieraneglin/pinchflat/issues/106) for more. @@ -130,9 +153,6 @@ You _must_ ensure the host directories you've mounted are writable by the user r > [!IMPORTANT] > It's not recommended to run the container as root. Doing so can create permission issues if other apps need to work with the downloaded media. -> [!TIP] -> If you need to run any command as root, you can run `su` from the container's shell as there is no password set for the root user. - ### ADVANCED: Storing Pinchflat config directory on a network share As pointed out in [#137](https://github.com/kieraneglin/pinchflat/issues/137), SQLite doesn't like being run in WAL mode on network shares. If you're running Pinchflat on a network share, you can disable WAL mode by setting the `JOURNAL_MODE` environment variable to `delete`. This will make Pinchflat run in rollback journal mode, which is less performant but should work on network shares. @@ -144,16 +164,24 @@ If you change this setting and it works well for you, please leave a comment on ### Environment variables -| Name | Required? | Default | Notes | -| --------------------- | --------- | ------------------------- | ---------------------------------------------------------------------------------------------- | -| TZ | No | `UTC` | Must follow IANA TZ format | -| LOG_LEVEL | No | `debug` | Can be set to `info` | -| BASIC_AUTH_USERNAME | No | | See [authentication docs](https://github.com/kieraneglin/pinchflat/wiki/Username-and-Password) | -| BASIC_AUTH_PASSWORD | No | | See [authentication docs](https://github.com/kieraneglin/pinchflat/wiki/Username-and-Password) | -| EXPOSE_FEED_ENDPOINTS | No | | See [RSS feed docs](https://github.com/kieraneglin/pinchflat/wiki/Podcast-RSS-Feeds) | -| JOURNAL_MODE | No | `wal` | Set to `delete` if your config directory is stored on a network share (not recommended) | -| TZ_DATA_DIR | No | `/etc/elixir_tzdata_data` | The container path where the timezone database is stored | -| BASE_ROUTE_PATH | No | `/` | The base path for route generation. Useful when running behind certain reverse proxies | +| Name | Required? 
| Default | Notes | +| --------------------------- | --------- | ------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | +| `TZ` | No | `UTC` | Must follow IANA TZ format | +| `LOG_LEVEL` | No | `debug` | Can be set to `info` but `debug` is strongly recommended | +| `UMASK` | No | `022` | Unraid users may want to set this to `000` | +| `BASIC_AUTH_USERNAME` | No | | See [authentication docs](https://github.com/kieraneglin/pinchflat/wiki/Username-and-Password) | +| `BASIC_AUTH_PASSWORD` | No | | See [authentication docs](https://github.com/kieraneglin/pinchflat/wiki/Username-and-Password) | +| `EXPOSE_FEED_ENDPOINTS` | No | `false` | See [RSS feed docs](https://github.com/kieraneglin/pinchflat/wiki/Podcast-RSS-Feeds) | +| `ENABLE_IPV6` | No | `false` | Setting to _any_ non-blank value will enable IPv6 | +| `JOURNAL_MODE` | No | `wal` | Set to `delete` if your config directory is stored on a network share (not recommended) | +| `TZ_DATA_DIR` | No | `/etc/elixir_tzdata_data` | The container path where the timezone database is stored | +| `BASE_ROUTE_PATH` | No | `/` | The base path for route generation. Useful when running behind certain reverse proxies - prefixes must be stripped. | +| `YT_DLP_WORKER_CONCURRENCY` | No | `2` | The number of concurrent workers that use `yt-dlp` _per queue_. Set to 1 if you're getting IP limited, otherwise don't touch it | +| `ENABLE_PROMETHEUS` | No | `false` | Setting to _any_ non-blank value will enable Prometheus. See [docs](https://github.com/kieraneglin/pinchflat/wiki/Prometheus-and-Grafana) | + +### Reverse Proxies + +Pinchflat makes heavy use of websockets for real-time updates. If you're running Pinchflat behind a reverse proxy then you'll need to make sure it's configured to support websockets. ## EFF donations @@ -161,9 +189,9 @@ Prior to 2024-05-10, a portion of all donations were given to the [Electronic Fr The EFF defends your online liberties and [backed](https://github.com/github/dmca/blob/9a85e0f021f7967af80e186b890776a50443f06c/2020/11/2020-11-16-RIAA-reversal-effletter.pdf) `youtube-dl` when Google took them down. -## Pre-release disclaimer +## Stability disclaimer -This is pre-release software and anything can break at any time. I make not guarantees about the stability of this software, forward-compatibility of updates, or integrity (both related to and independent of Pinchflat). Essentially, use at your own risk and expect there will be rough edges for now. +This software is in active development and anything can break at any time. I make no guarantees about the stability of this software, forward-compatibility of updates, or integrity (both related to and independent of Pinchflat). ## License diff --git a/assets/js/alpine_helpers.js b/assets/js/alpine_helpers.js index 56a7d93..9c2367f 100644 --- a/assets/js/alpine_helpers.js +++ b/assets/js/alpine_helpers.js @@ -35,3 +35,14 @@ window.markVersionAsSeen = (versionString) => { window.isVersionSeen = (versionString) => { return localStorage.getItem('seenVersion') === versionString } + +window.dispatchFor = (elementOrId, eventName, detail = {}) => { + const element = + typeof elementOrId === 'string' ? document.getElementById(elementOrId) : elementOrId + + // This is needed to ensure the DOM has updated before dispatching the event. 
+ // Doing so ensures that the latest DOM state is what's sent to the server + setTimeout(() => { + element.dispatchEvent(new Event(eventName, { bubbles: true, detail })) + }, 0) +} diff --git a/assets/js/app.js b/assets/js/app.js index 2be596d..e6e0219 100644 --- a/assets/js/app.js +++ b/assets/js/app.js @@ -39,7 +39,7 @@ let liveSocket = new LiveSocket(document.body.dataset.socketPath, Socket, { } }, hooks: { - supressEnterSubmission: { + 'supress-enter-submission': { mounted() { this.el.addEventListener('keypress', (event) => { if (event.key === 'Enter') { diff --git a/assets/tailwind.config.js b/assets/tailwind.config.js index 49d9b55..fdb2bc4 100644 --- a/assets/tailwind.config.js +++ b/assets/tailwind.config.js @@ -347,6 +347,38 @@ module.exports = { }, { values } ) + }), + plugin(function ({ matchComponents, theme }) { + let iconsDir = path.join(__dirname, './vendor/simple-icons') + let values = {} + + fs.readdirSync(iconsDir).forEach((file) => { + let name = path.basename(file, '.svg') + values[name] = { name, fullPath: path.join(iconsDir, file) } + }) + + matchComponents( + { + si: ({ name, fullPath }) => { + let content = fs + .readFileSync(fullPath) + .toString() + .replace(/\r?\n|\r/g, '') + return { + [`--si-${name}`]: `url('data:image/svg+xml;utf8,${content}')`, + '-webkit-mask': `var(--si-${name})`, + mask: `var(--si-${name})`, + 'mask-repeat': 'no-repeat', + 'background-color': 'currentColor', + 'vertical-align': 'middle', + display: 'inline-block', + width: theme('spacing.5'), + height: theme('spacing.5') + } + } + }, + { values } + ) }) ] } diff --git a/assets/vendor/simple-icons/discord.svg b/assets/vendor/simple-icons/discord.svg new file mode 100644 index 0000000..9d7796b --- /dev/null +++ b/assets/vendor/simple-icons/discord.svg @@ -0,0 +1 @@ +Discord \ No newline at end of file diff --git a/assets/vendor/simple-icons/github.svg b/assets/vendor/simple-icons/github.svg new file mode 100644 index 0000000..2334976 --- /dev/null +++ b/assets/vendor/simple-icons/github.svg @@ -0,0 +1 @@ +GitHub diff --git a/config/config.exs b/config/config.exs index aab4d10..f57e0cc 100644 --- a/config/config.exs +++ b/config/config.exs @@ -10,6 +10,7 @@ import Config config :pinchflat, ecto_repos: [Pinchflat.Repo], generators: [timestamp_type: :utc_datetime], + env: config_env(), # Specifying backend data here makes mocking and local testing SUPER easy yt_dlp_executable: System.find_executable("yt-dlp"), apprise_executable: System.find_executable("apprise"), @@ -41,33 +42,15 @@ config :pinchflat, PinchflatWeb.Endpoint, adapter: Phoenix.Endpoint.Cowboy2Adapter, render_errors: [ formats: [html: PinchflatWeb.ErrorHTML, json: PinchflatWeb.ErrorJSON], - layout: false + root_layout: {PinchflatWeb.Layouts, :root}, + layout: {PinchflatWeb.Layouts, :app} ], pubsub_server: Pinchflat.PubSub, live_view: [signing_salt: "/t5878kO"] config :pinchflat, Oban, engine: Oban.Engines.Lite, - repo: Pinchflat.Repo, - # Keep old jobs for 30 days for display in the UI - plugins: [ - {Oban.Plugins.Pruner, max_age: 30 * 24 * 60 * 60}, - {Oban.Plugins.Cron, - crontab: [ - {"0 1 * * *", Pinchflat.Downloading.MediaRetentionWorker}, - {"0 2 * * *", Pinchflat.Downloading.MediaQualityUpgradeWorker} - ]} - ], - # TODO: consider making this an env var or something? 
- queues: [ - default: 10, - fast_indexing: 6, - media_indexing: 2, - media_collection_indexing: 2, - media_fetching: 2, - local_data: 8, - remote_metadata: 4 - ] + repo: Pinchflat.Repo # Configures the mailer # @@ -101,13 +84,19 @@ config :tailwind, ] # Configures Elixir's Logger -config :logger, :console, +config :logger, :default_formatter, format: "$date $time $metadata[$level] | $message\n", metadata: [:request_id] # Use Jason for JSON parsing in Phoenix config :phoenix, :json_library, Jason +config :pinchflat, Pinchflat.PromEx, + disabled: true, + manual_metrics_start_delay: :no_delay, + drop_metrics_groups: [], + metrics_server: :disabled + # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. import_config "#{config_env()}.exs" diff --git a/config/dev.exs b/config/dev.exs index 887322b..8b9b793 100644 --- a/config/dev.exs +++ b/config/dev.exs @@ -67,7 +67,7 @@ config :pinchflat, PinchflatWeb.Endpoint, config :pinchflat, dev_routes: true # Do not include metadata nor timestamps in development logs -config :logger, :console, format: "[$level] $message\n" +config :logger, :default_formatter, format: "[$level] $message\n" # Set a higher stacktrace during development. Avoid configuring such # in production as building large stacktraces may be expensive. @@ -81,3 +81,5 @@ config :phoenix_live_view, :debug_heex_annotations, true # Disable swoosh api client as it is only required for production adapters. config :swoosh, :api_client, false + +config :pinchflat, Pinchflat.PromEx, disabled: false diff --git a/config/runtime.exs b/config/runtime.exs index 06aa731..5624bfe 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -40,40 +40,79 @@ config :pinchflat, Pinchflat.Repo, Path.join([:code.priv_dir(:pinchflat), "repo", "extensions", "sqlean-linux-#{system_arch}", "sqlean"]) ] +# Some users may want to increase the number of workers that use yt-dlp to improve speeds +# Others may want to decrease the number of these workers to lessen the chance of an IP ban +{yt_dlp_worker_count, _} = Integer.parse(System.get_env("YT_DLP_WORKER_CONCURRENCY", "2")) +# Used to set the cron for the yt-dlp update worker. The reason for this is +# to avoid all instances of PF updating yt-dlp at the same time, which 1) +# could result in rate limiting and 2) gives me time to react if an update +# breaks something +%{hour: current_hour, minute: current_minute} = DateTime.utc_now() + +config :pinchflat, Oban, + queues: [ + default: 10, + fast_indexing: yt_dlp_worker_count, + media_collection_indexing: yt_dlp_worker_count, + media_fetching: yt_dlp_worker_count, + remote_metadata: yt_dlp_worker_count, + local_data: 8 + ], + plugins: [ + # Keep old jobs for 30 days for display in the UI + {Oban.Plugins.Pruner, max_age: 30 * 24 * 60 * 60}, + {Oban.Plugins.Cron, + crontab: [ + {"#{current_minute} #{current_hour} * * *", Pinchflat.YtDlp.UpdateWorker}, + {"0 1 * * *", Pinchflat.Downloading.MediaRetentionWorker}, + {"0 2 * * *", Pinchflat.Downloading.MediaQualityUpgradeWorker} + ]} + ] + if config_env() == :prod do - config_path = "/config" + # Various paths. 
These ones shouldn't be tweaked if running in Docker + media_path = System.get_env("MEDIA_PATH", "/downloads") + config_path = System.get_env("CONFIG_PATH", "/config") db_path = System.get_env("DATABASE_PATH", Path.join([config_path, "db", "pinchflat.db"])) log_path = System.get_env("LOG_PATH", Path.join([config_path, "logs", "pinchflat.log"])) metadata_path = System.get_env("METADATA_PATH", Path.join([config_path, "metadata"])) extras_path = System.get_env("EXTRAS_PATH", Path.join([config_path, "extras"])) + tmpfile_path = System.get_env("TMPFILE_PATH", Path.join([System.tmp_dir!(), "pinchflat", "data"])) + # This one can be changed if you want + tz_data_path = System.get_env("TZ_DATA_PATH", Path.join([extras_path, "elixir_tz_data"])) # For running PF as a podcast host on self-hosted environments expose_feed_endpoints = String.length(System.get_env("EXPOSE_FEED_ENDPOINTS", "")) > 0 # For testing alternate journal modes (see issue #137) journal_mode = String.to_existing_atom(System.get_env("JOURNAL_MODE", "wal")) # For running PF in a subdirectory via a reverse proxy base_route_path = System.get_env("BASE_ROUTE_PATH", "/") + enable_ipv6 = String.length(System.get_env("ENABLE_IPV6", "")) > 0 + enable_prometheus = String.length(System.get_env("ENABLE_PROMETHEUS", "")) > 0 config :logger, level: String.to_existing_atom(System.get_env("LOG_LEVEL", "debug")) config :pinchflat, yt_dlp_executable: System.find_executable("yt-dlp"), apprise_executable: System.find_executable("apprise"), - media_directory: "/downloads", + media_directory: media_path, metadata_directory: metadata_path, extras_directory: extras_path, - tmpfile_directory: Path.join([System.tmp_dir!(), "pinchflat", "data"]), + tmpfile_directory: tmpfile_path, dns_cluster_query: System.get_env("DNS_CLUSTER_QUERY"), expose_feed_endpoints: expose_feed_endpoints, - timezone: System.get_env("TIMEZONE") || System.get_env("TZ") || "UTC", + # This is configured in application.ex + timezone: "UTC", log_path: log_path, base_route_path: base_route_path - config :tzdata, :data_dir, System.get_env("TZ_DATA_DIR", "/etc/elixir_tzdata_data") + config :tzdata, :data_dir, tz_data_path config :pinchflat, Pinchflat.Repo, database: db_path, journal_mode: journal_mode + config :pinchflat, Pinchflat.PromEx, disabled: !enable_prometheus + # The secret key base is used to sign/encrypt cookies and other secrets. # A default value is used in config/dev.exs and config/test.exs but you # want to use a different value for prod and you most likely don't want @@ -106,7 +145,7 @@ if config_env() == :prod do # Set it to {0, 0, 0, 0, 0, 0, 0, 1} for local network only access. # See the documentation on https://hexdocs.pm/plug_cowboy/Plug.Cowboy.html # for details about using IPv6 vs IPv4 and loopback vs public addresses. 
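A note on the flag parsing above before the binding change below: `ENABLE_IPV6` (like `ENABLE_PROMETHEUS` and `EXPOSE_FEED_ENDPOINTS`) counts as enabled whenever it is non-blank. There is no boolean parsing, so even `ENABLE_IPV6=false` enables IPv6. A condensed sketch of the pattern, using the same names as the diff:

```elixir
# Any non-blank value counts as "on"; there is no boolean parsing,
# so ENABLE_IPV6=false still enables IPv6
enable_ipv6 = String.length(System.get_env("ENABLE_IPV6", "")) > 0

# The flag then picks the endpoint bind address, as in the hunk below:
# all-interfaces IPv6 when set, all-interfaces IPv4 otherwise
ip = if enable_ipv6, do: {0, 0, 0, 0, 0, 0, 0, 0}, else: {0, 0, 0, 0}
```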
- ip: {0, 0, 0, 0}, + ip: if(enable_ipv6, do: {0, 0, 0, 0, 0, 0, 0, 0}, else: {0, 0, 0, 0}), port: String.to_integer(System.get_env("PORT") || "4000") ], url: [path: base_route_path], diff --git a/docker-compose.yml b/docker-compose.yml index aad86e8..e193a16 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,5 +10,3 @@ services: command: bash -c "chmod +x docker/docker-run.dev.sh && docker/docker-run.dev.sh" stdin_open: true tty: true - env_file: - - .env diff --git a/docker/dev.Dockerfile b/docker/dev.Dockerfile index ce7e137..04a3e13 100644 --- a/docker/dev.Dockerfile +++ b/docker/dev.Dockerfile @@ -1,6 +1,7 @@ -ARG ELIXIR_VERSION=1.17.0 -ARG OTP_VERSION=26.2.5 -ARG DEBIAN_VERSION=bookworm-20240612-slim +ARG ELIXIR_VERSION=1.18.4 +ARG OTP_VERSION=27.2.4 +ARG DEBIAN_VERSION=bookworm-20250428-slim + ARG DEV_IMAGE="hexpm/elixir:${ELIXIR_VERSION}-erlang-${OTP_VERSION}-debian-${DEBIAN_VERSION}" FROM ${DEV_IMAGE} @@ -12,7 +13,7 @@ RUN echo "Building for ${TARGETPLATFORM:?}" RUN apt-get update -qq && \ apt-get install -y inotify-tools curl git openssh-client jq \ python3 python3-setuptools python3-wheel python3-dev pipx \ - python3-mutagen locales procps build-essential graphviz zsh + python3-mutagen locales procps build-essential graphviz zsh unzip # Install ffmpeg RUN export FFMPEG_DOWNLOAD=$(case ${TARGETPLATFORM:-linux/amd64} in \ @@ -31,8 +32,14 @@ RUN curl -sL https://deb.nodesource.com/setup_20.x -o nodesource_setup.sh && \ # Install baseline Elixir packages mix local.hex --force && \ mix local.rebar --force && \ + # Install Deno - required for YouTube downloads (See yt-dlp#14404) + curl -fsSL https://deno.land/install.sh | DENO_INSTALL=/usr/local sh -s -- -y --no-modify-path && \ # Download and update YT-DLP - curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp && \ + export YT_DLP_DOWNLOAD=$(case ${TARGETPLATFORM:-linux/amd64} in \ + "linux/amd64") echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux" ;; \ + "linux/arm64") echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64" ;; \ + *) echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux" ;; esac) && \ + curl -L ${YT_DLP_DOWNLOAD} -o /usr/local/bin/yt-dlp && \ chmod a+rx /usr/local/bin/yt-dlp && \ yt-dlp -U && \ # Install Apprise @@ -49,14 +56,16 @@ ENV LANG=en_US.UTF-8 ENV LANGUAGE=en_US:en ENV LC_ALL=en_US.UTF-8 -# Create app directory and copy the Elixir projects into it. WORKDIR /app -COPY . ./ +COPY mix.exs mix.lock ./ # Install Elixir deps -# RUN mix archive.install github hexpm/hex branch latest +# NOTE: this has to be before the bulk copy to ensure that deps are cached RUN MIX_ENV=dev mix deps.get && MIX_ENV=dev mix deps.compile RUN MIX_ENV=test mix deps.get && MIX_ENV=test mix deps.compile + +COPY . ./ + # Gives us iex shell history ENV ERL_AFLAGS="-kernel shell_history enabled" diff --git a/docker/selfhosted.Dockerfile b/docker/selfhosted.Dockerfile index 0449c88..17f7af7 100644 --- a/docker/selfhosted.Dockerfile +++ b/docker/selfhosted.Dockerfile @@ -1,13 +1,13 @@ # Find eligible builder and runner images on Docker Hub. We use Ubuntu/Debian # instead of Alpine to avoid DNS resolution issues in production. 
-ARG ELIXIR_VERSION=1.17.0 -ARG OTP_VERSION=26.2.5 -ARG DEBIAN_VERSION=bookworm-20240612-slim +ARG ELIXIR_VERSION=1.18.4 +ARG OTP_VERSION=27.2.4 +ARG DEBIAN_VERSION=bookworm-20250428-slim ARG BUILDER_IMAGE="hexpm/elixir:${ELIXIR_VERSION}-erlang-${OTP_VERSION}-debian-${DEBIAN_VERSION}" ARG RUNNER_IMAGE="debian:${DEBIAN_VERSION}" -FROM ${BUILDER_IMAGE} as builder +FROM ${BUILDER_IMAGE} AS builder ARG TARGETPLATFORM RUN echo "Building for ${TARGETPLATFORM:?}" @@ -27,10 +27,10 @@ RUN apt-get update -y && \ # Hex and Rebar mix local.hex --force && \ mix local.rebar --force && \ - # FFmpeg + # FFmpeg (latest build that doesn't cause an illegal instruction error for some users - see #347) export FFMPEG_DOWNLOAD=$(case ${TARGETPLATFORM:-linux/amd64} in \ - "linux/amd64") echo "https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linux64-gpl.tar.xz" ;; \ - "linux/arm64") echo "https://github.com/yt-dlp/FFmpeg-Builds/releases/download/latest/ffmpeg-master-latest-linuxarm64-gpl.tar.xz" ;; \ + "linux/amd64") echo "https://github.com/yt-dlp/FFmpeg-Builds/releases/download/autobuild-2024-07-30-14-10/ffmpeg-N-116468-g0e09f6d690-linux64-gpl.tar.xz" ;; \ + "linux/arm64") echo "https://github.com/yt-dlp/FFmpeg-Builds/releases/download/autobuild-2024-07-30-14-10/ffmpeg-N-116468-g0e09f6d690-linuxarm64-gpl.tar.xz" ;; \ *) echo "" ;; esac) && \ curl -L ${FFMPEG_DOWNLOAD} --output /tmp/ffmpeg.tar.xz && \ tar -xf /tmp/ffmpeg.tar.xz --strip-components=2 --no-anchored -C /usr/local/bin/ "ffmpeg" && \ @@ -73,6 +73,7 @@ RUN mix release FROM ${RUNNER_IMAGE} +ARG TARGETPLATFORM ARG PORT=8945 COPY --from=builder ./usr/local/bin/ffmpeg /usr/bin/ffmpeg @@ -88,18 +89,27 @@ RUN apt-get update -y && \ ca-certificates \ python3-mutagen \ curl \ + zip \ openssh-client \ nano \ python3 \ pipx \ jq \ + # unzip is needed for Deno + unzip \ procps && \ + # Install Deno - required for YouTube downloads (See yt-dlp#14404) + curl -fsSL https://deno.land/install.sh | DENO_INSTALL=/usr/local sh -s -- -y --no-modify-path && \ # Apprise export PIPX_HOME=/opt/pipx && \ export PIPX_BIN_DIR=/usr/local/bin && \ pipx install apprise && \ # yt-dlp - curl -L https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp -o /usr/local/bin/yt-dlp && \ + export YT_DLP_DOWNLOAD=$(case ${TARGETPLATFORM:-linux/amd64} in \ + "linux/amd64") echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux" ;; \ + "linux/arm64") echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64" ;; \ + *) echo "https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux" ;; esac) && \ + curl -L ${YT_DLP_DOWNLOAD} -o /usr/local/bin/yt-dlp && \ chmod a+rx /usr/local/bin/yt-dlp && \ yt-dlp -U && \ # Set the locale @@ -109,36 +119,27 @@ RUN apt-get update -y && \ rm -rf /var/lib/apt/lists/* # More locale setup -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US:en +ENV LC_ALL=en_US.UTF-8 WORKDIR "/app" # Set up data volumes -RUN mkdir /config /downloads /etc/elixir_tzdata_data && chmod ugo+rw /etc/elixir_tzdata_data +RUN mkdir -p /config /downloads /etc/elixir_tzdata_data /etc/yt-dlp/plugins && \ + chmod ugo+rw /etc/elixir_tzdata_data /etc/yt-dlp /etc/yt-dlp/plugins /usr/local/bin /usr/local/bin/yt-dlp # set runner ENV ENV MIX_ENV="prod" ENV PORT=${PORT} ENV RUN_CONTEXT="selfhosted" +ENV UMASK=022 EXPOSE ${PORT} # Only copy the final release from the build stage COPY --from=builder /app/_build/${MIX_ENV}/rel/pinchflat ./ -# 
NEVER do this if you're running in an environment where you don't trust the user -# (ie: most environments). This is only acceptable in a self-hosted environment. -# The user could just run the whole container as root and bypass this anyway so -# it's not a huge deal. -# This removes the root password to allow users to assume root if needed. This is -# preferrable to running the whole container as root so that the files/directories -# created by the app aren't owned by root and are therefore easier for other users -# and processes to interact with. If you want to just run the whole container as -# root, use --user 0:0 or something. -RUN passwd -d root - -HEALTHCHECK --interval=120s --start-period=10s \ +HEALTHCHECK --interval=30s --start-period=15s \ CMD curl --fail http://localhost:${PORT}/healthcheck || exit 1 # Start the app diff --git a/lib/pinchflat/application.ex b/lib/pinchflat/application.ex index 49adcf0..3f823f4 100644 --- a/lib/pinchflat/application.ex +++ b/lib/pinchflat/application.ex @@ -4,10 +4,18 @@ defmodule Pinchflat.Application do @moduledoc false use Application + require Logger @impl true def start(_type, _args) do - children = [ + check_and_update_timezone() + attach_oban_telemetry() + Logger.add_handlers(:pinchflat) + + # See https://hexdocs.pm/elixir/Supervisor.html + # for other strategies and supported options + [ + Pinchflat.PromEx, PinchflatWeb.Telemetry, Pinchflat.Repo, # Must be before startup tasks @@ -20,17 +28,11 @@ defmodule Pinchflat.Application do {Finch, name: Pinchflat.Finch}, # Start a worker by calling: Pinchflat.Worker.start_link(arg) # {Pinchflat.Worker, arg}, - # Start to serve requests, typically the last entry - PinchflatWeb.Endpoint + # Start to serve requests, typically the last entry (except for the post-boot tasks) + PinchflatWeb.Endpoint, + Pinchflat.Boot.PostBootStartupTasks ] - - attach_oban_telemetry() - Logger.add_handlers(:pinchflat) - - # See https://hexdocs.pm/elixir/Supervisor.html - # for other strategies and supported options - opts = [strategy: :one_for_one, name: Pinchflat.Supervisor] - Supervisor.start_link(children, opts) + |> Supervisor.start_link(strategy: :one_for_one, name: Pinchflat.Supervisor) end # Tell Phoenix to update the endpoint configuration @@ -47,4 +49,20 @@ defmodule Pinchflat.Application do :ok = Oban.Telemetry.attach_default_logger() :telemetry.attach_many("job-telemetry-broadcast", events, &PinchflatWeb.Telemetry.job_state_change_broadcast/4, []) end + + # This has to be here (rather than runtime.exs) since the `tzdata` application + # has to be started before we can check the timezone + defp check_and_update_timezone do + attempted_timezone = System.get_env("TIMEZONE") || System.get_env("TZ") || "UTC" + + valid_timezone = + if Tzdata.zone_exists?(attempted_timezone) do + attempted_timezone + else + Logger.warning("Invalid timezone #{attempted_timezone}, defaulting to UTC") + "UTC" + end + + Application.put_env(:pinchflat, :timezone, valid_timezone) + end end diff --git a/lib/pinchflat/boot/post_boot_startup_tasks.ex b/lib/pinchflat/boot/post_boot_startup_tasks.ex new file mode 100644 index 0000000..d6ae6eb --- /dev/null +++ b/lib/pinchflat/boot/post_boot_startup_tasks.ex @@ -0,0 +1,46 @@ +defmodule Pinchflat.Boot.PostBootStartupTasks do + @moduledoc """ + This module is responsible for running startup tasks on app boot + AFTER all other boot steps have taken place and the app is ready to serve requests. + + It's a GenServer because that plays REALLY nicely with the existing + Phoenix supervision tree. 
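As an aside on the "plays nicely with the supervision tree" point: the run-once behaviour comes from the `restart: :temporary` option visible just below. A standalone sketch of the pattern with a hypothetical module (not part of this diff), assuming only the standard `GenServer` API:

```elixir
defmodule RunOnce do
  # :temporary tells the supervisor to start this child once and never
  # restart it, making init/1 an effective one-shot boot hook
  use GenServer, restart: :temporary

  def start_link(opts \\ []) do
    GenServer.start_link(__MODULE__, :ok, opts)
  end

  @impl true
  def init(:ok) do
    # one-time work goes here; the process then sits idle
    {:ok, :done}
  end
end
```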
+ """ + + alias Pinchflat.YtDlp.UpdateWorker, as: YtDlpUpdateWorker + + # restart: :temporary means that this process will never be restarted (ie: will run once and then die) + use GenServer, restart: :temporary + import Ecto.Query, warn: false + + def start_link(opts \\ []) do + GenServer.start_link(__MODULE__, %{env: Application.get_env(:pinchflat, :env)}, opts) + end + + @doc """ + Runs post-boot application startup tasks. + + Any code defined here will run every time the application starts. You must + make sure that the code is idempotent and safe to run multiple times. + + This is a good place to set up default settings, create initial records, stuff like that. + Should be fast - anything with the potential to be slow should be kicked off as a job instead. + """ + @impl true + def init(%{env: :test} = state) do + # Do nothing _as part of the app bootup process_. + # Since bootup calls `start_link` and that's where the `env` state is injected, + # you can still call `.init()` manually to run these tasks for testing purposes + {:ok, state} + end + + def init(state) do + update_yt_dlp() + + {:ok, state} + end + + defp update_yt_dlp do + YtDlpUpdateWorker.kickoff() + end +end diff --git a/lib/pinchflat/boot/post_job_startup_tasks.ex b/lib/pinchflat/boot/post_job_startup_tasks.ex index 5043a25..6eba701 100644 --- a/lib/pinchflat/boot/post_job_startup_tasks.ex +++ b/lib/pinchflat/boot/post_job_startup_tasks.ex @@ -1,7 +1,7 @@ defmodule Pinchflat.Boot.PostJobStartupTasks do @moduledoc """ This module is responsible for running startup tasks on app boot - AFTER the job runner has initiallized. + AFTER the job runner has initialized. It's a GenServer because that plays REALLY nicely with the existing Phoenix supervision tree. @@ -12,7 +12,7 @@ defmodule Pinchflat.Boot.PostJobStartupTasks do import Ecto.Query, warn: false def start_link(opts \\ []) do - GenServer.start_link(__MODULE__, %{}, opts) + GenServer.start_link(__MODULE__, %{env: Application.get_env(:pinchflat, :env)}, opts) end @doc """ @@ -25,6 +25,13 @@ defmodule Pinchflat.Boot.PostJobStartupTasks do Should be fast - anything with the potential to be slow should be kicked off as a job instead. """ @impl true + def init(%{env: :test} = state) do + # Do nothing _as part of the app bootup process_. + # Since bootup calls `start_link` and that's where the `env` state is injected, + # you can still call `.init()` manually to run these tasks for testing purposes + {:ok, state} + end + def init(state) do # Nothing at the moment! diff --git a/lib/pinchflat/boot/pre_job_startup_tasks.ex b/lib/pinchflat/boot/pre_job_startup_tasks.ex index 85fb399..5035e35 100644 --- a/lib/pinchflat/boot/pre_job_startup_tasks.ex +++ b/lib/pinchflat/boot/pre_job_startup_tasks.ex @@ -16,8 +16,10 @@ defmodule Pinchflat.Boot.PreJobStartupTasks do alias Pinchflat.Settings alias Pinchflat.Utils.FilesystemUtils + alias Pinchflat.Lifecycle.UserScripts.CommandRunner, as: UserScriptRunner + def start_link(opts \\ []) do - GenServer.start_link(__MODULE__, %{}, opts) + GenServer.start_link(__MODULE__, %{env: Application.get_env(:pinchflat, :env)}, opts) end @doc """ @@ -30,12 +32,20 @@ defmodule Pinchflat.Boot.PreJobStartupTasks do Should be fast - anything with the potential to be slow should be kicked off as a job instead. """ @impl true + def init(%{env: :test} = state) do + # Do nothing _as part of the app bootup process_. 
+ # Since bootup calls `start_link` and that's where the `env` state is injected, + # you can still call `.init()` manually to run these tasks for testing purposes + {:ok, state} + end + def init(state) do ensure_tmpfile_directory() reset_executing_jobs() create_blank_yt_dlp_files() create_blank_user_script_file() apply_default_settings() + run_app_init_script() {:ok, state} end @@ -95,6 +105,12 @@ defmodule Pinchflat.Boot.PreJobStartupTasks do Settings.set(apprise_version: apprise_version) end + defp run_app_init_script do + runner = Application.get_env(:pinchflat, :user_script_runner, UserScriptRunner) + + runner.run(:app_init, %{}) + end + defp yt_dlp_runner do Application.get_env(:pinchflat, :yt_dlp_runner) end diff --git a/lib/pinchflat/downloading/download_option_builder.ex b/lib/pinchflat/downloading/download_option_builder.ex index 3e04472..ab2b56a 100644 --- a/lib/pinchflat/downloading/download_option_builder.ex +++ b/lib/pinchflat/downloading/download_option_builder.ex @@ -4,10 +4,10 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do """ alias Pinchflat.Sources - alias Pinchflat.Settings alias Pinchflat.Sources.Source alias Pinchflat.Media.MediaItem alias Pinchflat.Downloading.OutputPathBuilder + alias Pinchflat.Downloading.QualityOptionBuilder alias Pinchflat.Utils.FilesystemUtils, as: FSUtils @@ -34,21 +34,38 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do @doc """ Builds the output path for yt-dlp to download media based on the given source's - media profile. Uses the source's override output path template if it exists. + or media_item's media profile. Uses the source's override output path template if it exists. Accepts a %MediaItem{} or %Source{} struct. If a %Source{} struct is passed, it will use a default %MediaItem{} struct with the given source. Returns binary() """ + def build_output_path_for(%Source{} = source_with_preloads) do + build_output_path_for(%MediaItem{source: source_with_preloads}) + end + def build_output_path_for(%MediaItem{} = media_item_with_preloads) do output_path_template = Sources.output_path_template(media_item_with_preloads.source) build_output_path(output_path_template, media_item_with_preloads) end - def build_output_path_for(%Source{} = source_with_preloads) do - build_output_path_for(%MediaItem{source: source_with_preloads}) + @doc """ + Builds the quality options for yt-dlp to download media based on the given source's + or media_item's media profile. Useful for helping predict final filepath of downloaded + media. 
+ + returns [Keyword.t()] + """ + def build_quality_options_for(%Source{} = source_with_preloads) do + build_quality_options_for(%MediaItem{source: source_with_preloads}) + end + + def build_quality_options_for(%MediaItem{} = media_item_with_preloads) do + media_profile = media_item_with_preloads.source.media_profile + + quality_options(media_profile) end defp default_options(override_opts) do @@ -125,26 +142,7 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do end defp quality_options(media_profile) do - vcodec = Settings.get!(:video_codec_preference) - acodec = Settings.get!(:audio_codec_preference) - - case media_profile.preferred_resolution do - # Also be aware that :audio disabled all embedding options for subtitles - :audio -> - [:extract_audio, format_sort: "+acodec:#{acodec}"] - - resolution_atom -> - {resolution_string, _} = - resolution_atom - |> Atom.to_string() - |> Integer.parse() - - [ - # Since Plex doesn't support reading metadata from MKV - remux_video: "mp4", - format_sort: "res:#{resolution_string},+codec:#{vcodec}:#{acodec}" - ] - end + QualityOptionBuilder.build(media_profile) end defp sponsorblock_options(media_profile) do @@ -154,6 +152,7 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do case {behaviour, categories} do {_, []} -> [] {:remove, _} -> [sponsorblock_remove: Enum.join(categories, ",")] + {:mark, _} -> [sponsorblock_mark: Enum.join(categories, ",")] {:disabled, _} -> [] end end @@ -202,6 +201,9 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do source = media_item_with_preloads.source %{ + "media_item_id" => to_string(media_item_with_preloads.id), + "source_id" => to_string(source.id), + "media_profile_id" => to_string(source.media_profile_id), "source_custom_name" => source.custom_name, "source_collection_id" => source.collection_id, "source_collection_name" => source.collection_name, diff --git a/lib/pinchflat/downloading/downloading_helpers.ex b/lib/pinchflat/downloading/downloading_helpers.ex index 5898533..eae187c 100644 --- a/lib/pinchflat/downloading/downloading_helpers.ex +++ b/lib/pinchflat/downloading/downloading_helpers.ex @@ -27,13 +27,15 @@ defmodule Pinchflat.Downloading.DownloadingHelpers do Returns :ok """ - def enqueue_pending_download_tasks(%Source{download_media: true} = source) do + def enqueue_pending_download_tasks(source, job_opts \\ []) + + def enqueue_pending_download_tasks(%Source{download_media: true} = source, job_opts) do source |> Media.list_pending_media_items_for() - |> Enum.each(&MediaDownloadWorker.kickoff_with_task/1) + |> Enum.each(&MediaDownloadWorker.kickoff_with_task(&1, %{}, job_opts)) end - def enqueue_pending_download_tasks(%Source{download_media: false}) do + def enqueue_pending_download_tasks(%Source{download_media: false}, _job_opts) do :ok end @@ -55,13 +57,13 @@ defmodule Pinchflat.Downloading.DownloadingHelpers do Returns {:ok, %Task{}} | {:error, :should_not_download} | {:error, any()} """ - def kickoff_download_if_pending(%MediaItem{} = media_item) do + def kickoff_download_if_pending(%MediaItem{} = media_item, job_opts \\ []) do media_item = Repo.preload(media_item, :source) if media_item.source.download_media && Media.pending_download?(media_item) do Logger.info("Kicking off download for media item ##{media_item.id} (#{media_item.media_id})") - MediaDownloadWorker.kickoff_with_task(media_item) + MediaDownloadWorker.kickoff_with_task(media_item, %{}, job_opts) else {:error, :should_not_download} end diff --git a/lib/pinchflat/downloading/media_download_worker.ex 
b/lib/pinchflat/downloading/media_download_worker.ex index c7ea232..a0fbceb 100644 --- a/lib/pinchflat/downloading/media_download_worker.ex +++ b/lib/pinchflat/downloading/media_download_worker.ex @@ -3,6 +3,7 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do use Oban.Worker, queue: :media_fetching, + priority: 5, unique: [period: :infinity, states: [:available, :scheduled, :retryable, :executing]], tags: ["media_item", "media_fetching", "show_in_dashboard"] @@ -12,6 +13,7 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do alias Pinchflat.Tasks alias Pinchflat.Repo alias Pinchflat.Media + alias Pinchflat.Media.FileSyncing alias Pinchflat.Downloading.MediaDownloader alias Pinchflat.Lifecycle.UserScripts.CommandRunner, as: UserScriptRunner @@ -48,8 +50,7 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do media_item = fetch_and_run_prevent_download_user_script(media_item_id) - # If the source or media item is set to not download media, perform a no-op unless forced - if (media_item.source.download_media && !media_item.prevent_download) || should_force do + if should_download_media?(media_item, should_force, is_quality_upgrade) do download_media_and_schedule_jobs(media_item, is_quality_upgrade, should_force) else :ok @@ -59,6 +60,20 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do Ecto.StaleEntryError -> Logger.info("#{__MODULE__} discarded: media item #{media_item_id} stale") end + # If this is a quality upgrade, only check if the source is set to download media + # or that the media item's download hasn't been prevented + defp should_download_media?(media_item, should_force, true = _is_quality_upgrade) do + (media_item.source.download_media && !media_item.prevent_download) || should_force + end + + # If it's not a quality upgrade, additionally check if the media item is pending download + defp should_download_media?(media_item, should_force, _is_quality_upgrade) do + source = media_item.source + is_pending = Media.pending_download?(media_item) + + (is_pending && source.download_media && !media_item.prevent_download) || should_force + end + # If a user script exists and, when run, returns a non-zero exit code, prevent this and all future downloads # of the media item. 
defp fetch_and_run_prevent_download_user_script(media_item_id) do @@ -85,14 +100,18 @@ media_redownloaded_at: get_redownloaded_at(is_quality_upgrade) }) + :ok = FileSyncing.delete_outdated_files(media_item, updated_media_item) run_user_script(:media_downloaded, updated_media_item) :ok - {:recovered, _} -> + {:recovered, _media_item, _message} -> {:error, :retry} - {:error, message} -> + {:error, :unsuitable_for_download, _message} -> + {:ok, :non_retry} + + {:error, _error_atom, message} -> action_on_error(message) end end @@ -110,7 +129,11 @@ defp action_on_error(message) do # This will attempt re-download at the next indexing, but it won't be retried # immediately as part of job failure logic - non_retryable_errors = ["Video unavailable"] + non_retryable_errors = [ + "Video unavailable", + "Sign in to confirm", + "This video is available to this channel's members" + ] if String.contains?(to_string(message), non_retryable_errors) do Logger.error("yt-dlp download will not be retried: #{inspect(message)}") diff --git a/lib/pinchflat/downloading/media_downloader.ex b/lib/pinchflat/downloading/media_downloader.ex index a10e6b9..1a1ee2f 100644 --- a/lib/pinchflat/downloading/media_downloader.ex +++ b/lib/pinchflat/downloading/media_downloader.ex @@ -9,7 +9,9 @@ defmodule Pinchflat.Downloading.MediaDownloader do alias Pinchflat.Repo alias Pinchflat.Media + alias Pinchflat.Sources alias Pinchflat.Media.MediaItem + alias Pinchflat.Utils.StringUtils alias Pinchflat.Metadata.NfoBuilder alias Pinchflat.Metadata.MetadataParser alias Pinchflat.Metadata.MetadataFileHelpers @@ -20,16 +22,57 @@ defmodule Pinchflat.Downloading.MediaDownloader do @doc """ Downloads media for a media item, updating the media item based on the metadata - returned by yt-dlp. Also saves the entire metadata response to the associated - media_metadata record. + returned by yt-dlp. Encountered errors are saved to the Media Item record. Saves + the entire metadata response to the associated media_metadata record. - NOTE: related methods (like the download worker) won't download if the media item's source is set to not download media. However, I'm not enforcing that here since I need this for testing. This may change in the future but I'm not stressed. - Returns {:ok, %MediaItem{}} | {:error, any, ...any} + Returns {:ok, %MediaItem{}} | {:error, atom(), String.t()} | {:recovered, %MediaItem{}, String.t()} """ def download_for_media_item(%MediaItem{} = media_item, override_opts \\ []) do + case attempt_download_and_update_for_media_item(media_item, override_opts) do + {:ok, media_item} -> + # Returns {:ok, %MediaItem{}} + Media.update_media_item(media_item, %{last_error: nil}) + + {:error, error_atom, message} -> + Media.update_media_item(media_item, %{last_error: StringUtils.wrap_string(message)}) + + {:error, error_atom, message} + + {:recovered, media_item, message} -> + {:ok, updated_media_item} = Media.update_media_item(media_item, %{last_error: StringUtils.wrap_string(message)}) + + {:recovered, updated_media_item, message} + end + end + + # Looks complicated, but here are the key points: + # - download_with_options runs a pre-check to see if the media item is suitable for download. 
+ # - If the media item fails the precheck, it returns {:error, :unsuitable_for_download, message} + # - However, if the precheck fails in a way that we think can be fixed by using cookies, we retry with cookies + # and return the result of that + # - If the precheck passes but the download fails, it normally returns {:error, :download_failed, message} + # - However, there are some errors we can recover from (eg: failure to communicate with SponsorBlock). + # In this case, we attempt the download anyway and update the media item with what details we do have. + # This case returns {:recovered, updated_media_item, message} + # - If we attempt a retry but it fails, we return {:error, :unrecoverable, message} + # - If there is an unknown error unrelated to the above, we return {:error, :unknown, message} + # - Finally, if there is no error, we update the media item with the parsed JSON and return {:ok, updated_media_item} + # + # Restated, here are the return values for each case: + # - On success: {:ok, updated_media_item} + # - On initial failure but successfully recovered: {:recovered, updated_media_item, message} + # - On error: {:error, error_atom, message} where error_atom is one of: + # - `:unsuitable_for_download` if the media item fails the precheck + # - `:unrecoverable` if there was an initial failure and the recovery attempt failed + # - `:download_failed` for all other yt-dlp-related downloading errors + # - `:unknown` for any other errors, including those not related to yt-dlp + # - If we retry using cookies, all of the above return values apply. The cookie retry + # logic is handled transparently as far as the caller is concerned + defp attempt_download_and_update_for_media_item(media_item, override_opts) do output_filepath = FilesystemUtils.generate_metadata_tmpfile(:json) media_with_preloads = Repo.preload(media_item, [:metadata, source: :media_profile]) @@ -37,25 +80,31 @@ defmodule Pinchflat.Downloading.MediaDownloader do {:ok, parsed_json} -> update_media_item_from_parsed_json(media_with_preloads, parsed_json) + {:error, :unsuitable_for_download} -> + message = + "Media item ##{media_with_preloads.id} isn't suitable for download yet. 
May be an active or processing live stream" + + Logger.warning(message) + + {:error, :unsuitable_for_download, message} + {:error, message, _exit_code} -> Logger.error("yt-dlp download error for media item ##{media_with_preloads.id}: #{inspect(message)}") if String.contains?(to_string(message), recoverable_errors()) do - attempt_update_media_item(media_with_preloads, output_filepath) - - {:recovered, message} + attempt_recovery_from_error(media_with_preloads, output_filepath, message) else - {:error, message} + {:error, :download_failed, message} end err -> Logger.error("Unknown error downloading media item ##{media_with_preloads.id}: #{inspect(err)}") - {:error, "Unknown error: #{inspect(err)}"} + {:error, :unknown, "Unknown error: #{inspect(err)}"} end end - defp attempt_update_media_item(media_with_preloads, output_filepath) do + defp attempt_recovery_from_error(media_with_preloads, output_filepath, error_message) do with {:ok, contents} <- File.read(output_filepath), {:ok, parsed_json} <- Phoenix.json_library().decode(contents) do Logger.info(""" @@ -64,12 +113,13 @@ defmodule Pinchflat.Downloading.MediaDownloader do anyway """) - update_media_item_from_parsed_json(media_with_preloads, parsed_json) + {:ok, updated_media_item} = update_media_item_from_parsed_json(media_with_preloads, parsed_json) + {:recovered, updated_media_item, error_message} else err -> Logger.error("Unable to recover error for media item ##{media_with_preloads.id}: #{inspect(err)}") - {:error, :retry_failed} + {:error, :unrecoverable, error_message} end end @@ -106,8 +156,49 @@ defmodule Pinchflat.Downloading.MediaDownloader do defp download_with_options(url, item_with_preloads, output_filepath, override_opts) do {:ok, options} = DownloadOptionBuilder.build(item_with_preloads, override_opts) + force_use_cookies = Keyword.get(override_opts, :force_use_cookies, false) + source_uses_cookies = Sources.use_cookies?(item_with_preloads.source, :downloading) + should_use_cookies = force_use_cookies || source_uses_cookies - YtDlpMedia.download(url, options, output_filepath: output_filepath) + runner_opts = [output_filepath: output_filepath, use_cookies: should_use_cookies] + + case {YtDlpMedia.get_downloadable_status(url, use_cookies: should_use_cookies), should_use_cookies} do + {{:ok, :downloadable}, _} -> + YtDlpMedia.download(url, options, runner_opts) + + {{:ok, :ignorable}, _} -> + {:error, :unsuitable_for_download} + + {{:error, _message, _exit_code} = err, false} -> + # If there was an error and we don't have cookies, this method will retry with cookies + # if doing so would help AND the source allows. 
Otherwise, it will return the error as-is + maybe_retry_with_cookies(url, item_with_preloads, output_filepath, override_opts, err) + + # This gets hit if cookies are enabled which, importantly, also covers the case where we + # retry a download with cookies and it fails again + {{:error, message, exit_code}, true} -> + {:error, message, exit_code} + + {err, _} -> + err + end + end + + defp maybe_retry_with_cookies(url, item_with_preloads, output_filepath, override_opts, err) do + {:error, message, _} = err + source = item_with_preloads.source + message_contains_cookie_error = String.contains?(to_string(message), recoverable_cookie_errors()) + + if Sources.use_cookies?(source, :error_recovery) && message_contains_cookie_error do + download_with_options( + url, + item_with_preloads, + output_filepath, + Keyword.put(override_opts, :force_use_cookies, true) + ) + else + err + end end defp recoverable_errors do @@ -115,4 +206,11 @@ defmodule Pinchflat.Downloading.MediaDownloader do "Unable to communicate with SponsorBlock" ] end + + defp recoverable_cookie_errors do + [ + "Sign in to confirm", + "This video is available to this channel's members" + ] + end end diff --git a/lib/pinchflat/downloading/media_retention_worker.ex b/lib/pinchflat/downloading/media_retention_worker.ex index 3a36100..461ac1c 100644 --- a/lib/pinchflat/downloading/media_retention_worker.ex +++ b/lib/pinchflat/downloading/media_retention_worker.ex @@ -49,6 +49,8 @@ defmodule Pinchflat.Downloading.MediaRetentionWorker do end) end + # NOTE: Since this is a date and not a datetime, we can't add logic to have to-the-minute + # comparison like we can with retention periods. We can only compare to the day. defp delete_media_items_from_before_cutoff do deletable_media = MediaQuery.new() diff --git a/lib/pinchflat/downloading/quality_option_builder.ex b/lib/pinchflat/downloading/quality_option_builder.ex new file mode 100644 index 0000000..cb89435 --- /dev/null +++ b/lib/pinchflat/downloading/quality_option_builder.ex @@ -0,0 +1,66 @@ +defmodule Pinchflat.Downloading.QualityOptionBuilder do + @moduledoc """ + A standalone builder module for building quality-related options for yt-dlp to download media. + + Currently exclusively used in DownloadOptionBuilder since this logic is too complex to just + place in the main module. + """ + + alias Pinchflat.Settings + alias Pinchflat.Profiles.MediaProfile + + @doc """ + Builds the quality-related options for yt-dlp to download media based on the given media profile + + Includes things like container, preferred format/codec, and audio track options. 
+ """ + def build(%MediaProfile{preferred_resolution: :audio, media_container: container} = media_profile) do + acodec = Settings.get!(:audio_codec_preference) + + [ + :extract_audio, + format_sort: "+acodec:#{acodec}", + audio_format: container || "best", + format: build_format_string(media_profile) + ] + end + + def build(%MediaProfile{preferred_resolution: resolution_atom, media_container: container} = media_profile) do + vcodec = Settings.get!(:video_codec_preference) + acodec = Settings.get!(:audio_codec_preference) + {resolution_string, _} = resolution_atom |> Atom.to_string() |> Integer.parse() + + [ + # Since Plex doesn't support reading metadata from MKV + remux_video: container || "mp4", + format_sort: "res:#{resolution_string},+codec:#{vcodec}:#{acodec}", + format: build_format_string(media_profile) + ] + end + + defp build_format_string(%MediaProfile{preferred_resolution: :audio, audio_track: audio_track}) do + if audio_track do + "bestaudio[#{build_format_modifier(audio_track)}]/bestaudio/best" + else + "bestaudio/best" + end + end + + defp build_format_string(%MediaProfile{audio_track: audio_track}) do + if audio_track do + "bestvideo+bestaudio[#{build_format_modifier(audio_track)}]/bestvideo*+bestaudio/best" + else + "bestvideo*+bestaudio/best" + end + end + + # Reminder to self: this conflicts with `--extractor-args "youtube:lang="` + # since that will translate the format_notes as well, which means they may not match. + # At least that's what happens now - worth a re-check if I have to come back to this + defp build_format_modifier("original"), do: "format_note*=original" + defp build_format_modifier("default"), do: "format_note*='(default)'" + # This uses the carat to anchor the language to the beginning of the string + # since that's what's needed to match `en` to `en-US` and `en-GB`, etc. The user + # can always specify the full language code if they want. + defp build_format_modifier(language_code), do: "language^=#{language_code}" +end diff --git a/lib/pinchflat/fast_indexing/fast_indexing_helpers.ex b/lib/pinchflat/fast_indexing/fast_indexing_helpers.ex index 02fc00f..15a4342 100644 --- a/lib/pinchflat/fast_indexing/fast_indexing_helpers.ex +++ b/lib/pinchflat/fast_indexing/fast_indexing_helpers.ex @@ -11,13 +11,28 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do alias Pinchflat.Repo alias Pinchflat.Media + alias Pinchflat.Tasks + alias Pinchflat.Sources alias Pinchflat.Sources.Source alias Pinchflat.FastIndexing.YoutubeRss alias Pinchflat.FastIndexing.YoutubeApi alias Pinchflat.Downloading.DownloadingHelpers + alias Pinchflat.FastIndexing.FastIndexingWorker + alias Pinchflat.Downloading.DownloadOptionBuilder alias Pinchflat.YtDlp.Media, as: YtDlpMedia + @doc """ + Kicks off a new fast indexing task for a source. This will delete any existing fast indexing + tasks for the source before starting a new one. + + Returns {:ok, %Task{}} + """ + def kickoff_indexing_task(%Source{} = source) do + Tasks.delete_pending_tasks_for(source, "FastIndexingWorker", include_executing: true) + FastIndexingWorker.kickoff_with_task(source) + end + @doc """ Fetches new media IDs for a source from YT's API or RSS, indexes them, and kicks off downloading tasks for any pending media items. See comments in `FastIndexingWorker` for more info on the @@ -26,7 +41,11 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do Returns [%MediaItem{}] where each item is a new media item that was created _but not necessarily downloaded_. 
""" - def kickoff_download_tasks_from_youtube_rss_feed(%Source{} = source) do + def index_and_kickoff_downloads(%Source{} = source) do + # The media_profile is needed to determine the quality options to _then_ determine a more + # accurate predicted filepath + source = Repo.preload(source, [:media_profile]) + {:ok, media_ids} = get_recent_media_ids(source) existing_media_items = list_media_items_by_media_id_for(source, media_ids) new_media_ids = media_ids -- Enum.map(existing_media_items, & &1.media_id) @@ -35,6 +54,7 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do Enum.map(new_media_ids, fn media_id -> case create_media_item_from_media_id(source, media_id) do {:ok, media_item} -> + DownloadingHelpers.kickoff_download_if_pending(media_item, priority: 0) media_item err -> @@ -43,7 +63,9 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do end end) - DownloadingHelpers.enqueue_pending_download_tasks(source) + # Pick up any stragglers. Intentionally has a lower priority than the per-media item + # kickoff above + DownloadingHelpers.enqueue_pending_download_tasks(source, priority: 1) Enum.filter(maybe_new_media_items, & &1) end @@ -67,8 +89,16 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do defp create_media_item_from_media_id(source, media_id) do url = "https://www.youtube.com/watch?v=#{media_id}" + # This is set to :metadata instead of :indexing since this happens _after_ the + # actual indexing process. In reality, slow indexing is the only thing that + # should be using :indexing. + should_use_cookies = Sources.use_cookies?(source, :metadata) - case YtDlpMedia.get_media_attributes(url) do + command_opts = + [output: DownloadOptionBuilder.build_output_path_for(source)] ++ + DownloadOptionBuilder.build_quality_options_for(source) + + case YtDlpMedia.get_media_attributes(url, command_opts, use_cookies: should_use_cookies) do {:ok, media_attrs} -> Media.create_media_item_from_backend_attrs(source, media_attrs) diff --git a/lib/pinchflat/fast_indexing/fast_indexing_worker.ex b/lib/pinchflat/fast_indexing/fast_indexing_worker.ex index 368da17..ed83bf3 100644 --- a/lib/pinchflat/fast_indexing/fast_indexing_worker.ex +++ b/lib/pinchflat/fast_indexing/fast_indexing_worker.ex @@ -38,8 +38,8 @@ defmodule Pinchflat.FastIndexing.FastIndexingWorker do Order of operations: 1. FastIndexingWorker (this module) periodically checks the YouTube RSS feed for new media. - with `FastIndexingHelpers.kickoff_download_tasks_from_youtube_rss_feed` - 2. If the above `kickoff_download_tasks_from_youtube_rss_feed` finds new media items in the RSS feed, + with `FastIndexingHelpers.index_and_kickoff_downloads` + 2. If the above `index_and_kickoff_downloads` finds new media items in the RSS feed, it indexes them with a yt-dlp call to create the media item records then kicks off downloading tasks (MediaDownloadWorker) for any new media items _that should be downloaded_. 3. 
Once downloads are kicked off, this worker sends a notification to the apprise server if applicable @@ -67,7 +67,7 @@ defmodule Pinchflat.FastIndexing.FastIndexingWorker do new_media_items = source - |> FastIndexingHelpers.kickoff_download_tasks_from_youtube_rss_feed() + |> FastIndexingHelpers.index_and_kickoff_downloads() |> Enum.filter(&Media.pending_download?(&1)) if source.download_media do diff --git a/lib/pinchflat/fast_indexing/youtube_api.ex b/lib/pinchflat/fast_indexing/youtube_api.ex index 95e6b39..04c7326 100644 --- a/lib/pinchflat/fast_indexing/youtube_api.ex +++ b/lib/pinchflat/fast_indexing/youtube_api.ex @@ -12,6 +12,8 @@ defmodule Pinchflat.FastIndexing.YoutubeApi do @behaviour YoutubeBehaviour + @agent_name {:global, __MODULE__.KeyIndex} + @doc """ Determines if the YouTube API is enabled for fast indexing by checking if the user has an API key set @@ -19,7 +21,7 @@ defmodule Pinchflat.FastIndexing.YoutubeApi do Returns boolean() """ @impl YoutubeBehaviour - def enabled?(), do: is_binary(api_key()) + def enabled?, do: Enum.any?(api_keys()) @doc """ Fetches the recent media IDs from the YouTube API for a given source. @@ -74,8 +76,45 @@ defmodule Pinchflat.FastIndexing.YoutubeApi do |> FunctionUtils.wrap_ok() end - defp api_key do - Settings.get!(:youtube_api_key) + defp api_keys do + case Settings.get!(:youtube_api_key) do + nil -> + [] + + keys -> + keys + |> String.split(",") + |> Enum.map(&String.trim/1) + |> Enum.reject(&(&1 == "")) + end + end + + defp get_or_start_api_key_agent do + case Agent.start(fn -> 0 end, name: @agent_name) do + {:ok, pid} -> pid + {:error, {:already_started, pid}} -> pid + end + end + + # Gets the next API key in round-robin fashion + defp next_api_key do + keys = api_keys() + + case keys do + [] -> + nil + + keys -> + pid = get_or_start_api_key_agent() + + current_index = + Agent.get_and_update(pid, fn current -> + {current, rem(current + 1, length(keys))} + end) + + Logger.debug("Using YouTube API key: #{Enum.at(keys, current_index)}") + Enum.at(keys, current_index) + end end defp construct_api_endpoint(playlist_id) do @@ -83,7 +122,7 @@ defmodule Pinchflat.FastIndexing.YoutubeApi do property_type = "contentDetails" max_results = 50 - "#{api_base}?part=#{property_type}&maxResults=#{max_results}&playlistId=#{playlist_id}&key=#{api_key()}" + "#{api_base}?part=#{property_type}&maxResults=#{max_results}&playlistId=#{playlist_id}&key=#{next_api_key()}" end defp http_client do diff --git a/lib/pinchflat/lifecycle/user_scripts/command_runner.ex b/lib/pinchflat/lifecycle/user_scripts/command_runner.ex index 54aa38c..9a77ea5 100644 --- a/lib/pinchflat/lifecycle/user_scripts/command_runner.ex +++ b/lib/pinchflat/lifecycle/user_scripts/command_runner.ex @@ -12,6 +12,7 @@ defmodule Pinchflat.Lifecycle.UserScripts.CommandRunner do @behaviour UserScriptCommandRunner @event_types [ + :app_init, :media_pre_download, :media_downloaded, :media_deleted diff --git a/lib/pinchflat/media/file_syncing.ex b/lib/pinchflat/media/file_syncing.ex new file mode 100644 index 0000000..7fb5b65 --- /dev/null +++ b/lib/pinchflat/media/file_syncing.ex @@ -0,0 +1,93 @@ +defmodule Pinchflat.Media.FileSyncing do + @moduledoc """ + Functions for ensuring file state is accurately reflected in the database. + """ + + alias Pinchflat.Media + alias Pinchflat.Utils.MapUtils + alias Pinchflat.Media.MediaItem + alias Pinchflat.Utils.FilesystemUtils, as: FSUtils + + @doc """ + Deletes files that are no longer needed by a media item. 
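Returning to the API key rotation above: the `Agent` holds nothing but the next index, and `Agent.get_and_update/2` returns the current value while atomically storing the incremented one. The same pattern in isolation:

```elixir
{:ok, pid} = Agent.start(fn -> 0 end)

next_key = fn keys ->
  index = Agent.get_and_update(pid, fn current -> {current, rem(current + 1, length(keys))} end)
  Enum.at(keys, index)
end

next_key.(["key_a", "key_b"]) # => "key_a"
next_key.(["key_a", "key_b"]) # => "key_b"
next_key.(["key_a", "key_b"]) # => "key_a" (wraps around)
```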
+ + This means that if a media item has been updated, the old and new versions + can be passed and any files that are no longer needed will be deleted. + + An example is a video that gets its quality upgraded and its name changes + between original download and re-download. The old file will exist on-disk + with the old name but the database entry will point to the new file. This + function can be used to delete the old file in this case. + + Returns :ok + """ + def delete_outdated_files(old_media_item, new_media_item) do + non_subtitle_keys = MediaItem.filepath_attributes() -- [:subtitle_filepaths] + + old_non_subtitles = Map.take(old_media_item, non_subtitle_keys) + old_subtitles = MapUtils.from_nested_list(old_media_item.subtitle_filepaths) + new_non_subtitles = Map.take(new_media_item, non_subtitle_keys) + new_subtitles = MapUtils.from_nested_list(new_media_item.subtitle_filepaths) + + handle_file_deletion(old_non_subtitles, new_non_subtitles) + handle_file_deletion(old_subtitles, new_subtitles) + + :ok + end + + @doc """ + Nillifies any media item filepaths that don't exist on disk for a list of media items + + returns [%MediaItem{}] + """ + def sync_file_presence_on_disk(media_items) do + Enum.map(media_items, fn media_item -> + new_attributes = sync_media_item_files(media_item) + # Doing this one-by-one instead of batching since this process + # can take time and a batch could let MediaItem state get out of sync + {:ok, updated_media_item} = Media.update_media_item(media_item, new_attributes) + + updated_media_item + end) + end + + defp handle_file_deletion(old_attributes, new_attributes) do + # The logic: + # - A file should only be deleted if it exists and the new file is different + # - The new attributes are the ones we're interested in keeping + # - If the old attributes have a key that doesn't exist in the new attributes, don't touch it. + # This is good for archiving but may be unpopular for other users so this may change. 
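A worked example of those rules as applied by the loop below (paths are hypothetical):

```elixir
old = %{media_filepath: "/downloads/Old Name.mp4", nfo_filepath: "/downloads/Old Name.nfo"}
new = %{media_filepath: "/downloads/New Name.mp4"}

# media_filepath: present in both and different, so if both files exist on disk and
#   aren't hardlinks to the same inode, "/downloads/Old Name.mp4" gets deleted
# nfo_filepath: absent from the new attributes, so it is left untouched
```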
+ + Enum.each(new_attributes, fn {key, new_filepath} -> + old_filepath = Map.get(old_attributes, key) + files_have_changed = old_filepath && new_filepath && old_filepath != new_filepath + files_exist_on_disk = files_have_changed && File.exists?(old_filepath) && File.exists?(new_filepath) + + if files_exist_on_disk && !FSUtils.filepaths_reference_same_file?(old_filepath, new_filepath) do + FSUtils.delete_file_and_remove_empty_directories(old_filepath) + end + end) + end + + defp sync_media_item_files(media_item) do + non_subtitle_keys = MediaItem.filepath_attributes() -- [:subtitle_filepaths] + subtitle_keys = MapUtils.from_nested_list(media_item.subtitle_filepaths) + non_subtitles = Map.take(media_item, non_subtitle_keys) + + # This one is checking for the negative (ie: only update if the file doesn't exist) + new_non_subtitle_attrs = + Enum.reduce(non_subtitles, %{}, fn {key, filepath}, acc -> + if filepath && File.exists?(filepath), do: acc, else: Map.put(acc, key, nil) + end) + + # This one is checking for the positive (ie: only update if the file exists) + # This is because subtitles, being an array type in the DB, are most easily updated + # by a full replacement rather than finding the actual diff + new_subtitle_attrs = + Enum.reduce(subtitle_keys, [], fn {key, filepath}, acc -> + if filepath && File.exists?(filepath), do: acc ++ [[key, filepath]], else: acc + end) + + Map.put(new_non_subtitle_attrs, :subtitle_filepaths, new_subtitle_attrs) + end +end diff --git a/lib/pinchflat/media/file_syncing_worker.ex b/lib/pinchflat/media/file_syncing_worker.ex new file mode 100644 index 0000000..fde7e73 --- /dev/null +++ b/lib/pinchflat/media/file_syncing_worker.ex @@ -0,0 +1,38 @@ +defmodule Pinchflat.Media.FileSyncingWorker do + @moduledoc false + + use Oban.Worker, + queue: :local_data, + tags: ["sources", "local_data"] + + alias __MODULE__ + alias Pinchflat.Repo + alias Pinchflat.Tasks + alias Pinchflat.Sources + alias Pinchflat.Media.FileSyncing + + @doc """ + Starts the source file syncing worker. + + Returns {:ok, %Task{}} | {:error, %Ecto.Changeset{}} + """ + def kickoff_with_task(source, opts \\ []) do + %{id: source.id} + |> FileSyncingWorker.new(opts) + |> Tasks.create_job_with_task(source) + end + + @doc """ + Deletes a profile and optionally deletes its files + + Returns :ok + """ + @impl Oban.Worker + def perform(%Oban.Job{args: %{"id" => source_id}}) do + source = Repo.preload(Sources.get_source!(source_id), :media_items) + + FileSyncing.sync_file_presence_on_disk(source.media_items) + + :ok + end +end diff --git a/lib/pinchflat/media/media.ex b/lib/pinchflat/media/media.ex index 013bf09..e1d8d7d 100644 --- a/lib/pinchflat/media/media.ex +++ b/lib/pinchflat/media/media.ex @@ -15,6 +15,9 @@ defmodule Pinchflat.Media do alias Pinchflat.Lifecycle.UserScripts.CommandRunner, as: UserScriptRunner + # Some fields should only be set on insert and not on update. + @fields_to_drop_on_update [:playlist_index] + @doc """ Returns the list of media_items. @@ -131,8 +134,6 @@ defmodule Pinchflat.Media do """ def create_media_item_from_backend_attrs(source, media_attrs_struct) do attrs = Map.merge(%{source_id: source.id}, Map.from_struct(media_attrs_struct)) - # Some fields should only be set on insert and not on update. 
- fields_to_drop_on_update = [:playlist_index] %MediaItem{} |> MediaItem.changeset(attrs) @@ -140,7 +141,7 @@ defmodule Pinchflat.Media do on_conflict: [ set: attrs - |> Map.drop(fields_to_drop_on_update) + |> Map.drop(@fields_to_drop_on_update) |> Map.to_list() ], conflict_target: [:source_id, :media_id] @@ -153,8 +154,10 @@ defmodule Pinchflat.Media do Returns {:ok, %MediaItem{}} | {:error, %Ecto.Changeset{}} """ def update_media_item(%MediaItem{} = media_item, attrs) do + update_attrs = Map.drop(attrs, @fields_to_drop_on_update) + media_item - |> MediaItem.changeset(attrs) + |> MediaItem.changeset(update_attrs) |> Repo.update() end diff --git a/lib/pinchflat/media/media_item.ex b/lib/pinchflat/media/media_item.ex index f85fcfb..ced8e8c 100644 --- a/lib/pinchflat/media/media_item.ex +++ b/lib/pinchflat/media/media_item.ex @@ -31,6 +31,7 @@ defmodule Pinchflat.Media.MediaItem do :uploaded_at, :upload_date_index, :duration_seconds, + :predicted_media_filepath, # these fields are captured only on download :media_downloaded_at, :media_filepath, @@ -39,6 +40,7 @@ defmodule Pinchflat.Media.MediaItem do :thumbnail_filepath, :metadata_filepath, :nfo_filepath, + :last_error, # These are user or system controlled fields :prevent_download, :prevent_culling, @@ -76,6 +78,7 @@ defmodule Pinchflat.Media.MediaItem do field :duration_seconds, :integer field :playlist_index, :integer, default: 0 + field :predicted_media_filepath, :string field :media_filepath, :string field :media_size_bytes, :integer field :thumbnail_filepath, :string @@ -86,6 +89,7 @@ defmodule Pinchflat.Media.MediaItem do # Will very likely revisit because I can't leave well-enough alone. field :subtitle_filepaths, {:array, {:array, :string}}, default: [] + field :last_error, :string field :prevent_download, :boolean, default: false field :prevent_culling, :boolean, default: false field :culled_at, :utc_datetime @@ -110,6 +114,9 @@ defmodule Pinchflat.Media.MediaItem do |> dynamic_default(:uuid, fn _ -> Ecto.UUID.generate() end) |> update_upload_date_index() |> validate_required(@required_fields) + # Validate that the title does NOT start with "youtube video #" since that indicates a restriction by YouTube. + # See issue #549 for more information. 
+ |> validate_format(:title, ~r/^(?!youtube video #)/) |> unique_constraint([:media_id, :source_id]) end diff --git a/lib/pinchflat/media/media_query.ex b/lib/pinchflat/media/media_query.ex index e038683..840e82c 100644 --- a/lib/pinchflat/media/media_query.ex +++ b/lib/pinchflat/media/media_query.ex @@ -75,12 +75,20 @@ defmodule Pinchflat.Media.MediaQuery do ) end + def meets_min_and_max_duration do + dynamic( + [mi, source], + (is_nil(source.min_duration_seconds) or fragment("duration_seconds >= ?", source.min_duration_seconds)) and + (is_nil(source.max_duration_seconds) or fragment("duration_seconds <= ?", source.max_duration_seconds)) + ) + end + def past_retention_period do dynamic( [mi, source], fragment(""" IFNULL(retention_period_days, 0) > 0 AND - DATETIME('now', '-' || retention_period_days || ' day') > media_downloaded_at + DATETIME(media_downloaded_at, '+' || retention_period_days || ' day') < DATETIME('now') """) ) end @@ -92,8 +100,8 @@ defmodule Pinchflat.Media.MediaQuery do # downloaded_at minus the redownload_delay_days is before the upload date fragment(""" IFNULL(redownload_delay_days, 0) > 0 AND - DATETIME('now', '-' || redownload_delay_days || ' day') > uploaded_at AND - DATETIME(media_downloaded_at, '-' || redownload_delay_days || ' day') < uploaded_at + DATE('now', '-' || redownload_delay_days || ' day') > DATE(uploaded_at) AND + DATE(media_downloaded_at, '-' || redownload_delay_days || ' day') < DATE(uploaded_at) """) ) end @@ -123,7 +131,8 @@ defmodule Pinchflat.Media.MediaQuery do not (^download_prevented()) and ^upload_date_after_source_cutoff() and ^format_matching_profile_preference() and - ^matches_source_title_regex() + ^matches_source_title_regex() and + ^meets_min_and_max_duration() ) end diff --git a/lib/pinchflat/metadata/metadata_file_helpers.ex b/lib/pinchflat/metadata/metadata_file_helpers.ex index b728f47..842f6ef 100644 --- a/lib/pinchflat/metadata/metadata_file_helpers.ex +++ b/lib/pinchflat/metadata/metadata_file_helpers.ex @@ -9,6 +9,7 @@ defmodule Pinchflat.Metadata.MetadataFileHelpers do needed """ + alias Pinchflat.Sources alias Pinchflat.Utils.FilesystemUtils alias Pinchflat.YtDlp.Media, as: YtDlpMedia @@ -62,11 +63,13 @@ defmodule Pinchflat.Metadata.MetadataFileHelpers do Returns binary() | nil """ - def download_and_store_thumbnail_for(database_record) do - yt_dlp_filepath = generate_filepath_for(database_record, "thumbnail.%(ext)s") - real_filepath = generate_filepath_for(database_record, "thumbnail.jpg") + def download_and_store_thumbnail_for(media_item_with_preloads) do + yt_dlp_filepath = generate_filepath_for(media_item_with_preloads, "thumbnail.%(ext)s") + real_filepath = generate_filepath_for(media_item_with_preloads, "thumbnail.jpg") + command_opts = [output: yt_dlp_filepath] + addl_opts = [use_cookies: Sources.use_cookies?(media_item_with_preloads.source, :metadata)] - case YtDlpMedia.download_thumbnail(database_record.original_url, output: yt_dlp_filepath) do + case YtDlpMedia.download_thumbnail(media_item_with_preloads.original_url, command_opts, addl_opts) do {:ok, _} -> real_filepath _ -> nil end diff --git a/lib/pinchflat/metadata/source_metadata_storage_worker.ex b/lib/pinchflat/metadata/source_metadata_storage_worker.ex index 8ccfdf9..feb793b 100644 --- a/lib/pinchflat/metadata/source_metadata_storage_worker.ex +++ b/lib/pinchflat/metadata/source_metadata_storage_worker.ex @@ -92,7 +92,9 @@ defmodule Pinchflat.Metadata.SourceMetadataStorageWorker do defp determine_series_directory(source) do output_path = 
DownloadOptionBuilder.build_output_path_for(source)
- {:ok, %{filepath: filepath}} = MediaCollection.get_source_details(source.original_url, output: output_path)
+ runner_opts = [output: output_path]
+ addl_opts = [use_cookies: Sources.use_cookies?(source, :metadata)]
+ {:ok, %{filepath: filepath}} = MediaCollection.get_source_details(source.original_url, runner_opts, addl_opts)
 case MetadataFileHelpers.series_directory_from_media_filepath(filepath) do
 {:ok, series_directory} -> series_directory
@@ -111,6 +113,7 @@
 defp fetch_metadata_for_source(source) do
 tmp_output_path = "#{tmp_directory()}/#{StringUtils.random_string(16)}/source_image.%(ext)S"
 base_opts = [convert_thumbnails: "jpg", output: tmp_output_path]
+ should_use_cookies = Sources.use_cookies?(source, :metadata)
 opts =
 if source.collection_type == :channel do
@@ -119,7 +122,7 @@
 base_opts ++ [:write_thumbnail, playlist_items: 1]
 end
- MediaCollection.get_source_metadata(source.original_url, opts)
+ MediaCollection.get_source_metadata(source.original_url, opts, use_cookies: should_use_cookies)
 end
 defp tmp_directory do
diff --git a/lib/pinchflat/podcasts/opml_feed_builder.ex b/lib/pinchflat/podcasts/opml_feed_builder.ex
new file mode 100644
index 0000000..c764a66
--- /dev/null
+++ b/lib/pinchflat/podcasts/opml_feed_builder.ex
@@ -0,0 +1,40 @@
+defmodule Pinchflat.Podcasts.OpmlFeedBuilder do
+ @moduledoc """
+ Methods for building an OPML feed for a list of sources.
+ """
+
+ import Pinchflat.Utils.XmlUtils, only: [safe: 1]
+
+ alias PinchflatWeb.Router.Helpers, as: Routes
+
+ @doc """
+ Builds an OPML feed for a given list of sources.
+
+ Returns an XML document as a string.
+ """
+ def build(url_base, sources) do
+ sources_xml =
+ Enum.map(
+ sources,
+ &"""
+ <outline type="rss" text="#{safe(&1.custom_name)}" xmlUrl="#{safe(source_route(url_base, &1))}" />
+ """
+ )
+
+ """
+ <?xml version="1.0" encoding="UTF-8"?>
+ <opml version="2.0">
+ <head>
+ <title>All Sources</title>
+ </head>
+ <body>
+ #{Enum.join(sources_xml, "\n")}
+ </body>
+ </opml>
+ """
+ end
+
+ defp source_route(url_base, source) do
+ Path.join(url_base, "#{Routes.podcast_path(PinchflatWeb.Endpoint, :rss_feed, source.uuid)}.xml")
+ end
+end
diff --git a/lib/pinchflat/podcasts/podcast_helpers.ex b/lib/pinchflat/podcasts/podcast_helpers.ex
index 041107c..30ba17a 100644
--- a/lib/pinchflat/podcasts/podcast_helpers.ex
+++ b/lib/pinchflat/podcasts/podcast_helpers.ex
@@ -5,11 +5,25 @@ defmodule Pinchflat.Podcasts.PodcastHelpers do
 """
 use Pinchflat.Media.MediaQuery
+ use Pinchflat.Sources.SourcesQuery
 alias Pinchflat.Repo
 alias Pinchflat.Metadata.MediaMetadata
 alias Pinchflat.Metadata.SourceMetadata
+ @doc """
+ Returns a list of sources that are not marked for deletion.
+
+ Returns: [%Source{}]
+ """
+ def opml_sources() do
+ SourcesQuery.new()
+ |> select([s], %{custom_name: s.custom_name, uuid: s.uuid})
+ |> where([s], is_nil(s.marked_for_deletion_at))
+ |> order_by(asc: :custom_name)
+ |> Repo.all()
+ end
+
 @doc """
 Returns a list of media items that have been downloaded to disk and
 have been proven to still exist there.
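For context, OPML is the de facto import/export format podcast apps understand: one `<outline>` element per feed. A hedged usage sketch (host and uuid invented; the exact feed path comes from the router helper):

```elixir
iex> sources = [%{custom_name: "Some Channel", uuid: "abc-123"}]
iex> Pinchflat.Podcasts.OpmlFeedBuilder.build("http://localhost:8945", sources)
# => "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<opml version=\"2.0\">..." with one
#    <outline type="rss" text="Some Channel" xmlUrl="http://localhost:8945/..."/> entry
```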
diff --git a/lib/pinchflat/profiles/media_profile.ex b/lib/pinchflat/profiles/media_profile.ex index da86df3..90c93d6 100644 --- a/lib/pinchflat/profiles/media_profile.ex +++ b/lib/pinchflat/profiles/media_profile.ex @@ -26,7 +26,9 @@ defmodule Pinchflat.Profiles.MediaProfile do sponsorblock_categories shorts_behaviour livestream_behaviour + audio_track preferred_resolution + media_container redownload_delay_days marked_for_deletion_at )a @@ -53,7 +55,7 @@ defmodule Pinchflat.Profiles.MediaProfile do field :embed_metadata, :boolean, default: false field :download_nfo, :boolean, default: false - field :sponsorblock_behaviour, Ecto.Enum, values: [:disabled, :remove], default: :disabled + field :sponsorblock_behaviour, Ecto.Enum, values: [:disabled, :mark, :remove], default: :disabled field :sponsorblock_categories, {:array, :string}, default: [] # NOTE: these do NOT speed up indexing - the indexer still has to go # through the entire collection to determine if a media is a short or @@ -64,7 +66,9 @@ defmodule Pinchflat.Profiles.MediaProfile do # See `build_format_clauses` in the Media context for more. field :shorts_behaviour, Ecto.Enum, values: ~w(include exclude only)a, default: :include field :livestream_behaviour, Ecto.Enum, values: ~w(include exclude only)a, default: :include - field :preferred_resolution, Ecto.Enum, values: ~w(4320p 2160p 1080p 720p 480p 360p audio)a, default: :"1080p" + field :audio_track, :string + field :preferred_resolution, Ecto.Enum, values: ~w(4320p 2160p 1440p 1080p 720p 480p 360p audio)a, default: :"1080p" + field :media_container, :string, default: nil field :marked_for_deletion_at, :utc_datetime diff --git a/lib/pinchflat/profiles/media_profile_deletion_worker.ex b/lib/pinchflat/profiles/media_profile_deletion_worker.ex index 529474d..230a085 100644 --- a/lib/pinchflat/profiles/media_profile_deletion_worker.ex +++ b/lib/pinchflat/profiles/media_profile_deletion_worker.ex @@ -14,7 +14,7 @@ defmodule Pinchflat.Profiles.MediaProfileDeletionWorker do Starts the profile deletion worker. Does not attach it to a task like `kickoff_with_task/2` since deletion also cancels all tasks for the profile - Returns {:ok, %Task{}} | {:error, %Ecto.Changeset{}} + Returns {:ok, %Oban.Job{}} | {:error, %Ecto.Changeset{}} """ def kickoff(profile, job_args \\ %{}, job_opts \\ []) do %{id: profile.id} diff --git a/lib/pinchflat/profiles/profiles_query.ex b/lib/pinchflat/profiles/profiles_query.ex new file mode 100644 index 0000000..caa1315 --- /dev/null +++ b/lib/pinchflat/profiles/profiles_query.ex @@ -0,0 +1,29 @@ +defmodule Pinchflat.Profiles.ProfilesQuery do + @moduledoc """ + Query helpers for the Profiles context. + + These methods are made to be one-ish liners used + to compose queries. Each method should strive to do + _one_ thing. These don't need to be tested as + they are just building blocks for other functionality + which, itself, will be tested. 
+ """ + import Ecto.Query, warn: false + + alias Pinchflat.Profiles.MediaProfile + + # This allows the module to be aliased and query methods to be used + # all in one go + # usage: use Pinchflat.Profiles.ProfilesQuery + defmacro __using__(_opts) do + quote do + import Ecto.Query, warn: false + + alias unquote(__MODULE__) + end + end + + def new do + MediaProfile + end +end diff --git a/lib/pinchflat/prom_ex.ex b/lib/pinchflat/prom_ex.ex new file mode 100644 index 0000000..a46347d --- /dev/null +++ b/lib/pinchflat/prom_ex.ex @@ -0,0 +1,40 @@ +defmodule Pinchflat.PromEx do + @moduledoc """ + Configuration for the PromEx library which provides Prometheus metrics + """ + + use PromEx, otp_app: :pinchflat + + alias PromEx.Plugins + + @impl true + def plugins do + [ + Plugins.Application, + Plugins.Beam, + {Plugins.Phoenix, router: PinchflatWeb.Router, endpoint: PinchflatWeb.Endpoint}, + Plugins.Ecto, + Plugins.Oban, + Plugins.PhoenixLiveView + ] + end + + @impl true + def dashboard_assigns do + [ + default_selected_interval: "30s" + ] + end + + @impl true + def dashboards do + [ + {:prom_ex, "application.json"}, + {:prom_ex, "beam.json"}, + {:prom_ex, "phoenix.json"}, + {:prom_ex, "ecto.json"}, + {:prom_ex, "oban.json"}, + {:prom_ex, "phoenix_live_view.json"} + ] + end +end diff --git a/lib/pinchflat/release.ex b/lib/pinchflat/release.ex index 4547c0a..c6060c3 100644 --- a/lib/pinchflat/release.ex +++ b/lib/pinchflat/release.ex @@ -29,6 +29,8 @@ defmodule Pinchflat.Release do [ "/config", "/downloads", + "/etc/yt-dlp", + "/etc/yt-dlp/plugins", Application.get_env(:pinchflat, :media_directory), Application.get_env(:pinchflat, :tmpfile_directory), Application.get_env(:pinchflat, :extras_directory), diff --git a/lib/pinchflat/settings/setting.ex b/lib/pinchflat/settings/setting.ex index d449ca0..f2a6b0a 100644 --- a/lib/pinchflat/settings/setting.ex +++ b/lib/pinchflat/settings/setting.ex @@ -14,15 +14,19 @@ defmodule Pinchflat.Settings.Setting do :apprise_server, :video_codec_preference, :audio_codec_preference, - :youtube_api_key + :youtube_api_key, + :extractor_sleep_interval_seconds, + :download_throughput_limit, + :restrict_filenames ] - @required_fields ~w( - onboarding - pro_enabled - video_codec_preference - audio_codec_preference - )a + @required_fields [ + :onboarding, + :pro_enabled, + :video_codec_preference, + :audio_codec_preference, + :extractor_sleep_interval_seconds + ] schema "settings" do field :onboarding, :boolean, default: true @@ -31,6 +35,11 @@ defmodule Pinchflat.Settings.Setting do field :apprise_version, :string field :apprise_server, :string field :youtube_api_key, :string + field :route_token, :string + field :extractor_sleep_interval_seconds, :integer, default: 0 + # This is a string because it accepts values like "100K" or "4.2M" + field :download_throughput_limit, :string + field :restrict_filenames, :boolean, default: false field :video_codec_preference, :string field :audio_codec_preference, :string @@ -41,5 +50,6 @@ defmodule Pinchflat.Settings.Setting do setting |> cast(attrs, @allowed_fields) |> validate_required(@required_fields) + |> validate_number(:extractor_sleep_interval_seconds, greater_than_or_equal_to: 0) end end diff --git a/lib/pinchflat/slow_indexing/file_follower_server.ex b/lib/pinchflat/slow_indexing/file_follower_server.ex index 91c514d..655a6da 100644 --- a/lib/pinchflat/slow_indexing/file_follower_server.ex +++ b/lib/pinchflat/slow_indexing/file_follower_server.ex @@ -106,7 +106,7 @@ defmodule Pinchflat.SlowIndexing.FileFollowerServer do 
{:noreply, %{state | last_activity: DateTime.utc_now()}} :eof -> - Logger.debug("EOF reached, waiting before trying to read new lines") + Logger.debug("Current batch of media processed. Will check again in #{@poll_interval_ms}ms") Process.send_after(self(), :read_new_lines, @poll_interval_ms) {:noreply, state} diff --git a/lib/pinchflat/slow_indexing/media_collection_indexing_worker.ex b/lib/pinchflat/slow_indexing/media_collection_indexing_worker.ex index 5dbac3e..ae555ff 100644 --- a/lib/pinchflat/slow_indexing/media_collection_indexing_worker.ex +++ b/lib/pinchflat/slow_indexing/media_collection_indexing_worker.ex @@ -79,21 +79,21 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorker do case {source.index_frequency_minutes, source.last_indexed_at} do {index_freq, _} when index_freq > 0 -> # If the indexing is on a schedule simply run indexing and reschedule - perform_indexing_and_notification(source) + perform_indexing_and_notification(source, was_forced: args["force"]) maybe_enqueue_fast_indexing_task(source) reschedule_indexing(source) {_, nil} -> # If the source has never been indexed, index it once # even if it's not meant to reschedule - perform_indexing_and_notification(source) + perform_indexing_and_notification(source, was_forced: args["force"]) :ok _ -> # If the source HAS been indexed and is not meant to reschedule, # perform a no-op (unless forced) if args["force"] do - perform_indexing_and_notification(source) + perform_indexing_and_notification(source, was_forced: true) end :ok @@ -103,11 +103,11 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorker do Ecto.StaleEntryError -> Logger.info("#{__MODULE__} discarded: source #{source_id} stale") end - defp perform_indexing_and_notification(source) do + defp perform_indexing_and_notification(source, indexing_opts) do apprise_server = Settings.get!(:apprise_server) SourceNotifications.wrap_new_media_notification(apprise_server, source, fn -> - SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source) + SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source, indexing_opts) end) end diff --git a/lib/pinchflat/slow_indexing/slow_indexing_helpers.ex b/lib/pinchflat/slow_indexing/slow_indexing_helpers.ex index 2434b92..8721b21 100644 --- a/lib/pinchflat/slow_indexing/slow_indexing_helpers.ex +++ b/lib/pinchflat/slow_indexing/slow_indexing_helpers.ex @@ -5,6 +5,8 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do Many of these methods are made to be kickoff or be consumed by workers. """ + use Pinchflat.Media.MediaQuery + require Logger alias Pinchflat.Repo @@ -14,30 +16,52 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do alias Pinchflat.Sources.Source alias Pinchflat.Media.MediaItem alias Pinchflat.YtDlp.MediaCollection + alias Pinchflat.Utils.FilesystemUtils alias Pinchflat.Downloading.DownloadingHelpers alias Pinchflat.SlowIndexing.FileFollowerServer + alias Pinchflat.Downloading.DownloadOptionBuilder alias Pinchflat.SlowIndexing.MediaCollectionIndexingWorker alias Pinchflat.YtDlp.Media, as: YtDlpMedia @doc """ - Starts tasks for indexing a source's media regardless of the source's indexing - frequency. It's assumed the caller will check for indexing frequency. + Kills old indexing tasks and starts a new task to index the media collection. - Returns {:ok, %Task{}}. 
+ The job is delayed based on the source's `index_frequency_minutes` setting unless + one of the following is true: + - The `force` option is set to true + - The source has never been indexed before + - The source has been indexed before, but the last indexing job was more than + `index_frequency_minutes` ago + + Returns {:ok, %Task{}} """ def kickoff_indexing_task(%Source{} = source, job_args \\ %{}, job_opts \\ []) do - Tasks.delete_pending_tasks_for(source, "FastIndexingWorker") - Tasks.delete_pending_tasks_for(source, "MediaCollectionIndexingWorker") + job_offset_seconds = if job_args[:force], do: 0, else: calculate_job_offset_seconds(source) - MediaCollectionIndexingWorker.kickoff_with_task(source, job_args, job_opts) + Tasks.delete_pending_tasks_for(source, "MediaCollectionIndexingWorker", include_executing: true) + + MediaCollectionIndexingWorker.kickoff_with_task(source, job_args, job_opts ++ [schedule_in: job_offset_seconds]) + end + + @doc """ + A helper method to delete all indexing-related tasks for a source. + Optionally, you can include executing tasks in the deletion process. + + Returns :ok + """ + def delete_indexing_tasks(%Source{} = source, opts \\ []) do + include_executing = Keyword.get(opts, :include_executing, false) + + Tasks.delete_pending_tasks_for(source, "FastIndexingWorker", include_executing: include_executing) + Tasks.delete_pending_tasks_for(source, "MediaCollectionIndexingWorker", include_executing: include_executing) end @doc """ Given a media source, creates (indexes) the media by creating media_items for each media ID in the source. Afterward, kicks off a download task for each pending media - item belonging to the source. You can't tell me the method name isn't descriptive! - Returns a list of media items or changesets (if the media item couldn't be created). + item belonging to the source. Returns a list of media items or changesets + (if the media item couldn't be created). Indexing is slow and usually returns a list of all media data at once for record creation. To help with this, we use a file follower to watch the file that yt-dlp writes to @@ -45,20 +69,33 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do clarity to the user experience. This has a few things to be aware of which are documented below in the file watcher setup method. + Additionally, in the case of a repeat index we create a download archive file that + contains some media IDs that we've indexed in the past. Note that this archive doesn't + contain the most recent IDs but rather a subset of IDs that are offset by some amount. + Practically, this means that we'll re-index a small handful of media that we've recently + indexed, but this is a good thing since it'll let us pick up on any recent changes to the + most recent media items. + + We don't create a download archive for playlists (only channels), nor do we create one if + the indexing was forced by the user. + NOTE: downloads are only enqueued if the source is set to download media. Downloads are also enqueued for ALL pending media items, not just the ones that were indexed in this job run. This should ensure that any stragglers are caught if, for some reason, they weren't enqueued or somehow got de-queued. - Since indexing returns all media data EVERY TIME, we that that opportunity to update - indexing metadata for media items that have already been created. 
+ Available options: + - `was_forced`: Whether the indexing was forced by the user Returns [%MediaItem{} | %Ecto.Changeset{}] """ - def index_and_enqueue_download_for_media_items(%Source{} = source) do + def index_and_enqueue_download_for_media_items(%Source{} = source, opts \\ []) do + # The media_profile is needed to determine the quality options to _then_ determine a more + # accurate predicted filepath + source = Repo.preload(source, [:media_profile]) # See the method definition below for more info on how file watchers work # (important reading if you're not familiar with it) - {:ok, media_attributes} = setup_file_watcher_and_kickoff_indexing(source) + {:ok, media_attributes} = setup_file_watcher_and_kickoff_indexing(source, opts) # Reload because the source may have been updated during the (long-running) indexing process # and important settings like `download_media` may have changed. source = Repo.reload!(source) @@ -90,11 +127,20 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do # It attempts a graceful shutdown of the file follower after the indexing is done, # but the FileFollowerServer will also stop itself if it doesn't see any activity # for a sufficiently long time. - defp setup_file_watcher_and_kickoff_indexing(source) do + defp setup_file_watcher_and_kickoff_indexing(source, opts) do + was_forced = Keyword.get(opts, :was_forced, false) {:ok, pid} = FileFollowerServer.start_link() handler = fn filepath -> setup_file_follower_watcher(pid, filepath, source) end - result = MediaCollection.get_media_attributes_for_collection(source.original_url, file_listener_handler: handler) + should_use_cookies = Sources.use_cookies?(source, :indexing) + + command_opts = + [output: DownloadOptionBuilder.build_output_path_for(source)] ++ + DownloadOptionBuilder.build_quality_options_for(source) ++ + build_download_archive_options(source, was_forced) + + runner_opts = [file_listener_handler: handler, use_cookies: should_use_cookies] + result = MediaCollection.get_media_attributes_for_collection(source.original_url, command_opts, runner_opts) FileFollowerServer.stop(pid) @@ -131,4 +177,68 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do changeset end end + + # Find the difference between the current time and the last time the source was indexed + defp calculate_job_offset_seconds(%Source{last_indexed_at: nil}), do: 0 + + defp calculate_job_offset_seconds(source) do + offset_seconds = DateTime.diff(DateTime.utc_now(), source.last_indexed_at, :second) + index_frequency_seconds = source.index_frequency_minutes * 60 + + max(0, index_frequency_seconds - offset_seconds) + end + + # The download archive file works in tandem with --break-on-existing to stop + # yt-dlp once we've hit media items we've already indexed. But we generate + # this list with a bit of an offset so we do intentionally re-scan some media + # items to pick up any recent changes (see `get_media_items_for_download_archive`). + # + # From there, we format the media IDs in the way that yt-dlp expects (ie: " ") + # and return the filepath to the caller. 
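Concretely, yt-dlp's archive format is one `<extractor> <id>` pair per line, and `--break-on-existing` makes it stop as soon as it hits an ID that's already in the archive. So the file written by `create_download_archive_file/1` below looks something like this (IDs invented):

```elixir
# youtube dQw4w9WgXcQ
# youtube 9bZkp7q19f0
# youtube jNQXAC9IVRw
#
# passed along as: --download-archive <tmpfile> --break-on-existing
```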
+ defp create_download_archive_file(source) do
+ tmpfile = FilesystemUtils.generate_metadata_tmpfile(:txt)
+
+ archive_contents =
+ source
+ |> get_media_items_for_download_archive()
+ |> Enum.map_join("\n", fn media_item -> "youtube #{media_item.media_id}" end)
+
+ case File.write(tmpfile, archive_contents) do
+ :ok -> tmpfile
+ err -> err
+ end
+ end
+
+ # Sorting by `uploaded_at` is important because we want to re-index the most recent
+ # media items first but there is no guarantee of any correlation between ID and uploaded_at.
+ #
+ # The offset is important because we want to re-index some media items that we've
+ # recently indexed to pick up on any changes. The limit is because we want this mechanism
+ # to work even if, for example, the video we were using as a stopping point was deleted.
+ # It's not a perfect system, but it should do well enough.
+ #
+ # The chosen limit and offset are arbitrary, independent, and vibes-based. Feel free to
+ # tweak as needed
+ defp get_media_items_for_download_archive(source) do
+ MediaQuery.new()
+ |> where(^MediaQuery.for_source(source))
+ |> order_by(desc: :uploaded_at)
+ |> limit(50)
+ |> offset(20)
+ |> Repo.all()
+ end
+
+ # The download archive isn't useful for playlists (since those are ordered arbitrarily)
+ # and we don't want to use it if the indexing was forced by the user. In other words,
+ # only create an archive for channels that are being indexed as part of their regular
+ # indexing schedule. The first indexing pass should also not create an archive.
+ defp build_download_archive_options(%Source{collection_type: :playlist}, _was_forced), do: []
+ defp build_download_archive_options(%Source{last_indexed_at: nil}, _was_forced), do: []
+ defp build_download_archive_options(_source, true), do: []
+
+ defp build_download_archive_options(source, _was_forced) do
+ archive_file = create_download_archive_file(source)
+
+ [:break_on_existing, download_archive: archive_file]
+ end
 end
diff --git a/lib/pinchflat/sources/source.ex b/lib/pinchflat/sources/source.ex
index b6d9154..00b4776 100644
--- a/lib/pinchflat/sources/source.ex
+++ b/lib/pinchflat/sources/source.ex
@@ -15,6 +15,7 @@ defmodule Pinchflat.Sources.Source do
 alias Pinchflat.Metadata.SourceMetadata
 @allowed_fields ~w(
+ enabled
 collection_name
 collection_id
 collection_type
@@ -27,6 +28,7 @@
 series_directory
 index_frequency_minutes
 fast_index
+ cookie_behaviour
 download_media
 last_indexed_at
 original_url
@@ -36,6 +38,8 @@
 media_profile_id
 output_path_template_override
 marked_for_deletion_at
+ min_duration_seconds
+ max_duration_seconds
 )a
 # Expensive API calls are made when a source is inserted/updated so
@@ -61,6 +65,7 @@
 )a
 schema "sources" do
+ field :enabled, :boolean, default: true
 # This is _not_ used as the primary key or internally in the database
 # relations. This is only used to prevent an enumeration attack on the streaming
 # and RSS feed endpoints since those _must_ be public (ie: no basic auth)
@@ -73,6 +78,7 @@
 field :collection_type, Ecto.Enum, values: [:channel, :playlist]
 field :index_frequency_minutes, :integer, default: 60 * 24
 field :fast_index, :boolean, default: false
+ field :cookie_behaviour, Ecto.Enum, values: [:disabled, :when_needed, :all_operations], default: :disabled
 field :download_media, :boolean, default: true
 field :last_indexed_at, :utc_datetime
 # Only download media items that were published after this date
@@ -82,6 +88,9 @@
 field :title_filter_regex, :string
 field :output_path_template_override, :string
+
+ field :min_duration_seconds, :integer
+ field :max_duration_seconds, :integer
+
 field :series_directory, :string
 field :nfo_filepath, :string
 field :poster_filepath, :string
@@ -116,6 +125,7 @@
 |> dynamic_default(:uuid, fn _ -> Ecto.UUID.generate() end)
 |> validate_required(required_fields)
 |> validate_title_regex()
+ |> validate_min_and_max_durations()
 |> validate_number(:retention_period_days, greater_than_or_equal_to: 0)
 # Ensures it ends with `.{{ ext }}` or `.%(ext)s` or similar (with a little wiggle room)
 |> validate_format(:output_path_template_override, MediaProfile.ext_regex(), message: "must end with .{{ ext }}")
@@ -162,6 +172,17 @@
 defp validate_title_regex(changeset), do: changeset
+
+ defp validate_min_and_max_durations(changeset) do
+ min_duration = get_change(changeset, :min_duration_seconds)
+ max_duration = get_change(changeset, :max_duration_seconds)
+
+ case {min_duration, max_duration} do
+ {min, max} when is_nil(min) or is_nil(max) -> changeset
+ {min, max} when min >= max -> add_error(changeset, :max_duration_seconds, "must be greater than minimum duration")
+ _ -> changeset
+ end
+ end
+
 defimpl Jason.Encoder, for: Source do
 def encode(value, opts) do
 value
diff --git a/lib/pinchflat/sources/sources.ex b/lib/pinchflat/sources/sources.ex
index 6cd45c1..edd37f1 100644
--- a/lib/pinchflat/sources/sources.ex
+++ b/lib/pinchflat/sources/sources.ex
@@ -15,8 +15,8 @@ defmodule Pinchflat.Sources do
 alias Pinchflat.Metadata.SourceMetadata
 alias Pinchflat.Utils.FilesystemUtils
 alias Pinchflat.Downloading.DownloadingHelpers
- alias Pinchflat.FastIndexing.FastIndexingWorker
 alias Pinchflat.SlowIndexing.SlowIndexingHelpers
+ alias Pinchflat.FastIndexing.FastIndexingHelpers
 alias Pinchflat.Metadata.SourceMetadataStorageWorker
 @doc """
@@ -32,6 +32,19 @@
 source.output_path_template_override || media_profile.output_path_template
 end
+ @doc """
+ Returns a boolean indicating whether or not cookies should be used for a given operation.
+
+ Returns boolean()
+ """
+ def use_cookies?(source, operation) when operation in [:indexing, :downloading, :metadata, :error_recovery] do
+ case source.cookie_behaviour do
+ :disabled -> false
+ :all_operations -> true
+ :when_needed -> operation in [:indexing, :error_recovery]
+ end
+ end
+
 @doc """
 Returns the list of sources.
 Returns [%Source{}, ...]
""" @@ -180,11 +193,22 @@ defmodule Pinchflat.Sources do end defp add_source_details_to_changeset(source, changeset) do - case MediaCollection.get_source_details(changeset.changes.original_url) do + original_url = changeset.changes.original_url + should_use_cookies = Ecto.Changeset.get_field(changeset, :cookie_behaviour) == :all_operations + # Skipping sleep interval since this is UI blocking and we want to keep this as fast as possible + addl_opts = [use_cookies: should_use_cookies, skip_sleep_interval: true] + + case MediaCollection.get_source_details(original_url, [], addl_opts) do {:ok, source_details} -> add_source_details_by_collection_type(source, changeset, source_details) - {:error, runner_error, _status_code} -> + err -> + runner_error = + case err do + {:error, error_msg, _status_code} -> error_msg + {:error, error_msg} -> error_msg + end + Ecto.Changeset.add_error( changeset, :original_url, @@ -247,19 +271,40 @@ defmodule Pinchflat.Sources do end end - # If the source is NOT new (ie: updated) and the download_media flag has changed, + # If the source is new (ie: not persisted), do nothing + defp maybe_handle_media_tasks(%{data: %{__meta__: %{state: state}}}, _source) when state != :loaded do + :ok + end + + # If the source is NOT new (ie: updated), # enqueue or dequeue media download tasks as necessary. defp maybe_handle_media_tasks(changeset, source) do - case {changeset.data, changeset.changes} do - {%{__meta__: %{state: :loaded}}, %{download_media: true}} -> + current_changes = changeset.changes + applied_changes = Ecto.Changeset.apply_changes(changeset) + + # We need both current_changes and applied_changes to determine + # the course of action to take. For example, we only care if a source is supposed + # to be `enabled` or not - we don't care if that information comes from the + # current changes or if that's how it already was in the database. 
+ # Rephrased, we're essentially using it in place of `get_field/2` + case {current_changes, applied_changes} do + {%{download_media: true}, %{enabled: true}} -> DownloadingHelpers.enqueue_pending_download_tasks(source) - {%{__meta__: %{state: :loaded}}, %{download_media: false}} -> + {%{enabled: true}, %{download_media: true}} -> + DownloadingHelpers.enqueue_pending_download_tasks(source) + + {%{download_media: false}, _} -> + DownloadingHelpers.dequeue_pending_download_tasks(source) + + {%{enabled: false}, _} -> DownloadingHelpers.dequeue_pending_download_tasks(source) _ -> - :ok + nil end + + :ok end defp maybe_run_indexing_task(changeset, source) do @@ -268,6 +313,10 @@ defmodule Pinchflat.Sources do %{__meta__: %{state: :built}} -> SlowIndexingHelpers.kickoff_indexing_task(source) + if Ecto.Changeset.get_field(changeset, :fast_index) do + FastIndexingHelpers.kickoff_indexing_task(source) + end + # If the record has been persisted, only run indexing if the # indexing frequency has been changed and is now greater than 0 %{__meta__: %{state: :loaded}} -> @@ -293,13 +342,22 @@ defmodule Pinchflat.Sources do end defp maybe_update_slow_indexing_task(changeset, source) do - case changeset.changes do - %{index_frequency_minutes: mins} when mins > 0 -> + # See comment in `maybe_handle_media_tasks` as to why we need these + current_changes = changeset.changes + applied_changes = Ecto.Changeset.apply_changes(changeset) + + case {current_changes, applied_changes} do + {%{index_frequency_minutes: mins}, %{enabled: true}} when mins > 0 -> SlowIndexingHelpers.kickoff_indexing_task(source) - %{index_frequency_minutes: _} -> - Tasks.delete_pending_tasks_for(source, "FastIndexingWorker") - Tasks.delete_pending_tasks_for(source, "MediaCollectionIndexingWorker") + {%{enabled: true}, %{index_frequency_minutes: mins}} when mins > 0 -> + SlowIndexingHelpers.kickoff_indexing_task(source) + + {%{index_frequency_minutes: _}, _} -> + SlowIndexingHelpers.delete_indexing_tasks(source, include_executing: true) + + {%{enabled: false}, _} -> + SlowIndexingHelpers.delete_indexing_tasks(source, include_executing: true) _ -> :ok @@ -307,13 +365,25 @@ defmodule Pinchflat.Sources do end defp maybe_update_fast_indexing_task(changeset, source) do - case changeset.changes do - %{fast_index: true} -> - Tasks.delete_pending_tasks_for(source, "FastIndexingWorker") - FastIndexingWorker.kickoff_with_task(source) + # See comment in `maybe_handle_media_tasks` as to why we need these + current_changes = changeset.changes + applied_changes = Ecto.Changeset.apply_changes(changeset) - %{fast_index: false} -> - Tasks.delete_pending_tasks_for(source, "FastIndexingWorker") + # This technically could be simplified since `maybe_update_slow_indexing_task` + # has some overlap re: deleting pending tasks, but I'm keeping it separate + # for clarity and explicitness. 
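The reason both shapes are needed in the case below: `changeset.changes` only contains fields that are *newly* changing, while `Ecto.Changeset.apply_changes/1` merges those changes over the existing record. A small sketch of the difference (hypothetical source):

```elixir
changeset = Ecto.Changeset.change(%Source{enabled: true, fast_index: false}, %{fast_index: true})

changeset.changes
# => %{fast_index: true}  (only the newly-changed field)

Ecto.Changeset.apply_changes(changeset)
# => %Source{enabled: true, fast_index: true}  (changes merged over existing values)
```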
+ case {current_changes, applied_changes} do + {%{fast_index: true}, %{enabled: true}} -> + FastIndexingHelpers.kickoff_indexing_task(source) + + {%{enabled: true}, %{fast_index: true}} -> + FastIndexingHelpers.kickoff_indexing_task(source) + + {%{fast_index: false}, _} -> + Tasks.delete_pending_tasks_for(source, "FastIndexingWorker", include_executing: true) + + {%{enabled: false}, _} -> + Tasks.delete_pending_tasks_for(source, "FastIndexingWorker", include_executing: true) _ -> :ok diff --git a/lib/pinchflat/tasks/tasks.ex b/lib/pinchflat/tasks/tasks.ex index 7b94e3c..2dfef0a 100644 --- a/lib/pinchflat/tasks/tasks.ex +++ b/lib/pinchflat/tasks/tasks.ex @@ -53,20 +53,6 @@ defmodule Pinchflat.Tasks do ) end - @doc """ - Returns the list of pending tasks for a given record type and ID. Optionally allows you to specify - which worker to include. - - Returns [%Task{}, ...] - """ - def list_pending_tasks_for(record, worker_name \\ nil) do - list_tasks_for( - record, - worker_name, - [:available, :scheduled, :retryable] - ) - end - @doc """ Gets a single task. @@ -127,13 +113,13 @@ defmodule Pinchflat.Tasks do @doc """ Deletes all tasks attached to a given record, cancelling any attached jobs. - Optionally allows you to specify which worker to include. + Optionally allows you to specify which worker and job states to include. Returns :ok """ - def delete_tasks_for(record, worker_name \\ nil) do + def delete_tasks_for(record, worker_name \\ nil, job_states \\ Oban.Job.states()) do record - |> list_tasks_for(worker_name) + |> list_tasks_for(worker_name, job_states) |> Enum.each(&delete_task/1) end @@ -143,10 +129,12 @@ defmodule Pinchflat.Tasks do Returns :ok """ - def delete_pending_tasks_for(record, worker_name \\ nil) do - record - |> list_pending_tasks_for(worker_name) - |> Enum.each(&delete_task/1) + def delete_pending_tasks_for(record, worker_name \\ nil, opts \\ []) do + include_executing = Keyword.get(opts, :include_executing, false) + base_job_states = [:available, :scheduled, :retryable] + job_states = if include_executing, do: base_job_states ++ [:executing], else: base_job_states + + delete_tasks_for(record, worker_name, job_states) end @doc """ diff --git a/lib/pinchflat/utils/filesystem_utils.ex b/lib/pinchflat/utils/filesystem_utils.ex index 8652192..e7acb86 100644 --- a/lib/pinchflat/utils/filesystem_utils.ex +++ b/lib/pinchflat/utils/filesystem_utils.ex @@ -20,6 +20,24 @@ defmodule Pinchflat.Utils.FilesystemUtils do end end + @doc """ + Checks if two filepaths reference the same file. + + Useful if you have a relative and absolute filepath and want to be sure they're the same file. + Also works with symlinks. + + Returns boolean() + """ + def filepaths_reference_same_file?(filepath_1, filepath_2) do + {:ok, stat_1} = File.stat(filepath_1) + {:ok, stat_2} = File.stat(filepath_2) + + identifier_1 = "#{stat_1.major_device}:#{stat_1.minor_device}:#{stat_1.inode}" + identifier_2 = "#{stat_2.major_device}:#{stat_2.minor_device}:#{stat_2.inode}" + + identifier_1 == identifier_2 + end + @doc """ Generates a temporary file and returns its path. The file is empty and has the given type. Generates all the directories in the path if they don't exist. 
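On `filepaths_reference_same_file?/2` above: comparing `major_device:minor_device:inode` from `File.stat/1` is what makes the check robust to hardlinks and differing path spellings. Note that the bare `{:ok, stat}` matches will raise if either path doesn't exist, so callers are expected to verify existence first. A usage sketch (paths invented):

```elixir
File.write!("/tmp/a.txt", "hi")
File.ln("/tmp/a.txt", "/tmp/b.txt") # hardlink to the same inode

FilesystemUtils.filepaths_reference_same_file?("/tmp/a.txt", "/tmp/b.txt")
# => true
```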
@@ -27,8 +45,20 @@
 Returns binary()
 """
 def generate_metadata_tmpfile(type) do
+ filename = StringUtils.random_string(64)
+ # This "namespacing" is more to help with development since things get
+ # weird in my editor when there are thousands of files in a single directory
+ first_two = String.slice(filename, 0..1)
+ second_two = String.slice(filename, 2..3)
 tmpfile_directory = Application.get_env(:pinchflat, :tmpfile_directory)
- filepath = Path.join([tmpfile_directory, "#{StringUtils.random_string(64)}.#{type}"])
+
+ filepath =
+ Path.join([
+ tmpfile_directory,
+ first_two,
+ second_two,
+ "#{filename}.#{type}"
+ ])
 :ok = write_p!(filepath, "")
diff --git a/lib/pinchflat/utils/map_utils.ex b/lib/pinchflat/utils/map_utils.ex
new file mode 100644
index 0000000..41f03a1
--- /dev/null
+++ b/lib/pinchflat/utils/map_utils.ex
@@ -0,0 +1,17 @@
+defmodule Pinchflat.Utils.MapUtils do
+ @moduledoc """
+ Utility methods for working with maps
+ """
+
+ @doc """
+ Converts a nested list of 2-element tuples or lists into a map.
+
+ Returns map()
+ """
+ def from_nested_list(list) do
+ Enum.reduce(list, %{}, fn
+ [key, value], acc -> Map.put(acc, key, value)
+ {key, value}, acc -> Map.put(acc, key, value)
+ end)
+ end
+end
diff --git a/lib/pinchflat/utils/number_utils.ex b/lib/pinchflat/utils/number_utils.ex
index b7128f8..d86002b 100644
--- a/lib/pinchflat/utils/number_utils.ex
+++ b/lib/pinchflat/utils/number_utils.ex
@@ -36,4 +36,18 @@ defmodule Pinchflat.Utils.NumberUtils do
 end
 end)
 end
+
+ @doc """
+ Adds jitter to a number based on a percentage. Returns 0 if the number is less than or equal to 0.
+
+ Returns integer()
+ """
+ def add_jitter(num, jitter_percentage \\ 0.5)
+ def add_jitter(num, _jitter_percentage) when num <= 0, do: 0
+
+ def add_jitter(num, jitter_percentage) do
+ jitter = :rand.uniform(round(num * jitter_percentage))
+
+ round(num + jitter)
+ end
 end
diff --git a/lib/pinchflat/utils/string_utils.ex b/lib/pinchflat/utils/string_utils.ex
index d96d6c1..66efc5e 100644
--- a/lib/pinchflat/utils/string_utils.ex
+++ b/lib/pinchflat/utils/string_utils.ex
@@ -26,19 +26,22 @@ defmodule Pinchflat.Utils.StringUtils do
 end
 @doc """
- Truncates a string to the given length and adds `...` if the string is longer than the given length.
- Will break on a word boundary. Nothing happens if the string is shorter than the given length.
+ Wraps a string in double braces. Useful as a UI helper now that
+ LiveView 1.0.0 allows `{}` for interpolation, which means literal braces
+ can't be used directly in the view.
 Returns binary()
 """
- def truncate(string, length) do
- if String.length(string) > length do
- string
- |> String.slice(0..(length - 1))
- |> String.replace(~r/\s+\S*$/, "")
- |> Kernel.<>("...")
- else
- string
- end
+ def double_brace(string) do
+ "{{ #{string} }}"
 end
+
+ @doc """
+ Converts a term to a binary via `inspect/1` if it isn't one already. Useful for
+ working with error messages whose types can vary.
+ + Returns binary() + """ + def wrap_string(message) when is_binary(message), do: message + def wrap_string(message), do: "#{inspect(message)}" end diff --git a/lib/pinchflat/yt_dlp/command_runner.ex b/lib/pinchflat/yt_dlp/command_runner.ex index 30b2404..f574d30 100644 --- a/lib/pinchflat/yt_dlp/command_runner.ex +++ b/lib/pinchflat/yt_dlp/command_runner.ex @@ -3,7 +3,11 @@ defmodule Pinchflat.YtDlp.CommandRunner do Runs yt-dlp commands using the `System.cmd/3` function """ + require Logger + + alias Pinchflat.Settings alias Pinchflat.Utils.CliUtils + alias Pinchflat.Utils.NumberUtils alias Pinchflat.YtDlp.YtDlpCommandRunner alias Pinchflat.Utils.FilesystemUtils, as: FSUtils @@ -18,23 +22,32 @@ defmodule Pinchflat.YtDlp.CommandRunner do - :output_filepath - the path to save the output to. If not provided, a temporary file will be created and used. Useful for if you need a reference to the file for a file watcher. + - :use_cookies - if true, will add a cookie file to the command options. Will not + attach a cookie file if the user hasn't set one up. + - :skip_sleep_interval - if true, will not add the sleep interval options to the command. + Usually only used for commands that would be UI-blocking Returns {:ok, binary()} | {:error, output, status}. """ @impl YtDlpCommandRunner - def run(url, command_opts, output_template, addl_opts \\ []) do - # This approach lets us mock the command for testing - command = backend_executable() + def run(url, action_name, command_opts, output_template, addl_opts \\ []) do + Logger.debug("Running yt-dlp command for action: #{action_name}") output_filepath = generate_output_filepath(addl_opts) print_to_file_opts = [{:print_to_file, output_template}, output_filepath] - user_configured_opts = cookie_file_options() + user_configured_opts = cookie_file_options(addl_opts) ++ rate_limit_options(addl_opts) ++ misc_options() # These must stay in exactly this order, hence why I'm giving it its own variable. all_opts = command_opts ++ print_to_file_opts ++ user_configured_opts ++ global_options() formatted_command_opts = [url] ++ CliUtils.parse_options(all_opts) - case CliUtils.wrap_cmd(command, formatted_command_opts, stderr_to_stdout: true) do - {_, 0} -> + case CliUtils.wrap_cmd(backend_executable(), formatted_command_opts, stderr_to_stdout: true) do + # yt-dlp exit codes: + # 0 = Everything is successful + # 100 = yt-dlp must restart for update to complete + # 101 = Download cancelled by --max-downloads etc + # 2 = Error in user-provided options + # 1 = Any other error + {_, status} when status in [0, 101] -> # IDEA: consider deleting the file after reading it. It's in the tmp dir, so it's not # a huge deal, but it's still a good idea to clean up after ourselves. # (even on error? especially on error?) 
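One more note on the sleep options attached further down: each option gets its own independently jittered value from `NumberUtils.add_jitter/2`, so with `extractor_sleep_interval_seconds` set to 10 (and the default 0.5 jitter percentage, giving `10 + :rand.uniform(5)`), a single run might produce something like:

```elixir
[
  sleep_requests: 13,
  sleep_interval: 11,
  sleep_subtitles: 14
]
```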
@@ -63,6 +76,24 @@ defmodule Pinchflat.YtDlp.CommandRunner do end end + @doc """ + Updates yt-dlp to the latest version + + Returns {:ok, binary()} | {:error, binary()} + """ + @impl YtDlpCommandRunner + def update do + command = backend_executable() + + case CliUtils.wrap_cmd(command, ["--update"]) do + {output, 0} -> + {:ok, String.trim(output)} + + {output, _} -> + {:error, output} + end + end + defp generate_output_filepath(addl_opts) do case Keyword.get(addl_opts, :output_filepath) do nil -> FSUtils.generate_metadata_tmpfile(:json) @@ -78,7 +109,14 @@ defmodule Pinchflat.YtDlp.CommandRunner do ] end - defp cookie_file_options do + defp cookie_file_options(addl_opts) do + case Keyword.get(addl_opts, :use_cookies) do + true -> add_cookie_file() + _ -> [] + end + end + + defp add_cookie_file do base_dir = Application.get_env(:pinchflat, :extras_directory) filename_options_map = %{cookies: "cookies.txt"} @@ -93,6 +131,32 @@ defmodule Pinchflat.YtDlp.CommandRunner do end) end + defp rate_limit_options(addl_opts) do + throughput_limit = Settings.get!(:download_throughput_limit) + sleep_interval_opts = sleep_interval_opts(addl_opts) + throughput_option = if throughput_limit, do: [limit_rate: throughput_limit], else: [] + + throughput_option ++ sleep_interval_opts + end + + defp sleep_interval_opts(addl_opts) do + sleep_interval = Settings.get!(:extractor_sleep_interval_seconds) + + if sleep_interval <= 0 || Keyword.get(addl_opts, :skip_sleep_interval) do + [] + else + [ + sleep_requests: NumberUtils.add_jitter(sleep_interval), + sleep_interval: NumberUtils.add_jitter(sleep_interval), + sleep_subtitles: NumberUtils.add_jitter(sleep_interval) + ] + end + end + + defp misc_options do + if Settings.get!(:restrict_filenames), do: [:restrict_filenames], else: [] + end + defp backend_executable do Application.get_env(:pinchflat, :yt_dlp_executable) end diff --git a/lib/pinchflat/yt_dlp/media.ex b/lib/pinchflat/yt_dlp/media.ex index cecf08a..9abf8e5 100644 --- a/lib/pinchflat/yt_dlp/media.ex +++ b/lib/pinchflat/yt_dlp/media.ex @@ -11,7 +11,8 @@ defmodule Pinchflat.YtDlp.Media do :livestream, :short_form_content, :uploaded_at, - :duration_seconds + :duration_seconds, + :predicted_media_filepath ] defstruct [ @@ -23,7 +24,8 @@ defmodule Pinchflat.YtDlp.Media do :short_form_content, :uploaded_at, :duration_seconds, - :playlist_index + :playlist_index, + :predicted_media_filepath ] alias __MODULE__ @@ -37,9 +39,9 @@ defmodule Pinchflat.YtDlp.Media do Returns {:ok, map()} | {:error, any, ...}. """ def download(url, command_opts \\ [], addl_opts \\ []) do - opts = [:no_simulate] ++ command_opts + all_command_opts = [:no_simulate] ++ command_opts - with {:ok, output} <- backend_runner().run(url, opts, "after_move:%()j", addl_opts), + with {:ok, output} <- backend_runner().run(url, :download, all_command_opts, "after_move:%()j", addl_opts), {:ok, parsed_json} <- Phoenix.json_library().decode(output) do {:ok, parsed_json} else @@ -47,31 +49,53 @@ defmodule Pinchflat.YtDlp.Media do end end + @doc """ + Determines if the media at the given URL is ready to be downloaded. + Common examples of non-downloadable media are upcoming or in-progress live streams. 
+ + Returns {:ok, :downloadable | :ignorable} | {:error, any} + """ + def get_downloadable_status(url, addl_opts \\ []) do + action = :get_downloadable_status + command_opts = [:simulate, :skip_download] + + case backend_runner().run(url, action, command_opts, "%(.{live_status})j", addl_opts) do + {:ok, output} -> + output + |> Phoenix.json_library().decode!() + |> parse_downloadable_status() + + err -> + err + end + end + @doc """ Downloads a thumbnail for a single piece of media. Usually used for downloading thumbnails for internal use Returns {:ok, ""} | {:error, any, ...}. """ - def download_thumbnail(url, command_opts \\ []) do - opts = [:no_simulate, :skip_download, :write_thumbnail, convert_thumbnail: "jpg"] ++ command_opts + def download_thumbnail(url, command_opts \\ [], addl_opts \\ []) do + all_command_opts = [:no_simulate, :skip_download, :write_thumbnail, convert_thumbnail: "jpg"] ++ command_opts # NOTE: it doesn't seem like this command actually returns anything in `after_move` since # we aren't downloading the main media file - backend_runner().run(url, opts, "after_move:%()j") + backend_runner().run(url, :download_thumbnail, all_command_opts, "after_move:%()j", addl_opts) end @doc """ Returns a map representing the media at the given URL. + Optionally takes a list of additional command options to pass to yt-dlp + or configuration-related options to pass to the runner. Returns {:ok, %Media{}} | {:error, any, ...}. """ - def get_media_attributes(url) do - runner = Application.get_env(:pinchflat, :yt_dlp_runner) - command_opts = [:simulate, :skip_download] + def get_media_attributes(url, command_opts \\ [], addl_opts \\ []) do + all_command_opts = [:simulate, :skip_download] ++ command_opts output_template = indexing_output_template() - case runner.run(url, command_opts, output_template) do + case backend_runner().run(url, :get_media_attributes, all_command_opts, output_template, addl_opts) do {:ok, output} -> output |> Phoenix.json_library().decode!() @@ -87,9 +111,11 @@ defmodule Pinchflat.YtDlp.Media do Returns the output template for yt-dlp's indexing command. NOTE: playlist_index is really only useful for playlists that will never change their order. 
+ NOTE: I've switched back to `original_url` (from `webpage_url`) since it's started indicating + whether something is a short via the URL again. """ def indexing_output_template do - "%(.{id,title,was_live,webpage_url,description,aspect_ratio,duration,upload_date,timestamp,playlist_index})j" + "%(.{id,title,live_status,original_url,description,aspect_ratio,duration,upload_date,timestamp,playlist_index,filename})j" end @doc """ @@ -103,17 +129,18 @@ defmodule Pinchflat.YtDlp.Media do media_id: response["id"], title: response["title"], description: response["description"], - original_url: response["webpage_url"], - livestream: !!response["was_live"], + original_url: response["original_url"], + livestream: !!response["live_status"] && response["live_status"] != "not_live", duration_seconds: response["duration"] && round(response["duration"]), - short_form_content: response["webpage_url"] && short_form_content?(response), + short_form_content: response["original_url"] && short_form_content?(response), uploaded_at: response["upload_date"] && parse_uploaded_at(response), - playlist_index: response["playlist_index"] || 0 + playlist_index: response["playlist_index"] || 0, + predicted_media_filepath: response["filename"] } end defp short_form_content?(response) do - if String.contains?(response["webpage_url"], "/shorts/") do + if String.contains?(response["original_url"], "/shorts/") do true else # Sometimes shorts are returned without /shorts/ in the URL, # # These don't fail if duration or aspect_ratio are missing # due to Elixir's comparison semantics - response["duration"] <= 60 && response["aspect_ratio"] < 0.8 + response["duration"] <= 180 && response["aspect_ratio"] <= 0.85 end end @@ -140,6 +167,16 @@ defmodule Pinchflat.YtDlp.Media do defp parse_uploaded_at(%{"upload_date" => nil}), do: nil defp parse_uploaded_at(response), do: MetadataFileHelpers.parse_upload_date(response["upload_date"]) + defp parse_downloadable_status(response) do + case response["live_status"] do + status when status in ["is_live", "is_upcoming", "post_live"] -> {:ok, :ignorable} + status when status in ["was_live", "not_live"] -> {:ok, :downloadable} + # This preserves my tenuous support for non-YouTube sources. + nil -> {:ok, :downloadable} + _ -> {:error, "Unknown live status: #{response["live_status"]}"} + end + end + defp backend_runner do # This approach lets us mock the command for testing Application.get_env(:pinchflat, :yt_dlp_runner) diff --git a/lib/pinchflat/yt_dlp/media_collection.ex b/lib/pinchflat/yt_dlp/media_collection.ex index f657195..aa4abb3 100644 --- a/lib/pinchflat/yt_dlp/media_collection.ex +++ b/lib/pinchflat/yt_dlp/media_collection.ex @@ -11,29 +11,34 @@ defmodule Pinchflat.YtDlp.MediaCollection do @doc """ Returns a list of maps representing the media in the collection. + Optionally takes a list of additional command options to pass to yt-dlp + or configuration-related options to pass to the runner. - Options: + Runner Options: - :file_listener_handler - a function that will be called with the path to the file that will be written to when yt-dlp is done. This is useful for setting up a file watcher to know when the file is ready to be read. + - :use_cookies - whether or not to use user-provided cookies when fetching the media details. Returns {:ok, [map()]} | {:error, any, ...}.
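Two quick sketches of the new live-status handling in media.ex above, with illustrative values throughout. First, the status check:

alias Pinchflat.YtDlp.Media

# Upcoming and in-progress live streams map to :ignorable; finished
# streams and normal videos map to :downloadable
case Media.get_downloadable_status("https://www.youtube.com/watch?v=some_stream") do
  {:ok, :downloadable} -> :enqueue_download
  {:ok, :ignorable} -> :skip_for_now
  err -> err
end

Second, how an indexing response flows through the updated field mapping:

response = %{
  "id" => "abc123",
  "title" => "Some Short",
  "live_status" => "not_live",
  "original_url" => "https://www.youtube.com/shorts/abc123",
  "duration" => 42,
  "filename" => "/downloads/Some Short [abc123].mp4"
}

# livestream => false ("not_live" is explicitly excluded)
# short_form_content => true (the URL contains "/shorts/")
# predicted_media_filepath => "/downloads/Some Short [abc123].mp4"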
""" - def get_media_attributes_for_collection(url, addl_opts \\ []) do - runner = Application.get_env(:pinchflat, :yt_dlp_runner) + def get_media_attributes_for_collection(url, command_opts \\ [], addl_opts \\ []) do # `ignore_no_formats_error` is necessary because yt-dlp will error out if # the first video has not released yet (ie: is a premier). We don't care about # available formats since we're just getting the media details - command_opts = [:simulate, :skip_download, :ignore_no_formats_error, :no_warnings] + all_command_opts = [:simulate, :skip_download, :ignore_no_formats_error, :no_warnings] ++ command_opts + use_cookies = Keyword.get(addl_opts, :use_cookies, false) output_template = YtDlpMedia.indexing_output_template() output_filepath = FilesystemUtils.generate_metadata_tmpfile(:json) file_listener_handler = Keyword.get(addl_opts, :file_listener_handler, false) + runner_opts = [output_filepath: output_filepath, use_cookies: use_cookies] + action = :get_media_attributes_for_collection if file_listener_handler do file_listener_handler.(output_filepath) end - case runner.run(url, command_opts, output_template, output_filepath: output_filepath) do + case backend_runner().run(url, action, all_command_opts, output_template, runner_opts) do {:ok, output} -> parsed_lines = output @@ -64,7 +69,7 @@ defmodule Pinchflat.YtDlp.MediaCollection do Returns {:ok, map()} | {:error, any, ...}. """ - def get_source_details(source_url, addl_opts \\ []) do + def get_source_details(source_url, command_opts \\ [], addl_opts \\ []) do # `ignore_no_formats_error` is necessary because yt-dlp will error out if # the first video has not released yet (ie: is a premier). We don't care about # available formats since we're just getting the source details @@ -75,13 +80,15 @@ defmodule Pinchflat.YtDlp.MediaCollection do playlist_end: 1 ] - command_opts = default_opts ++ addl_opts + all_command_opts = default_opts ++ command_opts output_template = "%(.{channel,channel_id,playlist_id,playlist_title,filename})j" + action = :get_source_details - with {:ok, output} <- backend_runner().run(source_url, command_opts, output_template), + with {:ok, output} <- backend_runner().run(source_url, action, all_command_opts, output_template, addl_opts), {:ok, parsed_json} <- Phoenix.json_library().decode(output) do {:ok, format_source_details(parsed_json)} else + {:error, %Jason.DecodeError{}} -> {:error, "Error decoding JSON response"} err -> err end end @@ -109,14 +116,15 @@ defmodule Pinchflat.YtDlp.MediaCollection do Returns {:ok, map()} | {:error, any, ...}. """ - def get_source_metadata(source_url, addl_opts \\ [playlist_items: 0]) do + def get_source_metadata(source_url, command_opts, addl_opts \\ []) do # This only validates that the `playlist_items` key is present. 
It's otherwise unused - _playlist_items = Keyword.fetch!(addl_opts, :playlist_items) + _playlist_items = Keyword.fetch!(command_opts, :playlist_items) - opts = [:skip_download] ++ addl_opts + all_command_opts = [:skip_download] ++ command_opts output_template = "playlist:%()j" + action = :get_source_metadata - with {:ok, output} <- backend_runner().run(source_url, opts, output_template), + with {:ok, output} <- backend_runner().run(source_url, action, all_command_opts, output_template, addl_opts), {:ok, parsed_json} <- Phoenix.json_library().decode(output) do {:ok, parsed_json} else diff --git a/lib/pinchflat/yt_dlp/update_worker.ex b/lib/pinchflat/yt_dlp/update_worker.ex new file mode 100644 index 0000000..2d9b43f --- /dev/null +++ b/lib/pinchflat/yt_dlp/update_worker.ex @@ -0,0 +1,44 @@ +defmodule Pinchflat.YtDlp.UpdateWorker do + @moduledoc false + + use Oban.Worker, + queue: :local_data, + tags: ["local_data"] + + require Logger + + alias __MODULE__ + alias Pinchflat.Settings + + @doc """ + Starts the yt-dlp update worker. Does not attach it to a task like `kickoff_with_task/2`. + + Returns {:ok, %Oban.Job{}} | {:error, %Ecto.Changeset{}} + """ + def kickoff do + Oban.insert(UpdateWorker.new(%{})) + end + + @doc """ + Updates yt-dlp and saves the version to the settings. + + This worker is scheduled to run via the Oban Cron plugin as well as on app boot. + + Returns :ok + """ + @impl Oban.Worker + def perform(%Oban.Job{}) do + Logger.info("Updating yt-dlp") + + yt_dlp_runner().update() + + {:ok, yt_dlp_version} = yt_dlp_runner().version() + Settings.set(yt_dlp_version: yt_dlp_version) + + :ok + end + + defp yt_dlp_runner do + Application.get_env(:pinchflat, :yt_dlp_runner) + end +end diff --git a/lib/pinchflat/yt_dlp/yt_dlp_command_runner.ex b/lib/pinchflat/yt_dlp/yt_dlp_command_runner.ex index 9b46a32..e5c770e 100644 --- a/lib/pinchflat/yt_dlp/yt_dlp_command_runner.ex +++ b/lib/pinchflat/yt_dlp/yt_dlp_command_runner.ex @@ -6,7 +6,8 @@ defmodule Pinchflat.YtDlp.YtDlpCommandRunner do yt-dlp command.
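The cron scheduling mentioned in `UpdateWorker`'s docs lives in config rather than in this file; under the Oban Cron plugin it would look something like the sketch below (the schedule string is illustrative, not taken from this PR). On boot, `kickoff/0` enqueues the same job directly.

config :pinchflat, Oban,
  plugins: [
    {Oban.Plugins.Cron,
     crontab: [
       # check for a new yt-dlp build once a day
       {"0 12 * * *", Pinchflat.YtDlp.UpdateWorker}
     ]}
  ]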
""" - @callback run(binary(), keyword(), binary()) :: {:ok, binary()} | {:error, binary(), integer()} - @callback run(binary(), keyword(), binary(), keyword()) :: {:ok, binary()} | {:error, binary(), integer()} + @callback run(binary(), atom(), keyword(), binary()) :: {:ok, binary()} | {:error, binary(), integer()} + @callback run(binary(), atom(), keyword(), binary(), keyword()) :: {:ok, binary()} | {:error, binary(), integer()} @callback version() :: {:ok, binary()} | {:error, binary()} + @callback update() :: {:ok, binary()} | {:error, binary()} end diff --git a/lib/pinchflat_web.ex b/lib/pinchflat_web.ex index ef58d00..9401e3b 100644 --- a/lib/pinchflat_web.ex +++ b/lib/pinchflat_web.ex @@ -43,7 +43,7 @@ defmodule PinchflatWeb do layouts: [html: PinchflatWeb.Layouts] import Plug.Conn - import PinchflatWeb.Gettext + use Gettext, backend: PinchflatWeb.Gettext alias Pinchflat.Settings alias PinchflatWeb.Layouts @@ -94,12 +94,13 @@ defmodule PinchflatWeb do # HTML escaping functionality import Phoenix.HTML # Core UI components and translation - import PinchflatWeb.Gettext + use Gettext, backend: PinchflatWeb.Gettext import PinchflatWeb.CoreComponents import PinchflatWeb.CustomComponents.TabComponents import PinchflatWeb.CustomComponents.TextComponents import PinchflatWeb.CustomComponents.TableComponents import PinchflatWeb.CustomComponents.ButtonComponents + import Pinchflat.Utils.StringUtils, only: [double_brace: 1] alias Pinchflat.Settings alias Pinchflat.Utils.StringUtils diff --git a/lib/pinchflat_web/components/core_components.ex b/lib/pinchflat_web/components/core_components.ex index 0bad772..37af104 100644 --- a/lib/pinchflat_web/components/core_components.ex +++ b/lib/pinchflat_web/components/core_components.ex @@ -15,8 +15,7 @@ defmodule PinchflatWeb.CoreComponents do Icons are provided by [heroicons](https://heroicons.com). See `icon/1` for usage. """ use Phoenix.Component, global_prefixes: ~w(x-) - - import PinchflatWeb.Gettext + use Gettext, backend: PinchflatWeb.Gettext alias Phoenix.LiveView.JS alias PinchflatWeb.CustomComponents.TextComponents @@ -82,7 +81,7 @@ defmodule PinchflatWeb.CoreComponents do

- <%= render_slot(@inner_block) %> + {render_slot(@inner_block)}
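The template hunks here and below are mechanical: newer HEEx versions support brace interpolation in tag bodies, so `<%= expr %>` becomes `{expr}` with no change in rendered output. A minimal before/after sketch (the surrounding markup is illustrative, inside a function component where `assigns` is in scope):

# before
~H"""
<span><%= @title %></span>
"""

# after
~H"""
<span>{@title}</span>
"""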
@@ -126,9 +125,9 @@ defmodule PinchflatWeb.CoreComponents do ]}>
- <%= @title %> + {@title}
- <%= msg %> + {msg}
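One knock-on effect of the widened `YtDlpCommandRunner` behaviour earlier in this diff: any test double bound to `:yt_dlp_runner` now has to accept the action atom as its second argument. A minimal Mox-style sketch, assuming the suite uses Mox and a mock named `YtDlpRunnerMock` (both are assumptions; neither appears in this diff):

# the new action atom (e.g. :get_media_attributes) arrives second
Mox.expect(YtDlpRunnerMock, :run, fn _url, _action, _opts, _output_template, _addl_opts ->
  {:ok, "{}"}
end)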