From e841f39cf2e2447059cc2cc8396162cbbb6c3a9e Mon Sep 17 00:00:00 2001 From: Kieran Date: Wed, 10 Apr 2024 17:54:45 -0700 Subject: [PATCH] [Enhancement] Redownload new media after a delay (#173) * Added redownload-related columns * Added methods for fetching re-downloadable media items * Filled out redownload worker + tests * Added redownload worker to config.exs cron * Added to UI and README --- README.md | 2 + config/config.exs | 3 +- .../downloading/media_download_worker.ex | 28 +++- .../downloading/media_redownload_worker.ex | 31 +++++ lib/pinchflat/media/media.ex | 34 ++++- lib/pinchflat/media/media_item.ex | 4 +- lib/pinchflat/media/media_query.ex | 50 +++++-- .../notifications/source_notifications.ex | 2 +- lib/pinchflat/profiles/media_profile.ex | 4 +- .../media_profile_form.html.heex | 11 ++ .../controllers/sources/source_controller.ex | 2 +- ...20240409224152_add_redownloaded_fields.exs | 13 ++ .../media_download_worker_test.exs | 26 +++- .../media_redownload_worker_test.exs | 44 ++++++ test/pinchflat/media_test.exs | 125 ++++++++++++++++++ 15 files changed, 349 insertions(+), 30 deletions(-) create mode 100644 lib/pinchflat/downloading/media_redownload_worker.ex create mode 100644 priv/repo/migrations/20240409224152_add_redownloaded_fields.exs create mode 100644 test/pinchflat/downloading/media_redownload_worker_test.exs diff --git a/README.md b/README.md index bdf24a5..0b0a10c 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,8 @@ If it doesn't work for your use case, please make a feature request! You can als - Supports downloading audio content - Custom rules for handling YouTube Shorts and livestreams - Apprise support for notifications +- Allows automatically redownloading new media after a set period + - This can help improve the download quality of new content or improve SponsorBlock tags - Optionally automatically delete old content ([docs](https://github.com/kieraneglin/pinchflat/wiki/Automatically-Delete-Media)) - Advanced options like setting cutoff dates and filtering by title - Reliable hands-off operation diff --git a/config/config.exs b/config/config.exs index b395950..c157fab 100644 --- a/config/config.exs +++ b/config/config.exs @@ -52,7 +52,8 @@ config :pinchflat, Oban, {Oban.Plugins.Pruner, max_age: 30 * 24 * 60 * 60}, {Oban.Plugins.Cron, crontab: [ - {"@daily", Pinchflat.Downloading.MediaRetentionWorker} + {"0 1 * * *", Pinchflat.Downloading.MediaRetentionWorker}, + {"0 2 * * *", Pinchflat.Downloading.MediaRedownloadWorker} ]} ], # TODO: consider making this an env var or something? diff --git a/lib/pinchflat/downloading/media_download_worker.ex b/lib/pinchflat/downloading/media_download_worker.ex index 17ab5b5..0e12067 100644 --- a/lib/pinchflat/downloading/media_download_worker.ex +++ b/lib/pinchflat/downloading/media_download_worker.ex @@ -35,14 +35,17 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do """ @impl Oban.Worker def perform(%Oban.Job{args: %{"id" => media_item_id} = args}) do + should_force = Map.get(args, "force", false) + is_redownload = Map.get(args, "redownload?", false) + media_item = media_item_id |> Media.get_media_item!() |> Repo.preload(:source) # If the source or media item is set to not download media, perform a no-op unless forced - if (media_item.source.download_media && !media_item.prevent_download) || args["force"] do - download_media_and_schedule_jobs(media_item) + if (media_item.source.download_media && !media_item.prevent_download) || should_force do + download_media_and_schedule_jobs(media_item, is_redownload) else :ok end @@ -51,10 +54,13 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do Ecto.StaleEntryError -> Logger.info("#{__MODULE__} discarded: media item #{media_item_id} stale") end - defp download_media_and_schedule_jobs(media_item) do + defp download_media_and_schedule_jobs(media_item, is_redownload) do case MediaDownloader.download_for_media_item(media_item) do {:ok, updated_media_item} -> - compute_and_save_media_filesize(updated_media_item) + Media.update_media_item(updated_media_item, %{ + media_size_bytes: compute_media_filesize(updated_media_item), + media_redownloaded_at: get_redownloaded_at(is_redownload) + }) {:ok, updated_media_item} @@ -66,13 +72,21 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do end end - defp compute_and_save_media_filesize(media_item) do + defp compute_media_filesize(media_item) do case File.stat(media_item.media_filepath) do {:ok, %{size: size}} -> - Media.update_media_item(media_item, %{media_size_bytes: size}) + size _ -> - :ok + nil + end + end + + defp get_redownloaded_at(is_redownload) do + if is_redownload do + DateTime.utc_now() + else + nil end end end diff --git a/lib/pinchflat/downloading/media_redownload_worker.ex b/lib/pinchflat/downloading/media_redownload_worker.ex new file mode 100644 index 0000000..b3b6d82 --- /dev/null +++ b/lib/pinchflat/downloading/media_redownload_worker.ex @@ -0,0 +1,31 @@ +defmodule Pinchflat.Downloading.MediaRedownloadWorker do + @moduledoc false + + use Oban.Worker, + queue: :media_fetching, + unique: [period: :infinity, states: [:available, :scheduled, :retryable, :executing]], + tags: ["media_item", "media_fetching"] + + require Logger + + alias Pinchflat.Media + alias Pinchflat.Downloading.MediaDownloadWorker + + @doc """ + Redownloads media items that are eligible for redownload. + + This worker is scheduled to run daily via the Oban Cron plugin + and it should run _after_ the retention worker. + + Returns :ok + """ + @impl Oban.Worker + def perform(%Oban.Job{}) do + redownloadable_media = Media.list_redownloadable_media_items() + Logger.info("Redownloading #{length(redownloadable_media)} media items") + + Enum.each(redownloadable_media, fn media_item -> + MediaDownloadWorker.kickoff_with_task(media_item, %{redownload?: true}) + end) + end +end diff --git a/lib/pinchflat/media/media.ex b/lib/pinchflat/media/media.ex index ee8531d..d458af4 100644 --- a/lib/pinchflat/media/media.ex +++ b/lib/pinchflat/media/media.ex @@ -31,15 +31,39 @@ defmodule Pinchflat.Media do def list_cullable_media_items do MediaQuery.new() |> MediaQuery.with_media_filepath() - |> MediaQuery.with_passed_retention_period() - |> MediaQuery.with_no_culling_prevention() + |> MediaQuery.where_past_retention_period() + |> MediaQuery.where_culling_not_prevented() + |> Repo.all() + end + + @doc """ + Returns a list of media_items that are redownloadable based on the redownload delay + of the media_profile their source belongs to. + + The logic is that a media_item is past_redownload_delay if the media_item's + upload_date is at least redownload_delay_days ago AND + `media_downloaded_at` - `redownload_delay_days` is before the media_item's `upload_date`. + This logic grabs media that we've recently downloaded AND is recently uploaded, but + doesn't grab media that we've recently downloaded and was uploaded a long time ago. + This also makes things work as expected when downloading media from a source for the + first time. + + Returns [%MediaItem{}, ...] + """ + def list_redownloadable_media_items do + MediaQuery.new() + |> MediaQuery.with_media_downloaded_at() + |> MediaQuery.where_download_not_prevented() + |> MediaQuery.where_not_culled() + |> MediaQuery.where_media_not_redownloaded() + |> MediaQuery.where_past_redownload_delay() |> Repo.all() end @doc """ Returns a list of pending media_items for a given source, where pending means the `media_filepath` is `nil` AND the media_item - matches satisfies `MediaQuery.with_media_pending_download`. You + matches satisfies `MediaQuery.where_pending_download`. You should really check out that function if you need to know more because it has a lot going on. @@ -48,7 +72,7 @@ defmodule Pinchflat.Media do def list_pending_media_items_for(%Source{} = source) do MediaQuery.new() |> MediaQuery.for_source(source) - |> MediaQuery.with_media_pending_download() + |> MediaQuery.where_pending_download() |> Repo.all() end @@ -66,7 +90,7 @@ defmodule Pinchflat.Media do MediaQuery.new() |> MediaQuery.with_id(media_item.id) - |> MediaQuery.with_media_pending_download() + |> MediaQuery.where_pending_download() |> Repo.exists?() end diff --git a/lib/pinchflat/media/media_item.ex b/lib/pinchflat/media/media_item.ex index 52633b2..a671d58 100644 --- a/lib/pinchflat/media/media_item.ex +++ b/lib/pinchflat/media/media_item.ex @@ -34,7 +34,8 @@ defmodule Pinchflat.Media.MediaItem do # These are user or system controlled fields :prevent_download, :prevent_culling, - :culled_at + :culled_at, + :media_redownloaded_at ] # Pretty much all the fields captured at index are required. @required_fields ~w( @@ -61,6 +62,7 @@ defmodule Pinchflat.Media.MediaItem do field :livestream, :boolean, default: false field :short_form_content, :boolean, default: false field :media_downloaded_at, :utc_datetime + field :media_redownloaded_at, :utc_datetime field :upload_date, :date field :duration_seconds, :integer diff --git a/lib/pinchflat/media/media_query.ex b/lib/pinchflat/media/media_query.ex index 6751c28..024f745 100644 --- a/lib/pinchflat/media/media_query.ex +++ b/lib/pinchflat/media/media_query.ex @@ -15,7 +15,7 @@ defmodule Pinchflat.Media.MediaQuery do # Prefixes: # - for_* - belonging to a certain record # - join_* - for joining on a certain record - # - with_* - for filtering based on full, concrete attributes + # - with_*, where_* - for filtering based on full, concrete attributes # - matching_* - for filtering based on partial attributes (e.g. LIKE, regex, full-text search) # # Suffixes: @@ -33,24 +33,46 @@ defmodule Pinchflat.Media.MediaQuery do from(mi in query, join: s in assoc(mi, :source), as: :sources) end - def with_passed_retention_period(query) do + def where_past_retention_period(query) do query |> require_assoc(:source) |> where( [mi, source], - fragment( - "IFNULL(?, 0) > 0 AND DATETIME('now', '-' || ? || ' day') > ?", - source.retention_period_days, - source.retention_period_days, - mi.media_downloaded_at - ) + fragment(""" + IFNULL(retention_period_days, 0) > 0 AND + DATETIME('now', '-' || retention_period_days || ' day') > media_downloaded_at + """) ) end - def with_no_culling_prevention(query) do + def where_past_redownload_delay(query) do + query + |> require_assoc(:source) + |> require_assoc(:media_profile) + |> where( + [_mi, _source, _media_profile], + # Returns media items where the upload_date is at least redownload_delay_days ago AND + # downloaded_at minus the redownload_delay_days is before the upload date + fragment(""" + IFNULL(redownload_delay_days, 0) > 0 AND + DATETIME('now', '-' || redownload_delay_days || ' day') > upload_date AND + DATETIME(media_downloaded_at, '-' || redownload_delay_days || ' day') < upload_date + """) + ) + end + + def where_culling_not_prevented(query) do where(query, [mi], mi.prevent_culling == false) end + def where_not_culled(query) do + where(query, [mi], is_nil(mi.culled_at)) + end + + def where_media_not_redownloaded(query) do + where(query, [mi], is_nil(mi.media_redownloaded_at)) + end + def with_id(query, id) do where(query, [mi], mi.id == ^id) end @@ -59,6 +81,10 @@ defmodule Pinchflat.Media.MediaQuery do where(query, [mi], mi.media_id in ^media_ids) end + def with_media_downloaded_at(query) do + where(query, [mi], not is_nil(mi.media_downloaded_at)) + end + def with_media_filepath(query) do where(query, [mi], not is_nil(mi.media_filepath)) end @@ -73,7 +99,7 @@ defmodule Pinchflat.Media.MediaQuery do |> where([mi, source], is_nil(source.download_cutoff_date) or mi.upload_date >= source.download_cutoff_date) end - def with_no_prevented_download(query) do + def where_download_not_prevented(query) do where(query, [mi], mi.prevent_download == false) end @@ -129,9 +155,9 @@ defmodule Pinchflat.Media.MediaQuery do ) end - def with_media_pending_download(query) do + def where_pending_download(query) do query - |> with_no_prevented_download() + |> where_download_not_prevented() |> with_no_media_filepath() |> with_upload_date_after_source_cutoff() |> with_format_matching_profile_preference() diff --git a/lib/pinchflat/notifications/source_notifications.ex b/lib/pinchflat/notifications/source_notifications.ex index e39841d..1205858 100644 --- a/lib/pinchflat/notifications/source_notifications.ex +++ b/lib/pinchflat/notifications/source_notifications.ex @@ -59,7 +59,7 @@ defmodule Pinchflat.Notifications.SourceNotifications do defp pending_media_item_count(source) do MediaQuery.new() |> MediaQuery.for_source(source) - |> MediaQuery.with_media_pending_download() + |> MediaQuery.where_pending_download() |> Repo.aggregate(:count) end diff --git a/lib/pinchflat/profiles/media_profile.ex b/lib/pinchflat/profiles/media_profile.ex index 57a3b7f..0b351a9 100644 --- a/lib/pinchflat/profiles/media_profile.ex +++ b/lib/pinchflat/profiles/media_profile.ex @@ -26,12 +26,14 @@ defmodule Pinchflat.Profiles.MediaProfile do shorts_behaviour livestream_behaviour preferred_resolution + redownload_delay_days )a @required_fields ~w(name output_path_template)a schema "media_profiles" do field :name, :string + field :redownload_delay_days, :integer field :output_path_template, :string, default: "/{{ source_custom_name }}/{{ upload_yyyy_mm_dd }} {{ title }}/{{ title }} [{{ id }}].{{ ext }}" @@ -60,7 +62,6 @@ defmodule Pinchflat.Profiles.MediaProfile do # See `build_format_clauses` in the Media context for more. field :shorts_behaviour, Ecto.Enum, values: ~w(include exclude only)a, default: :include field :livestream_behaviour, Ecto.Enum, values: ~w(include exclude only)a, default: :include - field :preferred_resolution, Ecto.Enum, values: ~w(2160p 1080p 720p 480p 360p audio)a, default: :"1080p" has_many :sources, Source @@ -75,6 +76,7 @@ defmodule Pinchflat.Profiles.MediaProfile do |> validate_required(@required_fields) # Ensures it ends with `.{{ ext }}` or `.%(ext)s` or similar (with a little wiggle room) |> validate_format(:output_path_template, ext_regex(), message: "must end with .{{ ext }}") + |> validate_number(:redownload_delay_days, greater_than_or_equal_to: 0) |> unique_constraint(:name) end diff --git a/lib/pinchflat_web/controllers/media_profiles/media_profile_html/media_profile_form.html.heex b/lib/pinchflat_web/controllers/media_profiles/media_profile_html/media_profile_form.html.heex index 36d99cc..78a6c5f 100644 --- a/lib/pinchflat_web/controllers/media_profiles/media_profile_html/media_profile_form.html.heex +++ b/lib/pinchflat_web/controllers/media_profiles/media_profile_html/media_profile_form.html.heex @@ -203,6 +203,17 @@ /> +
+ <.input + field={f[:redownload_delay_days]} + type="number" + label="Redownload Delay (days)" + min="0" + help="Delay in days until new media is redownloaded. Redownloading new media can improve its quality or SponsorBlock tags. Leave blank to not redownload" + x-init="$watch('selectedPreset', p => p && ($el.value = presets[p]))" + /> +
+

Media Center Options

diff --git a/lib/pinchflat_web/controllers/sources/source_controller.ex b/lib/pinchflat_web/controllers/sources/source_controller.ex index d8e904d..ff12534 100644 --- a/lib/pinchflat_web/controllers/sources/source_controller.ex +++ b/lib/pinchflat_web/controllers/sources/source_controller.ex @@ -63,7 +63,7 @@ defmodule PinchflatWeb.Sources.SourceController do pending_media = MediaQuery.new() |> MediaQuery.for_source(source) - |> MediaQuery.with_media_pending_download() + |> MediaQuery.where_pending_download() |> order_by(desc: :id) |> limit(100) |> Repo.all() diff --git a/priv/repo/migrations/20240409224152_add_redownloaded_fields.exs b/priv/repo/migrations/20240409224152_add_redownloaded_fields.exs new file mode 100644 index 0000000..11c528c --- /dev/null +++ b/priv/repo/migrations/20240409224152_add_redownloaded_fields.exs @@ -0,0 +1,13 @@ +defmodule Pinchflat.Repo.Migrations.AddRedownloadedFields do + use Ecto.Migration + + def change do + alter table(:media_profiles) do + add :redownload_delay_days, :integer + end + + alter table(:media_items) do + add :media_redownloaded_at, :utc_datetime + end + end +end diff --git a/test/pinchflat/downloading/media_download_worker_test.exs b/test/pinchflat/downloading/media_download_worker_test.exs index b227c31..4f3b72e 100644 --- a/test/pinchflat/downloading/media_download_worker_test.exs +++ b/test/pinchflat/downloading/media_download_worker_test.exs @@ -62,7 +62,9 @@ defmodule Pinchflat.Downloading.MediaDownloadWorkerTest do assert media_item.media_filepath == nil perform_job(MediaDownloadWorker, %{id: media_item.id}) - assert Repo.reload(media_item).media_filepath != nil + media_item = Repo.reload(media_item) + + assert media_item.media_filepath != nil end test "it saves the metadata to the media_item", %{media_item: media_item} do @@ -149,6 +151,28 @@ defmodule Pinchflat.Downloading.MediaDownloadWorkerTest do assert media_item.media_size_bytes > 0 end + test "saves redownloaded_at if this is for a redownload", %{media_item: media_item} do + expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, _addl -> + {:ok, render_metadata(:media_metadata)} + end) + + perform_job(MediaDownloadWorker, %{id: media_item.id, redownload?: true}) + media_item = Repo.reload(media_item) + + assert media_item.media_redownloaded_at != nil + end + + test "doesn't save redownloaded_at if this is not for a redownload", %{media_item: media_item} do + expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, _addl -> + {:ok, render_metadata(:media_metadata)} + end) + + perform_job(MediaDownloadWorker, %{id: media_item.id}) + media_item = Repo.reload(media_item) + + assert media_item.media_redownloaded_at == nil + end + test "does not blow up if the record doesn't exist" do assert :ok = perform_job(MediaDownloadWorker, %{id: 0}) end diff --git a/test/pinchflat/downloading/media_redownload_worker_test.exs b/test/pinchflat/downloading/media_redownload_worker_test.exs new file mode 100644 index 0000000..70d9cc0 --- /dev/null +++ b/test/pinchflat/downloading/media_redownload_worker_test.exs @@ -0,0 +1,44 @@ +defmodule Pinchflat.Downloading.MediaRedownloadWorkerTest do + use Pinchflat.DataCase + + import Pinchflat.MediaFixtures + import Pinchflat.SourcesFixtures + import Pinchflat.ProfilesFixtures + + alias Pinchflat.Downloading.MediaDownloadWorker + alias Pinchflat.Downloading.MediaRedownloadWorker + + describe "perform/1" do + test "kicks off a task for redownloadable media items" do + media_profile = media_profile_fixture(%{redownload_delay_days: 4}) + source = source_fixture(%{media_profile_id: media_profile.id, inserted_at: now_minus(10, :days)}) + + media_item = + media_item_fixture(%{ + source_id: source.id, + upload_date: now_minus(6, :days), + media_downloaded_at: now_minus(5, :days) + }) + + perform_job(MediaRedownloadWorker, %{}) + + assert [_] = all_enqueued(worker: MediaDownloadWorker, args: %{id: media_item.id, redownload?: true}) + end + + test "does not kickoff a task for non-redownloadable media items" do + media_profile = media_profile_fixture(%{redownload_delay_days: 4}) + source = source_fixture(%{media_profile_id: media_profile.id, inserted_at: now_minus(10, :days)}) + + _media_item = + media_item_fixture(%{ + source_id: source.id, + upload_date: now_minus(6, :days), + media_downloaded_at: now_minus(1, :day) + }) + + perform_job(MediaRedownloadWorker, %{}) + + assert [] = all_enqueued(worker: MediaDownloadWorker) + end + end +end diff --git a/test/pinchflat/media_test.exs b/test/pinchflat/media_test.exs index 52eff4d..fad36c0 100644 --- a/test/pinchflat/media_test.exs +++ b/test/pinchflat/media_test.exs @@ -130,6 +130,131 @@ defmodule Pinchflat.MediaTest do end end + describe "list_redownloadable_media_items/0" do + setup do + media_profile = media_profile_fixture(%{redownload_delay_days: 4}) + source = source_fixture(%{media_profile_id: media_profile.id, inserted_at: now_minus(10, :days)}) + + {:ok, %{media_profile: media_profile, source: source}} + end + + test "returns media eligible for redownload", %{source: source} do + media_item = + media_item_fixture(%{ + source_id: source.id, + upload_date: now_minus(6, :days), + media_downloaded_at: now_minus(5, :days) + }) + + assert Media.list_redownloadable_media_items() == [media_item] + end + + test "returns media items that were downloaded in past but still meet redownload delay", %{source: source} do + media_item = + media_item_fixture(%{ + source_id: source.id, + upload_date: now_minus(20, :days), + media_downloaded_at: now_minus(19, :days) + }) + + assert Media.list_redownloadable_media_items() == [media_item] + end + + test "does not return media items without a media_downloaded_at", %{source: source} do + _media_item = + media_item_fixture(%{ + source_id: source.id, + upload_date: now_minus(5, :days), + media_downloaded_at: nil + }) + + assert Media.list_redownloadable_media_items() == [] + end + + test "does not return media items that are set to prevent download", %{source: source} do + _media_item = + media_item_fixture(%{ + source_id: source.id, + upload_date: now_minus(5, :days), + media_downloaded_at: now(), + prevent_download: true + }) + + assert Media.list_redownloadable_media_items() == [] + end + + test "does not return media items that have been culled", %{source: source} do + _media_item = + media_item_fixture(%{ + source_id: source.id, + upload_date: now_minus(5, :days), + media_downloaded_at: now(), + culled_at: now() + }) + + assert Media.list_redownloadable_media_items() == [] + end + + test "does not return media items before the download delay", %{source: source} do + _media_item = + media_item_fixture(%{ + source_id: source.id, + upload_date: now_minus(3, :days), + media_downloaded_at: now_minus(3, :days) + }) + + assert Media.list_redownloadable_media_items() == [] + end + + test "does not return media items that have already been redownloaded", %{source: source} do + _media_item = + media_item_fixture(%{ + source_id: source.id, + upload_date: now_minus(5, :days), + media_downloaded_at: now(), + media_redownloaded_at: now() + }) + + assert Media.list_redownloadable_media_items() == [] + end + + test "does not return media items that were first downloaded well after the upload_date", %{source: source} do + _media_item = + media_item_fixture(%{ + source_id: source.id, + media_downloaded_at: now(), + upload_date: now_minus(20, :days) + }) + + assert Media.list_redownloadable_media_items() == [] + end + + test "does not return media items that were recently uploaded", %{source: source} do + _media_item = + media_item_fixture(%{ + source_id: source.id, + media_downloaded_at: now(), + upload_date: now_minus(2, :days) + }) + + assert Media.list_redownloadable_media_items() == [] + end + + test "does not return media items without a redownload delay" do + media_profile = media_profile_fixture(%{redownload_delay_days: nil}) + source = source_fixture(%{media_profile_id: media_profile.id}) + + _media_item = + media_item_fixture(%{ + source_id: source.id, + upload_date: now_minus(6, :days), + media_downloaded_at: now_minus(5, :days) + }) + + assert Media.list_redownloadable_media_items() == [] + end + end + describe "list_pending_media_items_for/1" do test "it returns pending without a filepath for a given source" do source = source_fixture()