diff --git a/README.md b/README.md
index bdf24a5..0b0a10c 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,8 @@ If it doesn't work for your use case, please make a feature request! You can als
- Supports downloading audio content
- Custom rules for handling YouTube Shorts and livestreams
- Apprise support for notifications
+- Allows automatically redownloading new media after a set period
+ - This can help improve the download quality of new content or improve SponsorBlock tags
- Optionally automatically delete old content ([docs](https://github.com/kieraneglin/pinchflat/wiki/Automatically-Delete-Media))
- Advanced options like setting cutoff dates and filtering by title
- Reliable hands-off operation
diff --git a/config/config.exs b/config/config.exs
index b395950..c157fab 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -52,7 +52,8 @@ config :pinchflat, Oban,
{Oban.Plugins.Pruner, max_age: 30 * 24 * 60 * 60},
{Oban.Plugins.Cron,
crontab: [
- {"@daily", Pinchflat.Downloading.MediaRetentionWorker}
+ {"0 1 * * *", Pinchflat.Downloading.MediaRetentionWorker},
+ {"0 2 * * *", Pinchflat.Downloading.MediaRedownloadWorker}
]}
],
# TODO: consider making this an env var or something?
diff --git a/lib/pinchflat/downloading/media_download_worker.ex b/lib/pinchflat/downloading/media_download_worker.ex
index 17ab5b5..0e12067 100644
--- a/lib/pinchflat/downloading/media_download_worker.ex
+++ b/lib/pinchflat/downloading/media_download_worker.ex
@@ -35,14 +35,17 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do
"""
@impl Oban.Worker
def perform(%Oban.Job{args: %{"id" => media_item_id} = args}) do
+ should_force = Map.get(args, "force", false)
+ is_redownload = Map.get(args, "redownload?", false)
+
media_item =
media_item_id
|> Media.get_media_item!()
|> Repo.preload(:source)
# If the source or media item is set to not download media, perform a no-op unless forced
- if (media_item.source.download_media && !media_item.prevent_download) || args["force"] do
- download_media_and_schedule_jobs(media_item)
+ if (media_item.source.download_media && !media_item.prevent_download) || should_force do
+ download_media_and_schedule_jobs(media_item, is_redownload)
else
:ok
end
@@ -51,10 +54,13 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do
Ecto.StaleEntryError -> Logger.info("#{__MODULE__} discarded: media item #{media_item_id} stale")
end
- defp download_media_and_schedule_jobs(media_item) do
+ defp download_media_and_schedule_jobs(media_item, is_redownload) do
case MediaDownloader.download_for_media_item(media_item) do
{:ok, updated_media_item} ->
- compute_and_save_media_filesize(updated_media_item)
+ Media.update_media_item(updated_media_item, %{
+ media_size_bytes: compute_media_filesize(updated_media_item),
+ media_redownloaded_at: get_redownloaded_at(is_redownload)
+ })
{:ok, updated_media_item}
@@ -66,13 +72,21 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do
end
end
- defp compute_and_save_media_filesize(media_item) do
+ defp compute_media_filesize(media_item) do
case File.stat(media_item.media_filepath) do
{:ok, %{size: size}} ->
- Media.update_media_item(media_item, %{media_size_bytes: size})
+ size
_ ->
- :ok
+ nil
+ end
+ end
+
+ defp get_redownloaded_at(is_redownload) do
+ if is_redownload do
+ DateTime.utc_now()
+ else
+ nil
end
end
end
diff --git a/lib/pinchflat/downloading/media_redownload_worker.ex b/lib/pinchflat/downloading/media_redownload_worker.ex
new file mode 100644
index 0000000..b3b6d82
--- /dev/null
+++ b/lib/pinchflat/downloading/media_redownload_worker.ex
@@ -0,0 +1,31 @@
+defmodule Pinchflat.Downloading.MediaRedownloadWorker do
+ @moduledoc false
+
+ use Oban.Worker,
+ queue: :media_fetching,
+ unique: [period: :infinity, states: [:available, :scheduled, :retryable, :executing]],
+ tags: ["media_item", "media_fetching"]
+
+ require Logger
+
+ alias Pinchflat.Media
+ alias Pinchflat.Downloading.MediaDownloadWorker
+
+ @doc """
+ Redownloads media items that are eligible for redownload.
+
+ This worker is scheduled to run daily via the Oban Cron plugin
+ and it should run _after_ the retention worker.
+
+ Returns :ok
+ """
+ @impl Oban.Worker
+ def perform(%Oban.Job{}) do
+ redownloadable_media = Media.list_redownloadable_media_items()
+ Logger.info("Redownloading #{length(redownloadable_media)} media items")
+
+ Enum.each(redownloadable_media, fn media_item ->
+ MediaDownloadWorker.kickoff_with_task(media_item, %{redownload?: true})
+ end)
+ end
+end
diff --git a/lib/pinchflat/media/media.ex b/lib/pinchflat/media/media.ex
index ee8531d..d458af4 100644
--- a/lib/pinchflat/media/media.ex
+++ b/lib/pinchflat/media/media.ex
@@ -31,15 +31,39 @@ defmodule Pinchflat.Media do
def list_cullable_media_items do
MediaQuery.new()
|> MediaQuery.with_media_filepath()
- |> MediaQuery.with_passed_retention_period()
- |> MediaQuery.with_no_culling_prevention()
+ |> MediaQuery.where_past_retention_period()
+ |> MediaQuery.where_culling_not_prevented()
+ |> Repo.all()
+ end
+
+ @doc """
+ Returns a list of media_items that are redownloadable based on the redownload delay
+ of the media_profile their source belongs to.
+
+ The logic is that a media_item is past_redownload_delay if the media_item's
+ upload_date is at least redownload_delay_days ago AND
+ `media_downloaded_at` - `redownload_delay_days` is before the media_item's `upload_date`.
+ This logic grabs media that we've recently downloaded AND is recently uploaded, but
+ doesn't grab media that we've recently downloaded and was uploaded a long time ago.
+ This also makes things work as expected when downloading media from a source for the
+ first time.
+
+ Returns [%MediaItem{}, ...]
+ """
+ def list_redownloadable_media_items do
+ MediaQuery.new()
+ |> MediaQuery.with_media_downloaded_at()
+ |> MediaQuery.where_download_not_prevented()
+ |> MediaQuery.where_not_culled()
+ |> MediaQuery.where_media_not_redownloaded()
+ |> MediaQuery.where_past_redownload_delay()
|> Repo.all()
end
@doc """
Returns a list of pending media_items for a given source, where
pending means the `media_filepath` is `nil` AND the media_item
- matches satisfies `MediaQuery.with_media_pending_download`. You
+ matches satisfies `MediaQuery.where_pending_download`. You
should really check out that function if you need to know more
because it has a lot going on.
@@ -48,7 +72,7 @@ defmodule Pinchflat.Media do
def list_pending_media_items_for(%Source{} = source) do
MediaQuery.new()
|> MediaQuery.for_source(source)
- |> MediaQuery.with_media_pending_download()
+ |> MediaQuery.where_pending_download()
|> Repo.all()
end
@@ -66,7 +90,7 @@ defmodule Pinchflat.Media do
MediaQuery.new()
|> MediaQuery.with_id(media_item.id)
- |> MediaQuery.with_media_pending_download()
+ |> MediaQuery.where_pending_download()
|> Repo.exists?()
end
diff --git a/lib/pinchflat/media/media_item.ex b/lib/pinchflat/media/media_item.ex
index 52633b2..a671d58 100644
--- a/lib/pinchflat/media/media_item.ex
+++ b/lib/pinchflat/media/media_item.ex
@@ -34,7 +34,8 @@ defmodule Pinchflat.Media.MediaItem do
# These are user or system controlled fields
:prevent_download,
:prevent_culling,
- :culled_at
+ :culled_at,
+ :media_redownloaded_at
]
# Pretty much all the fields captured at index are required.
@required_fields ~w(
@@ -61,6 +62,7 @@ defmodule Pinchflat.Media.MediaItem do
field :livestream, :boolean, default: false
field :short_form_content, :boolean, default: false
field :media_downloaded_at, :utc_datetime
+ field :media_redownloaded_at, :utc_datetime
field :upload_date, :date
field :duration_seconds, :integer
diff --git a/lib/pinchflat/media/media_query.ex b/lib/pinchflat/media/media_query.ex
index 6751c28..024f745 100644
--- a/lib/pinchflat/media/media_query.ex
+++ b/lib/pinchflat/media/media_query.ex
@@ -15,7 +15,7 @@ defmodule Pinchflat.Media.MediaQuery do
# Prefixes:
# - for_* - belonging to a certain record
# - join_* - for joining on a certain record
- # - with_* - for filtering based on full, concrete attributes
+ # - with_*, where_* - for filtering based on full, concrete attributes
# - matching_* - for filtering based on partial attributes (e.g. LIKE, regex, full-text search)
#
# Suffixes:
@@ -33,24 +33,46 @@ defmodule Pinchflat.Media.MediaQuery do
from(mi in query, join: s in assoc(mi, :source), as: :sources)
end
- def with_passed_retention_period(query) do
+ def where_past_retention_period(query) do
query
|> require_assoc(:source)
|> where(
[mi, source],
- fragment(
- "IFNULL(?, 0) > 0 AND DATETIME('now', '-' || ? || ' day') > ?",
- source.retention_period_days,
- source.retention_period_days,
- mi.media_downloaded_at
- )
+ fragment("""
+ IFNULL(retention_period_days, 0) > 0 AND
+ DATETIME('now', '-' || retention_period_days || ' day') > media_downloaded_at
+ """)
)
end
- def with_no_culling_prevention(query) do
+ def where_past_redownload_delay(query) do
+ query
+ |> require_assoc(:source)
+ |> require_assoc(:media_profile)
+ |> where(
+ [_mi, _source, _media_profile],
+ # Returns media items where the upload_date is at least redownload_delay_days ago AND
+ # downloaded_at minus the redownload_delay_days is before the upload date
+ fragment("""
+ IFNULL(redownload_delay_days, 0) > 0 AND
+ DATETIME('now', '-' || redownload_delay_days || ' day') > upload_date AND
+ DATETIME(media_downloaded_at, '-' || redownload_delay_days || ' day') < upload_date
+ """)
+ )
+ end
+
+ def where_culling_not_prevented(query) do
where(query, [mi], mi.prevent_culling == false)
end
+ def where_not_culled(query) do
+ where(query, [mi], is_nil(mi.culled_at))
+ end
+
+ def where_media_not_redownloaded(query) do
+ where(query, [mi], is_nil(mi.media_redownloaded_at))
+ end
+
def with_id(query, id) do
where(query, [mi], mi.id == ^id)
end
@@ -59,6 +81,10 @@ defmodule Pinchflat.Media.MediaQuery do
where(query, [mi], mi.media_id in ^media_ids)
end
+ def with_media_downloaded_at(query) do
+ where(query, [mi], not is_nil(mi.media_downloaded_at))
+ end
+
def with_media_filepath(query) do
where(query, [mi], not is_nil(mi.media_filepath))
end
@@ -73,7 +99,7 @@ defmodule Pinchflat.Media.MediaQuery do
|> where([mi, source], is_nil(source.download_cutoff_date) or mi.upload_date >= source.download_cutoff_date)
end
- def with_no_prevented_download(query) do
+ def where_download_not_prevented(query) do
where(query, [mi], mi.prevent_download == false)
end
@@ -129,9 +155,9 @@ defmodule Pinchflat.Media.MediaQuery do
)
end
- def with_media_pending_download(query) do
+ def where_pending_download(query) do
query
- |> with_no_prevented_download()
+ |> where_download_not_prevented()
|> with_no_media_filepath()
|> with_upload_date_after_source_cutoff()
|> with_format_matching_profile_preference()
diff --git a/lib/pinchflat/notifications/source_notifications.ex b/lib/pinchflat/notifications/source_notifications.ex
index e39841d..1205858 100644
--- a/lib/pinchflat/notifications/source_notifications.ex
+++ b/lib/pinchflat/notifications/source_notifications.ex
@@ -59,7 +59,7 @@ defmodule Pinchflat.Notifications.SourceNotifications do
defp pending_media_item_count(source) do
MediaQuery.new()
|> MediaQuery.for_source(source)
- |> MediaQuery.with_media_pending_download()
+ |> MediaQuery.where_pending_download()
|> Repo.aggregate(:count)
end
diff --git a/lib/pinchflat/profiles/media_profile.ex b/lib/pinchflat/profiles/media_profile.ex
index 57a3b7f..0b351a9 100644
--- a/lib/pinchflat/profiles/media_profile.ex
+++ b/lib/pinchflat/profiles/media_profile.ex
@@ -26,12 +26,14 @@ defmodule Pinchflat.Profiles.MediaProfile do
shorts_behaviour
livestream_behaviour
preferred_resolution
+ redownload_delay_days
)a
@required_fields ~w(name output_path_template)a
schema "media_profiles" do
field :name, :string
+ field :redownload_delay_days, :integer
field :output_path_template, :string,
default: "/{{ source_custom_name }}/{{ upload_yyyy_mm_dd }} {{ title }}/{{ title }} [{{ id }}].{{ ext }}"
@@ -60,7 +62,6 @@ defmodule Pinchflat.Profiles.MediaProfile do
# See `build_format_clauses` in the Media context for more.
field :shorts_behaviour, Ecto.Enum, values: ~w(include exclude only)a, default: :include
field :livestream_behaviour, Ecto.Enum, values: ~w(include exclude only)a, default: :include
-
field :preferred_resolution, Ecto.Enum, values: ~w(2160p 1080p 720p 480p 360p audio)a, default: :"1080p"
has_many :sources, Source
@@ -75,6 +76,7 @@ defmodule Pinchflat.Profiles.MediaProfile do
|> validate_required(@required_fields)
# Ensures it ends with `.{{ ext }}` or `.%(ext)s` or similar (with a little wiggle room)
|> validate_format(:output_path_template, ext_regex(), message: "must end with .{{ ext }}")
+ |> validate_number(:redownload_delay_days, greater_than_or_equal_to: 0)
|> unique_constraint(:name)
end
diff --git a/lib/pinchflat_web/controllers/media_profiles/media_profile_html/media_profile_form.html.heex b/lib/pinchflat_web/controllers/media_profiles/media_profile_html/media_profile_form.html.heex
index 36d99cc..78a6c5f 100644
--- a/lib/pinchflat_web/controllers/media_profiles/media_profile_html/media_profile_form.html.heex
+++ b/lib/pinchflat_web/controllers/media_profiles/media_profile_html/media_profile_form.html.heex
@@ -203,6 +203,17 @@
/>
+