[Enhancement] Redownload new media after a delay (#173)

* Added redownload-related columns

* Added methods for fetching re-downloadable media items

* Filled out redownload worker + tests

* Added redownload worker to config.exs cron

* Added to UI and README
This commit is contained in:
Kieran 2024-04-10 17:54:45 -07:00 committed by GitHub
parent a2bcd454c7
commit e841f39cf2
15 changed files with 349 additions and 30 deletions

View file

@ -49,6 +49,8 @@ If it doesn't work for your use case, please make a feature request! You can als
- Supports downloading audio content
- Custom rules for handling YouTube Shorts and livestreams
- Apprise support for notifications
- Allows automatically redownloading new media after a set period
- This can help improve the download quality of new content or improve SponsorBlock tags
- Optionally automatically delete old content ([docs](https://github.com/kieraneglin/pinchflat/wiki/Automatically-Delete-Media))
- Advanced options like setting cutoff dates and filtering by title
- Reliable hands-off operation

View file

@ -52,7 +52,8 @@ config :pinchflat, Oban,
{Oban.Plugins.Pruner, max_age: 30 * 24 * 60 * 60},
{Oban.Plugins.Cron,
crontab: [
{"@daily", Pinchflat.Downloading.MediaRetentionWorker}
{"0 1 * * *", Pinchflat.Downloading.MediaRetentionWorker},
{"0 2 * * *", Pinchflat.Downloading.MediaRedownloadWorker}
]}
],
# TODO: consider making this an env var or something?

View file

@ -35,14 +35,17 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do
"""
@impl Oban.Worker
def perform(%Oban.Job{args: %{"id" => media_item_id} = args}) do
should_force = Map.get(args, "force", false)
is_redownload = Map.get(args, "redownload?", false)
media_item =
media_item_id
|> Media.get_media_item!()
|> Repo.preload(:source)
# If the source or media item is set to not download media, perform a no-op unless forced
if (media_item.source.download_media && !media_item.prevent_download) || args["force"] do
download_media_and_schedule_jobs(media_item)
if (media_item.source.download_media && !media_item.prevent_download) || should_force do
download_media_and_schedule_jobs(media_item, is_redownload)
else
:ok
end
@ -51,10 +54,13 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do
Ecto.StaleEntryError -> Logger.info("#{__MODULE__} discarded: media item #{media_item_id} stale")
end
defp download_media_and_schedule_jobs(media_item) do
defp download_media_and_schedule_jobs(media_item, is_redownload) do
case MediaDownloader.download_for_media_item(media_item) do
{:ok, updated_media_item} ->
compute_and_save_media_filesize(updated_media_item)
Media.update_media_item(updated_media_item, %{
media_size_bytes: compute_media_filesize(updated_media_item),
media_redownloaded_at: get_redownloaded_at(is_redownload)
})
{:ok, updated_media_item}
@ -66,13 +72,21 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do
end
end
defp compute_and_save_media_filesize(media_item) do
defp compute_media_filesize(media_item) do
case File.stat(media_item.media_filepath) do
{:ok, %{size: size}} ->
Media.update_media_item(media_item, %{media_size_bytes: size})
size
_ ->
:ok
nil
end
end
defp get_redownloaded_at(is_redownload) do
if is_redownload do
DateTime.utc_now()
else
nil
end
end
end

View file

@ -0,0 +1,31 @@
defmodule Pinchflat.Downloading.MediaRedownloadWorker do
@moduledoc false
use Oban.Worker,
queue: :media_fetching,
unique: [period: :infinity, states: [:available, :scheduled, :retryable, :executing]],
tags: ["media_item", "media_fetching"]
require Logger
alias Pinchflat.Media
alias Pinchflat.Downloading.MediaDownloadWorker
@doc """
Redownloads media items that are eligible for redownload.
This worker is scheduled to run daily via the Oban Cron plugin
and it should run _after_ the retention worker.
Returns :ok
"""
@impl Oban.Worker
def perform(%Oban.Job{}) do
redownloadable_media = Media.list_redownloadable_media_items()
Logger.info("Redownloading #{length(redownloadable_media)} media items")
Enum.each(redownloadable_media, fn media_item ->
MediaDownloadWorker.kickoff_with_task(media_item, %{redownload?: true})
end)
end
end

View file

@ -31,15 +31,39 @@ defmodule Pinchflat.Media do
def list_cullable_media_items do
MediaQuery.new()
|> MediaQuery.with_media_filepath()
|> MediaQuery.with_passed_retention_period()
|> MediaQuery.with_no_culling_prevention()
|> MediaQuery.where_past_retention_period()
|> MediaQuery.where_culling_not_prevented()
|> Repo.all()
end
@doc """
Returns a list of media_items that are redownloadable based on the redownload delay
of the media_profile their source belongs to.
The logic is that a media_item is past_redownload_delay if the media_item's
upload_date is at least redownload_delay_days ago AND
`media_downloaded_at` - `redownload_delay_days` is before the media_item's `upload_date`.
This logic grabs media that we've recently downloaded AND is recently uploaded, but
doesn't grab media that we've recently downloaded and was uploaded a long time ago.
This also makes things work as expected when downloading media from a source for the
first time.
Returns [%MediaItem{}, ...]
"""
def list_redownloadable_media_items do
MediaQuery.new()
|> MediaQuery.with_media_downloaded_at()
|> MediaQuery.where_download_not_prevented()
|> MediaQuery.where_not_culled()
|> MediaQuery.where_media_not_redownloaded()
|> MediaQuery.where_past_redownload_delay()
|> Repo.all()
end
@doc """
Returns a list of pending media_items for a given source, where
pending means the `media_filepath` is `nil` AND the media_item
matches satisfies `MediaQuery.with_media_pending_download`. You
matches satisfies `MediaQuery.where_pending_download`. You
should really check out that function if you need to know more
because it has a lot going on.
@ -48,7 +72,7 @@ defmodule Pinchflat.Media do
def list_pending_media_items_for(%Source{} = source) do
MediaQuery.new()
|> MediaQuery.for_source(source)
|> MediaQuery.with_media_pending_download()
|> MediaQuery.where_pending_download()
|> Repo.all()
end
@ -66,7 +90,7 @@ defmodule Pinchflat.Media do
MediaQuery.new()
|> MediaQuery.with_id(media_item.id)
|> MediaQuery.with_media_pending_download()
|> MediaQuery.where_pending_download()
|> Repo.exists?()
end

View file

@ -34,7 +34,8 @@ defmodule Pinchflat.Media.MediaItem do
# These are user or system controlled fields
:prevent_download,
:prevent_culling,
:culled_at
:culled_at,
:media_redownloaded_at
]
# Pretty much all the fields captured at index are required.
@required_fields ~w(
@ -61,6 +62,7 @@ defmodule Pinchflat.Media.MediaItem do
field :livestream, :boolean, default: false
field :short_form_content, :boolean, default: false
field :media_downloaded_at, :utc_datetime
field :media_redownloaded_at, :utc_datetime
field :upload_date, :date
field :duration_seconds, :integer

View file

@ -15,7 +15,7 @@ defmodule Pinchflat.Media.MediaQuery do
# Prefixes:
# - for_* - belonging to a certain record
# - join_* - for joining on a certain record
# - with_* - for filtering based on full, concrete attributes
# - with_*, where_* - for filtering based on full, concrete attributes
# - matching_* - for filtering based on partial attributes (e.g. LIKE, regex, full-text search)
#
# Suffixes:
@ -33,24 +33,46 @@ defmodule Pinchflat.Media.MediaQuery do
from(mi in query, join: s in assoc(mi, :source), as: :sources)
end
def with_passed_retention_period(query) do
def where_past_retention_period(query) do
query
|> require_assoc(:source)
|> where(
[mi, source],
fragment(
"IFNULL(?, 0) > 0 AND DATETIME('now', '-' || ? || ' day') > ?",
source.retention_period_days,
source.retention_period_days,
mi.media_downloaded_at
)
fragment("""
IFNULL(retention_period_days, 0) > 0 AND
DATETIME('now', '-' || retention_period_days || ' day') > media_downloaded_at
""")
)
end
def with_no_culling_prevention(query) do
def where_past_redownload_delay(query) do
query
|> require_assoc(:source)
|> require_assoc(:media_profile)
|> where(
[_mi, _source, _media_profile],
# Returns media items where the upload_date is at least redownload_delay_days ago AND
# downloaded_at minus the redownload_delay_days is before the upload date
fragment("""
IFNULL(redownload_delay_days, 0) > 0 AND
DATETIME('now', '-' || redownload_delay_days || ' day') > upload_date AND
DATETIME(media_downloaded_at, '-' || redownload_delay_days || ' day') < upload_date
""")
)
end
def where_culling_not_prevented(query) do
where(query, [mi], mi.prevent_culling == false)
end
def where_not_culled(query) do
where(query, [mi], is_nil(mi.culled_at))
end
def where_media_not_redownloaded(query) do
where(query, [mi], is_nil(mi.media_redownloaded_at))
end
def with_id(query, id) do
where(query, [mi], mi.id == ^id)
end
@ -59,6 +81,10 @@ defmodule Pinchflat.Media.MediaQuery do
where(query, [mi], mi.media_id in ^media_ids)
end
def with_media_downloaded_at(query) do
where(query, [mi], not is_nil(mi.media_downloaded_at))
end
def with_media_filepath(query) do
where(query, [mi], not is_nil(mi.media_filepath))
end
@ -73,7 +99,7 @@ defmodule Pinchflat.Media.MediaQuery do
|> where([mi, source], is_nil(source.download_cutoff_date) or mi.upload_date >= source.download_cutoff_date)
end
def with_no_prevented_download(query) do
def where_download_not_prevented(query) do
where(query, [mi], mi.prevent_download == false)
end
@ -129,9 +155,9 @@ defmodule Pinchflat.Media.MediaQuery do
)
end
def with_media_pending_download(query) do
def where_pending_download(query) do
query
|> with_no_prevented_download()
|> where_download_not_prevented()
|> with_no_media_filepath()
|> with_upload_date_after_source_cutoff()
|> with_format_matching_profile_preference()

View file

@ -59,7 +59,7 @@ defmodule Pinchflat.Notifications.SourceNotifications do
defp pending_media_item_count(source) do
MediaQuery.new()
|> MediaQuery.for_source(source)
|> MediaQuery.with_media_pending_download()
|> MediaQuery.where_pending_download()
|> Repo.aggregate(:count)
end

View file

@ -26,12 +26,14 @@ defmodule Pinchflat.Profiles.MediaProfile do
shorts_behaviour
livestream_behaviour
preferred_resolution
redownload_delay_days
)a
@required_fields ~w(name output_path_template)a
schema "media_profiles" do
field :name, :string
field :redownload_delay_days, :integer
field :output_path_template, :string,
default: "/{{ source_custom_name }}/{{ upload_yyyy_mm_dd }} {{ title }}/{{ title }} [{{ id }}].{{ ext }}"
@ -60,7 +62,6 @@ defmodule Pinchflat.Profiles.MediaProfile do
# See `build_format_clauses` in the Media context for more.
field :shorts_behaviour, Ecto.Enum, values: ~w(include exclude only)a, default: :include
field :livestream_behaviour, Ecto.Enum, values: ~w(include exclude only)a, default: :include
field :preferred_resolution, Ecto.Enum, values: ~w(2160p 1080p 720p 480p 360p audio)a, default: :"1080p"
has_many :sources, Source
@ -75,6 +76,7 @@ defmodule Pinchflat.Profiles.MediaProfile do
|> validate_required(@required_fields)
# Ensures it ends with `.{{ ext }}` or `.%(ext)s` or similar (with a little wiggle room)
|> validate_format(:output_path_template, ext_regex(), message: "must end with .{{ ext }}")
|> validate_number(:redownload_delay_days, greater_than_or_equal_to: 0)
|> unique_constraint(:name)
end

View file

@ -203,6 +203,17 @@
/>
</section>
<section x-data="{ presets: { default: null, media_center: 1, audio: null, archiving: 1 } }">
<.input
field={f[:redownload_delay_days]}
type="number"
label="Redownload Delay (days)"
min="0"
help="Delay in days until new media is redownloaded. Redownloading new media can improve its quality or SponsorBlock tags. Leave blank to not redownload"
x-init="$watch('selectedPreset', p => p && ($el.value = presets[p]))"
/>
</section>
<h3 class="mt-8 text-2xl text-black dark:text-white">
Media Center Options
</h3>

View file

@ -63,7 +63,7 @@ defmodule PinchflatWeb.Sources.SourceController do
pending_media =
MediaQuery.new()
|> MediaQuery.for_source(source)
|> MediaQuery.with_media_pending_download()
|> MediaQuery.where_pending_download()
|> order_by(desc: :id)
|> limit(100)
|> Repo.all()

View file

@ -0,0 +1,13 @@
defmodule Pinchflat.Repo.Migrations.AddRedownloadedFields do
use Ecto.Migration
def change do
alter table(:media_profiles) do
add :redownload_delay_days, :integer
end
alter table(:media_items) do
add :media_redownloaded_at, :utc_datetime
end
end
end

View file

@ -62,7 +62,9 @@ defmodule Pinchflat.Downloading.MediaDownloadWorkerTest do
assert media_item.media_filepath == nil
perform_job(MediaDownloadWorker, %{id: media_item.id})
assert Repo.reload(media_item).media_filepath != nil
media_item = Repo.reload(media_item)
assert media_item.media_filepath != nil
end
test "it saves the metadata to the media_item", %{media_item: media_item} do
@ -149,6 +151,28 @@ defmodule Pinchflat.Downloading.MediaDownloadWorkerTest do
assert media_item.media_size_bytes > 0
end
test "saves redownloaded_at if this is for a redownload", %{media_item: media_item} do
expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, _addl ->
{:ok, render_metadata(:media_metadata)}
end)
perform_job(MediaDownloadWorker, %{id: media_item.id, redownload?: true})
media_item = Repo.reload(media_item)
assert media_item.media_redownloaded_at != nil
end
test "doesn't save redownloaded_at if this is not for a redownload", %{media_item: media_item} do
expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, _addl ->
{:ok, render_metadata(:media_metadata)}
end)
perform_job(MediaDownloadWorker, %{id: media_item.id})
media_item = Repo.reload(media_item)
assert media_item.media_redownloaded_at == nil
end
test "does not blow up if the record doesn't exist" do
assert :ok = perform_job(MediaDownloadWorker, %{id: 0})
end

View file

@ -0,0 +1,44 @@
defmodule Pinchflat.Downloading.MediaRedownloadWorkerTest do
use Pinchflat.DataCase
import Pinchflat.MediaFixtures
import Pinchflat.SourcesFixtures
import Pinchflat.ProfilesFixtures
alias Pinchflat.Downloading.MediaDownloadWorker
alias Pinchflat.Downloading.MediaRedownloadWorker
describe "perform/1" do
test "kicks off a task for redownloadable media items" do
media_profile = media_profile_fixture(%{redownload_delay_days: 4})
source = source_fixture(%{media_profile_id: media_profile.id, inserted_at: now_minus(10, :days)})
media_item =
media_item_fixture(%{
source_id: source.id,
upload_date: now_minus(6, :days),
media_downloaded_at: now_minus(5, :days)
})
perform_job(MediaRedownloadWorker, %{})
assert [_] = all_enqueued(worker: MediaDownloadWorker, args: %{id: media_item.id, redownload?: true})
end
test "does not kickoff a task for non-redownloadable media items" do
media_profile = media_profile_fixture(%{redownload_delay_days: 4})
source = source_fixture(%{media_profile_id: media_profile.id, inserted_at: now_minus(10, :days)})
_media_item =
media_item_fixture(%{
source_id: source.id,
upload_date: now_minus(6, :days),
media_downloaded_at: now_minus(1, :day)
})
perform_job(MediaRedownloadWorker, %{})
assert [] = all_enqueued(worker: MediaDownloadWorker)
end
end
end

View file

@ -130,6 +130,131 @@ defmodule Pinchflat.MediaTest do
end
end
describe "list_redownloadable_media_items/0" do
setup do
media_profile = media_profile_fixture(%{redownload_delay_days: 4})
source = source_fixture(%{media_profile_id: media_profile.id, inserted_at: now_minus(10, :days)})
{:ok, %{media_profile: media_profile, source: source}}
end
test "returns media eligible for redownload", %{source: source} do
media_item =
media_item_fixture(%{
source_id: source.id,
upload_date: now_minus(6, :days),
media_downloaded_at: now_minus(5, :days)
})
assert Media.list_redownloadable_media_items() == [media_item]
end
test "returns media items that were downloaded in past but still meet redownload delay", %{source: source} do
media_item =
media_item_fixture(%{
source_id: source.id,
upload_date: now_minus(20, :days),
media_downloaded_at: now_minus(19, :days)
})
assert Media.list_redownloadable_media_items() == [media_item]
end
test "does not return media items without a media_downloaded_at", %{source: source} do
_media_item =
media_item_fixture(%{
source_id: source.id,
upload_date: now_minus(5, :days),
media_downloaded_at: nil
})
assert Media.list_redownloadable_media_items() == []
end
test "does not return media items that are set to prevent download", %{source: source} do
_media_item =
media_item_fixture(%{
source_id: source.id,
upload_date: now_minus(5, :days),
media_downloaded_at: now(),
prevent_download: true
})
assert Media.list_redownloadable_media_items() == []
end
test "does not return media items that have been culled", %{source: source} do
_media_item =
media_item_fixture(%{
source_id: source.id,
upload_date: now_minus(5, :days),
media_downloaded_at: now(),
culled_at: now()
})
assert Media.list_redownloadable_media_items() == []
end
test "does not return media items before the download delay", %{source: source} do
_media_item =
media_item_fixture(%{
source_id: source.id,
upload_date: now_minus(3, :days),
media_downloaded_at: now_minus(3, :days)
})
assert Media.list_redownloadable_media_items() == []
end
test "does not return media items that have already been redownloaded", %{source: source} do
_media_item =
media_item_fixture(%{
source_id: source.id,
upload_date: now_minus(5, :days),
media_downloaded_at: now(),
media_redownloaded_at: now()
})
assert Media.list_redownloadable_media_items() == []
end
test "does not return media items that were first downloaded well after the upload_date", %{source: source} do
_media_item =
media_item_fixture(%{
source_id: source.id,
media_downloaded_at: now(),
upload_date: now_minus(20, :days)
})
assert Media.list_redownloadable_media_items() == []
end
test "does not return media items that were recently uploaded", %{source: source} do
_media_item =
media_item_fixture(%{
source_id: source.id,
media_downloaded_at: now(),
upload_date: now_minus(2, :days)
})
assert Media.list_redownloadable_media_items() == []
end
test "does not return media items without a redownload delay" do
media_profile = media_profile_fixture(%{redownload_delay_days: nil})
source = source_fixture(%{media_profile_id: media_profile.id})
_media_item =
media_item_fixture(%{
source_id: source.id,
upload_date: now_minus(6, :days),
media_downloaded_at: now_minus(5, :days)
})
assert Media.list_redownloadable_media_items() == []
end
end
describe "list_pending_media_items_for/1" do
test "it returns pending without a filepath for a given source" do
source = source_fixture()