[Bugfix] Ensure livestreams aren't downloaded until they're finished processing (#485)

* Added logic to ignore downloads that aren't in the right live state

* Added tests for get_downloadable_status method

* Added tests for media downloader module

* Added tests to download worker module
This commit is contained in:
Kieran 2024-11-26 11:56:33 -08:00 committed by GitHub
parent d9c48370df
commit bfb27427ce
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 156 additions and 3 deletions

View file

@ -94,6 +94,9 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do
{:recovered, _} ->
{:error, :retry}
{:error, :unsuitable_for_download} ->
{:ok, :non_retry}
{:error, message} ->
action_on_error(message)
end

View file

@ -37,6 +37,13 @@ defmodule Pinchflat.Downloading.MediaDownloader do
{:ok, parsed_json} ->
update_media_item_from_parsed_json(media_with_preloads, parsed_json)
{:error, :unsuitable_for_download} ->
Logger.warning(
"Media item ##{media_with_preloads.id} isn't suitable for download yet. May be an active or processing live stream"
)
{:error, :unsuitable_for_download}
{:error, message, _exit_code} ->
Logger.error("yt-dlp download error for media item ##{media_with_preloads.id}: #{inspect(message)}")
@ -108,7 +115,11 @@ defmodule Pinchflat.Downloading.MediaDownloader do
{:ok, options} = DownloadOptionBuilder.build(item_with_preloads, override_opts)
runner_opts = [output_filepath: output_filepath, use_cookies: item_with_preloads.source.use_cookies]
YtDlpMedia.download(url, options, runner_opts)
case YtDlpMedia.get_downloadable_status(url) do
{:ok, :downloadable} -> YtDlpMedia.download(url, options, runner_opts)
{:ok, :ignorable} -> {:error, :unsuitable_for_download}
err -> err
end
end
defp recoverable_errors do

View file

@ -49,6 +49,24 @@ defmodule Pinchflat.YtDlp.Media do
end
end
@doc """
Checks whether the media at the given URL can be downloaded right now.

Asks the configured runner for only the media's live status (simulated, nothing
is downloaded) and classifies the decoded response. Upcoming or in-progress
live streams are the usual examples of media that should be skipped for now.

Any result from the runner that isn't `{:ok, output}` is handed back to the
caller unchanged.

Returns {:ok, :downloadable | :ignorable} | {:error, any}
"""
def get_downloadable_status(url) do
  command_opts = [:simulate, :skip_download]
  output_template = "%(.{live_status})j"

  with {:ok, output} <- backend_runner().run(url, command_opts, output_template) do
    # decode!/1 is the raising variant, so malformed runner output raises here
    # instead of being turned into an error tuple.
    output
    |> Phoenix.json_library().decode!()
    |> parse_downloadable_status()
  end
end
@doc """
Downloads a thumbnail for a single piece of media. Usually used for
downloading thumbnails for internal use
@ -71,11 +89,10 @@ defmodule Pinchflat.YtDlp.Media do
Returns {:ok, %Media{}} | {:error, any, ...}.
"""
def get_media_attributes(url, command_opts \\ [], addl_opts \\ []) do
runner = Application.get_env(:pinchflat, :yt_dlp_runner)
all_command_opts = [:simulate, :skip_download] ++ command_opts
output_template = indexing_output_template()
case runner.run(url, all_command_opts, output_template, addl_opts) do
case backend_runner().run(url, all_command_opts, output_template, addl_opts) do
{:ok, output} ->
output
|> Phoenix.json_library().decode!()
@ -147,6 +164,16 @@ defmodule Pinchflat.YtDlp.Media do
defp parse_uploaded_at(%{"upload_date" => nil}), do: nil
defp parse_uploaded_at(response), do: MetadataFileHelpers.parse_upload_date(response["upload_date"])
# Maps the "live_status" value of a decoded response onto a download decision.
#
#   * "is_live", "is_upcoming", "post_live" -> {:ok, :ignorable}
#   * "was_live", "not_live", or no value   -> {:ok, :downloadable}
#   * anything else                         -> {:error, message}
defp parse_downloadable_status(response) do
  live_status = response["live_status"]

  cond do
    live_status in ["is_live", "is_upcoming", "post_live"] ->
      {:ok, :ignorable}

    live_status in ["was_live", "not_live"] ->
      {:ok, :downloadable}

    # A missing status is treated as downloadable; this preserves the tenuous
    # support for non-youtube sources.
    is_nil(live_status) ->
      {:ok, :downloadable}

    true ->
      {:error, "Unknown live status: #{live_status}"}
  end
end
defp backend_runner do
# This approach lets us mock the command for testing
Application.get_env(:pinchflat, :yt_dlp_runner)