[Enhancement] Track the predicted final filepath for indexed media items (#461)

* Added ability to pass additional yt-dlp options to indexing step

* Added predicted_filename to media struct

* WIP added ability to predict filepath to source indexing

* renamed predicted_filepath

* Added the ability to predict filepath when fast indexing

* Add predicted_media_filepath to media items table

* Addressed TODOs
This commit is contained in:
Kieran 2024-11-08 09:42:59 -08:00 committed by GitHub
parent 8c0dd0bb6b
commit 83c10b2b00
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 134 additions and 21 deletions

View file

@ -34,21 +34,38 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do
@doc """
Builds the output path for yt-dlp to download media based on the given source's
media profile. Uses the source's override output path template if it exists.
or media_item's media profile. Uses the source's override output path template if it exists.
Accepts a %MediaItem{} or %Source{} struct. If a %Source{} struct is passed, it
will use a default %MediaItem{} struct with the given source.
Returns binary()
"""
def build_output_path_for(%Source{} = source_with_preloads) do
  # A bare source has no media item of its own, so wrap it in an otherwise-default
  # %MediaItem{} and let the %MediaItem{} clause build the path.
  placeholder_media_item = %MediaItem{source: source_with_preloads}

  build_output_path_for(placeholder_media_item)
end
def build_output_path_for(%MediaItem{source: source} = media_item_with_preloads) do
  # The template is resolved from the media item's source, then rendered
  # against the media item itself.
  source
  |> Sources.output_path_template()
  |> build_output_path(media_item_with_preloads)
end
def build_output_path_for(%Source{} = source_with_preloads) do
build_output_path_for(%MediaItem{source: source_with_preloads})
@doc """
Builds the quality options for yt-dlp to download media based on the given source's
or media_item's media profile. Useful for helping predict final filepath of downloaded
media.
Returns [Keyword.t()]
"""
def build_quality_options_for(%Source{} = source_with_preloads) do
  # Reuse the %MediaItem{} clause by handing it a default media item that
  # only carries the given source.
  placeholder_media_item = %MediaItem{source: source_with_preloads}

  build_quality_options_for(placeholder_media_item)
end
def build_quality_options_for(%MediaItem{source: source}) do
  # Quality options come from the media profile attached to the item's source.
  quality_options(source.media_profile)
end
defp default_options(override_opts) do

View file

@ -15,6 +15,7 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
alias Pinchflat.FastIndexing.YoutubeRss
alias Pinchflat.FastIndexing.YoutubeApi
alias Pinchflat.Downloading.DownloadingHelpers
alias Pinchflat.Downloading.DownloadOptionBuilder
alias Pinchflat.YtDlp.Media, as: YtDlpMedia
@ -27,6 +28,10 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
downloaded_.
"""
def kickoff_download_tasks_from_youtube_rss_feed(%Source{} = source) do
# The media_profile is needed to determine the quality options to _then_ determine a more
# accurate predicted filepath
source = Repo.preload(source, [:media_profile])
{:ok, media_ids} = get_recent_media_ids(source)
existing_media_items = list_media_items_by_media_id_for(source, media_ids)
new_media_ids = media_ids -- Enum.map(existing_media_items, & &1.media_id)
@ -68,7 +73,11 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
defp create_media_item_from_media_id(source, media_id) do
url = "https://www.youtube.com/watch?v=#{media_id}"
case YtDlpMedia.get_media_attributes(url, use_cookies: source.use_cookies) do
command_opts =
[output: DownloadOptionBuilder.build_output_path_for(source)] ++
DownloadOptionBuilder.build_quality_options_for(source)
case YtDlpMedia.get_media_attributes(url, command_opts, use_cookies: source.use_cookies) do
{:ok, media_attrs} ->
Media.create_media_item_from_backend_attrs(source, media_attrs)

View file

@ -31,6 +31,7 @@ defmodule Pinchflat.Media.MediaItem do
:uploaded_at,
:upload_date_index,
:duration_seconds,
:predicted_media_filepath,
# these fields are captured only on download
:media_downloaded_at,
:media_filepath,
@ -76,6 +77,7 @@ defmodule Pinchflat.Media.MediaItem do
field :duration_seconds, :integer
field :playlist_index, :integer, default: 0
field :predicted_media_filepath, :string
field :media_filepath, :string
field :media_size_bytes, :integer
field :thumbnail_filepath, :string

View file

@ -16,6 +16,7 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
alias Pinchflat.YtDlp.MediaCollection
alias Pinchflat.Downloading.DownloadingHelpers
alias Pinchflat.SlowIndexing.FileFollowerServer
alias Pinchflat.Downloading.DownloadOptionBuilder
alias Pinchflat.SlowIndexing.MediaCollectionIndexingWorker
alias Pinchflat.YtDlp.Media, as: YtDlpMedia
@ -56,6 +57,9 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
Returns [%MediaItem{} | %Ecto.Changeset{}]
"""
def index_and_enqueue_download_for_media_items(%Source{} = source) do
# The media_profile is needed to determine the quality options to _then_ determine a more
# accurate predicted filepath
source = Repo.preload(source, [:media_profile])
# See the method definition below for more info on how file watchers work
# (important reading if you're not familiar with it)
{:ok, media_attributes} = setup_file_watcher_and_kickoff_indexing(source)
@ -94,8 +98,13 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
{:ok, pid} = FileFollowerServer.start_link()
handler = fn filepath -> setup_file_follower_watcher(pid, filepath, source) end
command_opts =
[output: DownloadOptionBuilder.build_output_path_for(source)] ++
DownloadOptionBuilder.build_quality_options_for(source)
runner_opts = [file_listener_handler: handler, use_cookies: source.use_cookies]
result = MediaCollection.get_media_attributes_for_collection(source.original_url, runner_opts)
result = MediaCollection.get_media_attributes_for_collection(source.original_url, command_opts, runner_opts)
FileFollowerServer.stop(pid)

View file

@ -11,7 +11,8 @@ defmodule Pinchflat.YtDlp.Media do
:livestream,
:short_form_content,
:uploaded_at,
:duration_seconds
:duration_seconds,
:predicted_media_filepath
]
defstruct [
@ -23,7 +24,8 @@ defmodule Pinchflat.YtDlp.Media do
:short_form_content,
:uploaded_at,
:duration_seconds,
:playlist_index
:playlist_index,
:predicted_media_filepath
]
alias __MODULE__
@ -63,15 +65,17 @@ defmodule Pinchflat.YtDlp.Media do
@doc """
Returns a map representing the media at the given URL.
Optionally takes a list of additional command options to pass to yt-dlp
or configuration-related options to pass to the runner.
Returns {:ok, %Media{}} | {:error, any, ...}.
"""
def get_media_attributes(url, addl_opts \\ []) do
def get_media_attributes(url, command_opts \\ [], addl_opts \\ []) do
runner = Application.get_env(:pinchflat, :yt_dlp_runner)
command_opts = [:simulate, :skip_download]
all_command_opts = [:simulate, :skip_download] ++ command_opts
output_template = indexing_output_template()
case runner.run(url, command_opts, output_template, addl_opts) do
case runner.run(url, all_command_opts, output_template, addl_opts) do
{:ok, output} ->
output
|> Phoenix.json_library().decode!()
@ -91,7 +95,7 @@ defmodule Pinchflat.YtDlp.Media do
if something is a short via the URL again
"""
def indexing_output_template do
"%(.{id,title,live_status,original_url,description,aspect_ratio,duration,upload_date,timestamp,playlist_index})j"
"%(.{id,title,live_status,original_url,description,aspect_ratio,duration,upload_date,timestamp,playlist_index,filename})j"
end
@doc """
@ -110,7 +114,8 @@ defmodule Pinchflat.YtDlp.Media do
duration_seconds: response["duration"] && round(response["duration"]),
short_form_content: response["original_url"] && short_form_content?(response),
uploaded_at: response["upload_date"] && parse_uploaded_at(response),
playlist_index: response["playlist_index"] || 0
playlist_index: response["playlist_index"] || 0,
predicted_media_filepath: response["filename"]
}
end

View file

@ -11,20 +11,23 @@ defmodule Pinchflat.YtDlp.MediaCollection do
@doc """
Returns a list of maps representing the media in the collection.
Optionally takes a list of additional command options to pass to yt-dlp
or configuration-related options to pass to the runner.
Options:
Runner Options:
- :file_listener_handler - a function that will be called with the path to the
file that will be written to when yt-dlp is done. This is useful for
setting up a file watcher to know when the file is ready to be read.
- :use_cookies - whether or not to use user-provided cookies when fetching the media details
Returns {:ok, [map()]} | {:error, any, ...}.
"""
def get_media_attributes_for_collection(url, addl_opts \\ []) do
def get_media_attributes_for_collection(url, command_opts \\ [], addl_opts \\ []) do
runner = Application.get_env(:pinchflat, :yt_dlp_runner)
# `ignore_no_formats_error` is necessary because yt-dlp will error out if
# the first video has not been released yet (i.e., is a premiere). We don't care about
# available formats since we're just getting the media details
command_opts = [:simulate, :skip_download, :ignore_no_formats_error, :no_warnings]
all_command_opts = [:simulate, :skip_download, :ignore_no_formats_error, :no_warnings] ++ command_opts
use_cookies = Keyword.get(addl_opts, :use_cookies, false)
output_template = YtDlpMedia.indexing_output_template()
output_filepath = FilesystemUtils.generate_metadata_tmpfile(:json)
@ -35,7 +38,7 @@ defmodule Pinchflat.YtDlp.MediaCollection do
file_listener_handler.(output_filepath)
end
case runner.run(url, command_opts, output_template, runner_opts) do
case runner.run(url, all_command_opts, output_template, runner_opts) do
{:ok, output} ->
parsed_lines =
output