[Enhancement] Track the predicted final filepath for indexed media items (#461)

* Added ability to pass additional yt-dlp options to indexing step

* Added predicted_filename to media struct

* WIP added ability to predict filepath to source indexing

* renamed predicted_filepath

* Added the ability to predict filepath when fast indexing

* Add predicted_media_filepath to media items table

* Addressed TODOs
This commit is contained in:
Kieran 2024-11-08 09:42:59 -08:00 committed by GitHub
parent 8c0dd0bb6b
commit 83c10b2b00
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 134 additions and 21 deletions

View file

@ -34,21 +34,38 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do
@doc """
Builds the output path for yt-dlp to download media based on the given source's
media profile. Uses the source's override output path template if it exists.
or media_item's media profile. Uses the source's override output path template if it exists.
Accepts a %MediaItem{} or %Source{} struct. If a %Source{} struct is passed, it
will use a default %MediaItem{} struct with the given source.
Returns binary()
"""
def build_output_path_for(%Source{} = source_with_preloads) do
build_output_path_for(%MediaItem{source: source_with_preloads})
end
def build_output_path_for(%MediaItem{} = media_item_with_preloads) do
output_path_template = Sources.output_path_template(media_item_with_preloads.source)
build_output_path(output_path_template, media_item_with_preloads)
end
def build_output_path_for(%Source{} = source_with_preloads) do
build_output_path_for(%MediaItem{source: source_with_preloads})
@doc """
Builds the yt-dlp quality options from the media profile attached to the given
source, or to the source of the given media item. Useful for helping predict the
final filepath of downloaded media.

Accepts a %MediaItem{} or %Source{} struct. A %Source{} struct is first wrapped
in an otherwise-default %MediaItem{} struct so both inputs share one code path.

Returns [Keyword.t()]
"""
def build_quality_options_for(%Source{} = source_with_preloads) do
  placeholder_media_item = %MediaItem{source: source_with_preloads}

  build_quality_options_for(placeholder_media_item)
end

def build_quality_options_for(%MediaItem{source: source_with_preloads}) do
  # The media profile must already be preloaded on the source for this lookup to work
  quality_options(source_with_preloads.media_profile)
end
defp default_options(override_opts) do

View file

@ -15,6 +15,7 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
alias Pinchflat.FastIndexing.YoutubeRss
alias Pinchflat.FastIndexing.YoutubeApi
alias Pinchflat.Downloading.DownloadingHelpers
alias Pinchflat.Downloading.DownloadOptionBuilder
alias Pinchflat.YtDlp.Media, as: YtDlpMedia
@ -27,6 +28,10 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
downloaded_.
"""
def kickoff_download_tasks_from_youtube_rss_feed(%Source{} = source) do
# The media_profile is needed to determine the quality options to _then_ determine a more
# accurate predicted filepath
source = Repo.preload(source, [:media_profile])
{:ok, media_ids} = get_recent_media_ids(source)
existing_media_items = list_media_items_by_media_id_for(source, media_ids)
new_media_ids = media_ids -- Enum.map(existing_media_items, & &1.media_id)
@ -68,7 +73,11 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
defp create_media_item_from_media_id(source, media_id) do
url = "https://www.youtube.com/watch?v=#{media_id}"
case YtDlpMedia.get_media_attributes(url, use_cookies: source.use_cookies) do
command_opts =
[output: DownloadOptionBuilder.build_output_path_for(source)] ++
DownloadOptionBuilder.build_quality_options_for(source)
case YtDlpMedia.get_media_attributes(url, command_opts, use_cookies: source.use_cookies) do
{:ok, media_attrs} ->
Media.create_media_item_from_backend_attrs(source, media_attrs)

View file

@ -31,6 +31,7 @@ defmodule Pinchflat.Media.MediaItem do
:uploaded_at,
:upload_date_index,
:duration_seconds,
:predicted_media_filepath,
# these fields are captured only on download
:media_downloaded_at,
:media_filepath,
@ -76,6 +77,7 @@ defmodule Pinchflat.Media.MediaItem do
field :duration_seconds, :integer
field :playlist_index, :integer, default: 0
field :predicted_media_filepath, :string
field :media_filepath, :string
field :media_size_bytes, :integer
field :thumbnail_filepath, :string

View file

@ -16,6 +16,7 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
alias Pinchflat.YtDlp.MediaCollection
alias Pinchflat.Downloading.DownloadingHelpers
alias Pinchflat.SlowIndexing.FileFollowerServer
alias Pinchflat.Downloading.DownloadOptionBuilder
alias Pinchflat.SlowIndexing.MediaCollectionIndexingWorker
alias Pinchflat.YtDlp.Media, as: YtDlpMedia
@ -56,6 +57,9 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
Returns [%MediaItem{} | %Ecto.Changeset{}]
"""
def index_and_enqueue_download_for_media_items(%Source{} = source) do
# The media_profile is needed to determine the quality options to _then_ determine a more
# accurate predicted filepath
source = Repo.preload(source, [:media_profile])
# See the method definition below for more info on how file watchers work
# (important reading if you're not familiar with it)
{:ok, media_attributes} = setup_file_watcher_and_kickoff_indexing(source)
@ -94,8 +98,13 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
{:ok, pid} = FileFollowerServer.start_link()
handler = fn filepath -> setup_file_follower_watcher(pid, filepath, source) end
command_opts =
[output: DownloadOptionBuilder.build_output_path_for(source)] ++
DownloadOptionBuilder.build_quality_options_for(source)
runner_opts = [file_listener_handler: handler, use_cookies: source.use_cookies]
result = MediaCollection.get_media_attributes_for_collection(source.original_url, runner_opts)
result = MediaCollection.get_media_attributes_for_collection(source.original_url, command_opts, runner_opts)
FileFollowerServer.stop(pid)

View file

@ -11,7 +11,8 @@ defmodule Pinchflat.YtDlp.Media do
:livestream,
:short_form_content,
:uploaded_at,
:duration_seconds
:duration_seconds,
:predicted_media_filepath
]
defstruct [
@ -23,7 +24,8 @@ defmodule Pinchflat.YtDlp.Media do
:short_form_content,
:uploaded_at,
:duration_seconds,
:playlist_index
:playlist_index,
:predicted_media_filepath
]
alias __MODULE__
@ -63,15 +65,17 @@ defmodule Pinchflat.YtDlp.Media do
@doc """
Returns a map representing the media at the given URL.
Optionally takes a list of additional command options to pass to yt-dlp
or configuration-related options to pass to the runner.
Returns {:ok, %Media{}} | {:error, any, ...}.
"""
def get_media_attributes(url, addl_opts \\ []) do
def get_media_attributes(url, command_opts \\ [], addl_opts \\ []) do
runner = Application.get_env(:pinchflat, :yt_dlp_runner)
command_opts = [:simulate, :skip_download]
all_command_opts = [:simulate, :skip_download] ++ command_opts
output_template = indexing_output_template()
case runner.run(url, command_opts, output_template, addl_opts) do
case runner.run(url, all_command_opts, output_template, addl_opts) do
{:ok, output} ->
output
|> Phoenix.json_library().decode!()
@ -91,7 +95,7 @@ defmodule Pinchflat.YtDlp.Media do
if something is a short via the URL again
"""
def indexing_output_template do
"%(.{id,title,live_status,original_url,description,aspect_ratio,duration,upload_date,timestamp,playlist_index})j"
"%(.{id,title,live_status,original_url,description,aspect_ratio,duration,upload_date,timestamp,playlist_index,filename})j"
end
@doc """
@ -110,7 +114,8 @@ defmodule Pinchflat.YtDlp.Media do
duration_seconds: response["duration"] && round(response["duration"]),
short_form_content: response["original_url"] && short_form_content?(response),
uploaded_at: response["upload_date"] && parse_uploaded_at(response),
playlist_index: response["playlist_index"] || 0
playlist_index: response["playlist_index"] || 0,
predicted_media_filepath: response["filename"]
}
end

View file

@ -11,20 +11,23 @@ defmodule Pinchflat.YtDlp.MediaCollection do
@doc """
Returns a list of maps representing the media in the collection.
Optionally takes a list of additional command options to pass to yt-dlp
or configuration-related options to pass to the runner.
Options:
Runner Options:
- :file_listener_handler - a function that will be called with the path to the
file that will be written to when yt-dlp is done. This is useful for
setting up a file watcher to know when the file is ready to be read.
- :use_cookies - whether or not to use user-provided cookies when fetching the media details
Returns {:ok, [map()]} | {:error, any, ...}.
"""
def get_media_attributes_for_collection(url, addl_opts \\ []) do
def get_media_attributes_for_collection(url, command_opts \\ [], addl_opts \\ []) do
runner = Application.get_env(:pinchflat, :yt_dlp_runner)
# `ignore_no_formats_error` is necessary because yt-dlp will error out if
# the first video has not released yet (ie: is a premier). We don't care about
# available formats since we're just getting the media details
command_opts = [:simulate, :skip_download, :ignore_no_formats_error, :no_warnings]
all_command_opts = [:simulate, :skip_download, :ignore_no_formats_error, :no_warnings] ++ command_opts
use_cookies = Keyword.get(addl_opts, :use_cookies, false)
output_template = YtDlpMedia.indexing_output_template()
output_filepath = FilesystemUtils.generate_metadata_tmpfile(:json)
@ -35,7 +38,7 @@ defmodule Pinchflat.YtDlp.MediaCollection do
file_listener_handler.(output_filepath)
end
case runner.run(url, command_opts, output_template, runner_opts) do
case runner.run(url, all_command_opts, output_template, runner_opts) do
{:ok, output} ->
parsed_lines =
output

Binary file not shown.

Before

Width:  |  Height:  |  Size: 433 KiB

After

Width:  |  Height:  |  Size: 449 KiB

Before After
Before After

View file

@ -0,0 +1,9 @@
defmodule Pinchflat.Repo.Migrations.AddPredictedMediaFilepathToMediaItems do
  use Ecto.Migration

  # Adds the `predicted_media_filepath` string column to the `media_items` table.
  # Written as `change/0` so Ecto can derive the rollback (dropping the column)
  # automatically.
  def change do
    alter table(:media_items) do
      add(:predicted_media_filepath, :string)
    end
  end
end

View file

@ -461,6 +461,22 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilderTest do
end
end
describe "build_quality_options_for/1" do
  # Both entry points (media item and source) are expected to yield the same
  # quality-related options.
  test "builds quality options for a media item", %{media_item: media_item} do
    quality_opts = DownloadOptionBuilder.build_quality_options_for(media_item)

    assert {:format_sort, "res:1080,+codec:avc:m4a"} in quality_opts
    assert {:remux_video, "mp4"} in quality_opts
  end

  test "builds quality options for a source", %{media_item: media_item} do
    source = media_item.source
    quality_opts = DownloadOptionBuilder.build_quality_options_for(source)

    assert {:format_sort, "res:1080,+codec:avc:m4a"} in quality_opts
    assert {:remux_video, "mp4"} in quality_opts
  end
end
defp update_media_profile_attribute(media_item_with_preloads, attrs) do
media_item_with_preloads.source.media_profile
|> Profiles.change_media_profile(attrs)

View file

@ -61,6 +61,18 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpersTest do
assert [_] = Tasks.list_tasks_for(media_item, "MediaDownloadWorker")
end
test "passes the source's download options to the yt-dlp runner", %{source: source} do
  # The RSS feed stub yields a single new video ID so exactly one runner call is made
  expect(HTTPClientMock, :get, fn _url -> {:ok, "<yt:videoId>test_1</yt:videoId>"} end)

  # The runner must receive both the output path template and the quality options
  expect(YtDlpRunnerMock, :run, fn _url, command_opts, _output_template, _runner_opts ->
    assert {:output, "/tmp/test/media/%(title)S.%(ext)S"} in command_opts
    assert {:remux_video, "mp4"} in command_opts

    {:ok, media_attributes_return_fixture()}
  end)

  FastIndexingHelpers.kickoff_download_tasks_from_youtube_rss_feed(source)
end
test "sets use_cookies if the source uses cookies" do
expect(HTTPClientMock, :get, fn _url -> {:ok, "<yt:videoId>test_1</yt:videoId>"} end)

View file

@ -202,6 +202,16 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpersTest do
assert %Ecto.Changeset{} = changeset
end
test "passes the source's download options to the yt-dlp runner", %{source: source} do
  # The runner must receive both the output path template and the quality options
  expect(YtDlpRunnerMock, :run, fn _url, command_opts, _output_template, _runner_opts ->
    assert {:output, "/tmp/test/media/%(title)S.%(ext)S"} in command_opts
    assert {:remux_video, "mp4"} in command_opts

    {:ok, source_attributes_return_fixture()}
  end)

  SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source)
end
test "sets use_cookies if the source uses cookies" do
expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, addl_opts ->
assert {:use_cookies, true} in addl_opts

View file

@ -35,6 +35,16 @@ defmodule Pinchflat.YtDlp.MediaCollectionTest do
assert {:error, "Big issue", 1} = MediaCollection.get_media_attributes_for_collection(@channel_url)
end
test "passes along additional command options" do
  # Caller-supplied command options (second argument) must reach the runner's
  # command option list.
  expect(YtDlpRunnerMock, :run, fn _url, command_opts, _output_template, _runner_opts ->
    assert :foo in command_opts

    {:ok, ""}
  end)

  assert {:ok, _} = MediaCollection.get_media_attributes_for_collection(@channel_url, [:foo])
end
test "passes additional args to runner" do
expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, addl_opts ->
assert [{:output_filepath, filepath} | _] = addl_opts
@ -56,7 +66,7 @@ defmodule Pinchflat.YtDlp.MediaCollectionTest do
end
assert {:ok, _} =
MediaCollection.get_media_attributes_for_collection(@channel_url, file_listener_handler: handler)
MediaCollection.get_media_attributes_for_collection(@channel_url, [], file_listener_handler: handler)
assert_receive {:handler, filename}
assert String.ends_with?(filename, ".json")

View file

@ -120,13 +120,22 @@ defmodule Pinchflat.YtDlp.MediaTest do
assert {:ok, _} = Media.get_media_attributes(@media_url)
end
test "passes along additional command options" do
  # Caller-supplied command options are expected after the built-in
  # :simulate and :skip_download flags, in that exact order.
  expect(YtDlpRunnerMock, :run, fn _url, command_opts, _output_template, _runner_opts ->
    assert [:simulate, :skip_download, :custom_arg] = command_opts

    {:ok, media_attributes_return_fixture()}
  end)

  assert {:ok, _} = Media.get_media_attributes(@media_url, [:custom_arg])
end
test "passes along additional options" do
expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, addl ->
assert [addl_arg: true] = addl
{:ok, media_attributes_return_fixture()}
end)
assert {:ok, _} = Media.get_media_attributes(@media_url, addl_arg: true)
assert {:ok, _} = Media.get_media_attributes(@media_url, [], addl_arg: true)
end
test "returns the error straight through when the command fails" do
@ -139,7 +148,7 @@ defmodule Pinchflat.YtDlp.MediaTest do
describe "indexing_output_template/0" do
test "contains all the greatest hits" do
attrs =
~w(id title live_status original_url description aspect_ratio duration upload_date timestamp playlist_index)a
~w(id title live_status original_url description aspect_ratio duration upload_date timestamp playlist_index filename)a
formatted_attrs = "%(.{#{Enum.join(attrs, ",")}})j"
@ -159,7 +168,8 @@ defmodule Pinchflat.YtDlp.MediaTest do
"duration" => 60,
"upload_date" => "20210101",
"timestamp" => 1_600_000_000,
"playlist_index" => 1
"playlist_index" => 1,
"filename" => "TiZPUDkDYbk.mp4"
}
assert %Media{
@ -171,7 +181,8 @@ defmodule Pinchflat.YtDlp.MediaTest do
short_form_content: false,
uploaded_at: ~U[2020-09-13 12:26:40Z],
duration_seconds: 60,
playlist_index: 1
playlist_index: 1,
predicted_media_filepath: "TiZPUDkDYbk.mp4"
} == Media.response_to_struct(response)
end