mirror of
https://github.com/kieraneglin/pinchflat.git
synced 2026-01-23 02:24:24 +00:00
[Enhancement] Track the predicted final filepath for indexed media items (#461)
* Added ability to pass additional yt-dlp options to indexing step
* Added predicted_filename to media struct
* WIP added ability to predict filepath to source indexing
* Renamed predicted_filepath
* Added the ability to predict filepath when fast indexing
* Add predicted_media_filepath to media items table
* Addressed TODOs
This commit is contained in:
parent
8c0dd0bb6b
commit
83c10b2b00
13 changed files with 134 additions and 21 deletions
|
|
@ -34,21 +34,38 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilder do
|
|||
|
||||
@doc """
|
||||
Builds the output path for yt-dlp to download media based on the given source's
|
||||
media profile. Uses the source's override output path template if it exists.
|
||||
or media_item's media profile. Uses the source's override output path template if it exists.
|
||||
|
||||
Accepts a %MediaItem{} or %Source{} struct. If a %Source{} struct is passed, it
|
||||
will use a default %MediaItem{} struct with the given source.
|
||||
|
||||
Returns binary()
|
||||
"""
|
||||
def build_output_path_for(%Source{} = source_with_preloads) do
|
||||
build_output_path_for(%MediaItem{source: source_with_preloads})
|
||||
end
|
||||
|
||||
def build_output_path_for(%MediaItem{} = media_item_with_preloads) do
|
||||
output_path_template = Sources.output_path_template(media_item_with_preloads.source)
|
||||
|
||||
build_output_path(output_path_template, media_item_with_preloads)
|
||||
end
|
||||
|
||||
def build_output_path_for(%Source{} = source_with_preloads) do
|
||||
build_output_path_for(%MediaItem{source: source_with_preloads})
|
||||
@doc """
|
||||
Builds the quality options for yt-dlp to download media based on the given source's
|
||||
or media_item's media profile. Useful for helping predict final filepath of downloaded
|
||||
media.
|
||||
|
||||
Returns [Keyword.t()]
|
||||
"""
|
||||
def build_quality_options_for(%Source{} = source_with_preloads) do
|
||||
build_quality_options_for(%MediaItem{source: source_with_preloads})
|
||||
end
|
||||
|
||||
def build_quality_options_for(%MediaItem{} = media_item_with_preloads) do
|
||||
media_profile = media_item_with_preloads.source.media_profile
|
||||
|
||||
quality_options(media_profile)
|
||||
end
|
||||
|
||||
defp default_options(override_opts) do
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
|
|||
alias Pinchflat.FastIndexing.YoutubeRss
|
||||
alias Pinchflat.FastIndexing.YoutubeApi
|
||||
alias Pinchflat.Downloading.DownloadingHelpers
|
||||
alias Pinchflat.Downloading.DownloadOptionBuilder
|
||||
|
||||
alias Pinchflat.YtDlp.Media, as: YtDlpMedia
|
||||
|
||||
|
|
@ -27,6 +28,10 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
|
|||
downloaded_.
|
||||
"""
|
||||
def kickoff_download_tasks_from_youtube_rss_feed(%Source{} = source) do
|
||||
# The media_profile is needed to determine the quality options to _then_ determine a more
|
||||
# accurate predicted filepath
|
||||
source = Repo.preload(source, [:media_profile])
|
||||
|
||||
{:ok, media_ids} = get_recent_media_ids(source)
|
||||
existing_media_items = list_media_items_by_media_id_for(source, media_ids)
|
||||
new_media_ids = media_ids -- Enum.map(existing_media_items, & &1.media_id)
|
||||
|
|
@ -68,7 +73,11 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpers do
|
|||
defp create_media_item_from_media_id(source, media_id) do
|
||||
url = "https://www.youtube.com/watch?v=#{media_id}"
|
||||
|
||||
case YtDlpMedia.get_media_attributes(url, use_cookies: source.use_cookies) do
|
||||
command_opts =
|
||||
[output: DownloadOptionBuilder.build_output_path_for(source)] ++
|
||||
DownloadOptionBuilder.build_quality_options_for(source)
|
||||
|
||||
case YtDlpMedia.get_media_attributes(url, command_opts, use_cookies: source.use_cookies) do
|
||||
{:ok, media_attrs} ->
|
||||
Media.create_media_item_from_backend_attrs(source, media_attrs)
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ defmodule Pinchflat.Media.MediaItem do
|
|||
:uploaded_at,
|
||||
:upload_date_index,
|
||||
:duration_seconds,
|
||||
:predicted_media_filepath,
|
||||
# these fields are captured only on download
|
||||
:media_downloaded_at,
|
||||
:media_filepath,
|
||||
|
|
@ -76,6 +77,7 @@ defmodule Pinchflat.Media.MediaItem do
|
|||
field :duration_seconds, :integer
|
||||
field :playlist_index, :integer, default: 0
|
||||
|
||||
field :predicted_media_filepath, :string
|
||||
field :media_filepath, :string
|
||||
field :media_size_bytes, :integer
|
||||
field :thumbnail_filepath, :string
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
|
|||
alias Pinchflat.YtDlp.MediaCollection
|
||||
alias Pinchflat.Downloading.DownloadingHelpers
|
||||
alias Pinchflat.SlowIndexing.FileFollowerServer
|
||||
alias Pinchflat.Downloading.DownloadOptionBuilder
|
||||
alias Pinchflat.SlowIndexing.MediaCollectionIndexingWorker
|
||||
|
||||
alias Pinchflat.YtDlp.Media, as: YtDlpMedia
|
||||
|
|
@ -56,6 +57,9 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
|
|||
Returns [%MediaItem{} | %Ecto.Changeset{}]
|
||||
"""
|
||||
def index_and_enqueue_download_for_media_items(%Source{} = source) do
|
||||
# The media_profile is needed to determine the quality options to _then_ determine a more
|
||||
# accurate predicted filepath
|
||||
source = Repo.preload(source, [:media_profile])
|
||||
# See the method definition below for more info on how file watchers work
|
||||
# (important reading if you're not familiar with it)
|
||||
{:ok, media_attributes} = setup_file_watcher_and_kickoff_indexing(source)
|
||||
|
|
@ -94,8 +98,13 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpers do
|
|||
{:ok, pid} = FileFollowerServer.start_link()
|
||||
|
||||
handler = fn filepath -> setup_file_follower_watcher(pid, filepath, source) end
|
||||
|
||||
command_opts =
|
||||
[output: DownloadOptionBuilder.build_output_path_for(source)] ++
|
||||
DownloadOptionBuilder.build_quality_options_for(source)
|
||||
|
||||
runner_opts = [file_listener_handler: handler, use_cookies: source.use_cookies]
|
||||
result = MediaCollection.get_media_attributes_for_collection(source.original_url, runner_opts)
|
||||
result = MediaCollection.get_media_attributes_for_collection(source.original_url, command_opts, runner_opts)
|
||||
|
||||
FileFollowerServer.stop(pid)
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,8 @@ defmodule Pinchflat.YtDlp.Media do
|
|||
:livestream,
|
||||
:short_form_content,
|
||||
:uploaded_at,
|
||||
:duration_seconds
|
||||
:duration_seconds,
|
||||
:predicted_media_filepath
|
||||
]
|
||||
|
||||
defstruct [
|
||||
|
|
@ -23,7 +24,8 @@ defmodule Pinchflat.YtDlp.Media do
|
|||
:short_form_content,
|
||||
:uploaded_at,
|
||||
:duration_seconds,
|
||||
:playlist_index
|
||||
:playlist_index,
|
||||
:predicted_media_filepath
|
||||
]
|
||||
|
||||
alias __MODULE__
|
||||
|
|
@ -63,15 +65,17 @@ defmodule Pinchflat.YtDlp.Media do
|
|||
|
||||
@doc """
|
||||
Returns a map representing the media at the given URL.
|
||||
Optionally takes a list of additional command options to pass to yt-dlp
|
||||
or configuration-related options to pass to the runner.
|
||||
|
||||
Returns {:ok, %Media{}} | {:error, any, ...}.
|
||||
"""
|
||||
def get_media_attributes(url, addl_opts \\ []) do
|
||||
def get_media_attributes(url, command_opts \\ [], addl_opts \\ []) do
|
||||
runner = Application.get_env(:pinchflat, :yt_dlp_runner)
|
||||
command_opts = [:simulate, :skip_download]
|
||||
all_command_opts = [:simulate, :skip_download] ++ command_opts
|
||||
output_template = indexing_output_template()
|
||||
|
||||
case runner.run(url, command_opts, output_template, addl_opts) do
|
||||
case runner.run(url, all_command_opts, output_template, addl_opts) do
|
||||
{:ok, output} ->
|
||||
output
|
||||
|> Phoenix.json_library().decode!()
|
||||
|
|
@ -91,7 +95,7 @@ defmodule Pinchflat.YtDlp.Media do
|
|||
if something is a short via the URL again
|
||||
"""
|
||||
def indexing_output_template do
|
||||
"%(.{id,title,live_status,original_url,description,aspect_ratio,duration,upload_date,timestamp,playlist_index})j"
|
||||
"%(.{id,title,live_status,original_url,description,aspect_ratio,duration,upload_date,timestamp,playlist_index,filename})j"
|
||||
end
|
||||
|
||||
@doc """
|
||||
|
|
@ -110,7 +114,8 @@ defmodule Pinchflat.YtDlp.Media do
|
|||
duration_seconds: response["duration"] && round(response["duration"]),
|
||||
short_form_content: response["original_url"] && short_form_content?(response),
|
||||
uploaded_at: response["upload_date"] && parse_uploaded_at(response),
|
||||
playlist_index: response["playlist_index"] || 0
|
||||
playlist_index: response["playlist_index"] || 0,
|
||||
predicted_media_filepath: response["filename"]
|
||||
}
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -11,20 +11,23 @@ defmodule Pinchflat.YtDlp.MediaCollection do
|
|||
|
||||
@doc """
|
||||
Returns a list of maps representing the media in the collection.
|
||||
Optionally takes a list of additional command options to pass to yt-dlp
|
||||
or configuration-related options to pass to the runner.
|
||||
|
||||
Options:
|
||||
Runner Options:
|
||||
- :file_listener_handler - a function that will be called with the path to the
|
||||
file that will be written to when yt-dlp is done. This is useful for
|
||||
setting up a file watcher to know when the file is ready to be read.
|
||||
- :use_cookies - whether or not to use user-provided cookies when fetching the media details
|
||||
|
||||
Returns {:ok, [map()]} | {:error, any, ...}.
|
||||
"""
|
||||
def get_media_attributes_for_collection(url, addl_opts \\ []) do
|
||||
def get_media_attributes_for_collection(url, command_opts \\ [], addl_opts \\ []) do
|
||||
runner = Application.get_env(:pinchflat, :yt_dlp_runner)
|
||||
# `ignore_no_formats_error` is necessary because yt-dlp will error out if
|
||||
# the first video has not released yet (ie: is a premier). We don't care about
|
||||
# available formats since we're just getting the media details
|
||||
command_opts = [:simulate, :skip_download, :ignore_no_formats_error, :no_warnings]
|
||||
all_command_opts = [:simulate, :skip_download, :ignore_no_formats_error, :no_warnings] ++ command_opts
|
||||
use_cookies = Keyword.get(addl_opts, :use_cookies, false)
|
||||
output_template = YtDlpMedia.indexing_output_template()
|
||||
output_filepath = FilesystemUtils.generate_metadata_tmpfile(:json)
|
||||
|
|
@ -35,7 +38,7 @@ defmodule Pinchflat.YtDlp.MediaCollection do
|
|||
file_listener_handler.(output_filepath)
|
||||
end
|
||||
|
||||
case runner.run(url, command_opts, output_template, runner_opts) do
|
||||
case runner.run(url, all_command_opts, output_template, runner_opts) do
|
||||
{:ok, output} ->
|
||||
parsed_lines =
|
||||
output
|
||||
|
|
|
|||
Binary file not shown.
|
Before Width: | Height: | Size: 433 KiB After Width: | Height: | Size: 449 KiB |
|
|
@ -0,0 +1,9 @@
|
|||
defmodule Pinchflat.Repo.Migrations.AddPredictedMediaFilepathToMediaItems do
|
||||
use Ecto.Migration
|
||||
|
||||
def change do
|
||||
alter table(:media_items) do
|
||||
add :predicted_media_filepath, :string
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
@ -461,6 +461,22 @@ defmodule Pinchflat.Downloading.DownloadOptionBuilderTest do
|
|||
end
|
||||
end
|
||||
|
||||
describe "build_quality_options_for/1" do
|
||||
test "builds quality options for a media item", %{media_item: media_item} do
|
||||
options = DownloadOptionBuilder.build_quality_options_for(media_item)
|
||||
|
||||
assert {:format_sort, "res:1080,+codec:avc:m4a"} in options
|
||||
assert {:remux_video, "mp4"} in options
|
||||
end
|
||||
|
||||
test "builds quality options for a source", %{media_item: media_item} do
|
||||
options = DownloadOptionBuilder.build_quality_options_for(media_item.source)
|
||||
|
||||
assert {:format_sort, "res:1080,+codec:avc:m4a"} in options
|
||||
assert {:remux_video, "mp4"} in options
|
||||
end
|
||||
end
|
||||
|
||||
defp update_media_profile_attribute(media_item_with_preloads, attrs) do
|
||||
media_item_with_preloads.source.media_profile
|
||||
|> Profiles.change_media_profile(attrs)
|
||||
|
|
|
|||
|
|
@ -61,6 +61,18 @@ defmodule Pinchflat.FastIndexing.FastIndexingHelpersTest do
|
|||
assert [_] = Tasks.list_tasks_for(media_item, "MediaDownloadWorker")
|
||||
end
|
||||
|
||||
test "passes the source's download options to the yt-dlp runner", %{source: source} do
|
||||
expect(HTTPClientMock, :get, fn _url -> {:ok, "<yt:videoId>test_1</yt:videoId>"} end)
|
||||
|
||||
expect(YtDlpRunnerMock, :run, fn _url, opts, _ot, _addl_opts ->
|
||||
assert {:output, "/tmp/test/media/%(title)S.%(ext)S"} in opts
|
||||
assert {:remux_video, "mp4"} in opts
|
||||
{:ok, media_attributes_return_fixture()}
|
||||
end)
|
||||
|
||||
FastIndexingHelpers.kickoff_download_tasks_from_youtube_rss_feed(source)
|
||||
end
|
||||
|
||||
test "sets use_cookies if the source uses cookies" do
|
||||
expect(HTTPClientMock, :get, fn _url -> {:ok, "<yt:videoId>test_1</yt:videoId>"} end)
|
||||
|
||||
|
|
|
|||
|
|
@ -202,6 +202,16 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpersTest do
|
|||
assert %Ecto.Changeset{} = changeset
|
||||
end
|
||||
|
||||
test "passes the source's download options to the yt-dlp runner", %{source: source} do
|
||||
expect(YtDlpRunnerMock, :run, fn _url, opts, _ot, _addl_opts ->
|
||||
assert {:output, "/tmp/test/media/%(title)S.%(ext)S"} in opts
|
||||
assert {:remux_video, "mp4"} in opts
|
||||
{:ok, source_attributes_return_fixture()}
|
||||
end)
|
||||
|
||||
SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source)
|
||||
end
|
||||
|
||||
test "sets use_cookies if the source uses cookies" do
|
||||
expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, addl_opts ->
|
||||
assert {:use_cookies, true} in addl_opts
|
||||
|
|
|
|||
|
|
@ -35,6 +35,16 @@ defmodule Pinchflat.YtDlp.MediaCollectionTest do
|
|||
assert {:error, "Big issue", 1} = MediaCollection.get_media_attributes_for_collection(@channel_url)
|
||||
end
|
||||
|
||||
test "passes long additional command options" do
|
||||
expect(YtDlpRunnerMock, :run, fn _url, opts, _ot, _addl_opts ->
|
||||
assert :foo in opts
|
||||
|
||||
{:ok, ""}
|
||||
end)
|
||||
|
||||
assert {:ok, _} = MediaCollection.get_media_attributes_for_collection(@channel_url, [:foo])
|
||||
end
|
||||
|
||||
test "passes additional args to runner" do
|
||||
expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, addl_opts ->
|
||||
assert [{:output_filepath, filepath} | _] = addl_opts
|
||||
|
|
@ -56,7 +66,7 @@ defmodule Pinchflat.YtDlp.MediaCollectionTest do
|
|||
end
|
||||
|
||||
assert {:ok, _} =
|
||||
MediaCollection.get_media_attributes_for_collection(@channel_url, file_listener_handler: handler)
|
||||
MediaCollection.get_media_attributes_for_collection(@channel_url, [], file_listener_handler: handler)
|
||||
|
||||
assert_receive {:handler, filename}
|
||||
assert String.ends_with?(filename, ".json")
|
||||
|
|
|
|||
|
|
@ -120,13 +120,22 @@ defmodule Pinchflat.YtDlp.MediaTest do
|
|||
assert {:ok, _} = Media.get_media_attributes(@media_url)
|
||||
end
|
||||
|
||||
test "passes along additional command options" do
|
||||
expect(YtDlpRunnerMock, :run, fn _url, opts, _ot, _addl ->
|
||||
assert [:simulate, :skip_download, :custom_arg] = opts
|
||||
{:ok, media_attributes_return_fixture()}
|
||||
end)
|
||||
|
||||
assert {:ok, _} = Media.get_media_attributes(@media_url, [:custom_arg])
|
||||
end
|
||||
|
||||
test "passes along additional options" do
|
||||
expect(YtDlpRunnerMock, :run, fn _url, _opts, _ot, addl ->
|
||||
assert [addl_arg: true] = addl
|
||||
{:ok, media_attributes_return_fixture()}
|
||||
end)
|
||||
|
||||
assert {:ok, _} = Media.get_media_attributes(@media_url, addl_arg: true)
|
||||
assert {:ok, _} = Media.get_media_attributes(@media_url, [], addl_arg: true)
|
||||
end
|
||||
|
||||
test "returns the error straight through when the command fails" do
|
||||
|
|
@ -139,7 +148,7 @@ defmodule Pinchflat.YtDlp.MediaTest do
|
|||
describe "indexing_output_template/0" do
|
||||
test "contains all the greatest hits" do
|
||||
attrs =
|
||||
~w(id title live_status original_url description aspect_ratio duration upload_date timestamp playlist_index)a
|
||||
~w(id title live_status original_url description aspect_ratio duration upload_date timestamp playlist_index filename)a
|
||||
|
||||
formatted_attrs = "%(.{#{Enum.join(attrs, ",")}})j"
|
||||
|
||||
|
|
@ -159,7 +168,8 @@ defmodule Pinchflat.YtDlp.MediaTest do
|
|||
"duration" => 60,
|
||||
"upload_date" => "20210101",
|
||||
"timestamp" => 1_600_000_000,
|
||||
"playlist_index" => 1
|
||||
"playlist_index" => 1,
|
||||
"filename" => "TiZPUDkDYbk.mp4"
|
||||
}
|
||||
|
||||
assert %Media{
|
||||
|
|
@ -171,7 +181,8 @@ defmodule Pinchflat.YtDlp.MediaTest do
|
|||
short_form_content: false,
|
||||
uploaded_at: ~U[2020-09-13 12:26:40Z],
|
||||
duration_seconds: 60,
|
||||
playlist_index: 1
|
||||
playlist_index: 1,
|
||||
predicted_media_filepath: "TiZPUDkDYbk.mp4"
|
||||
} == Media.response_to_struct(response)
|
||||
end
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue