[Enhancement] Overhaul indexing to be more efficient (#540)

* WIP - created methods for breaking on existing media

* WIP - got everything hooked up for POC

* Add some docs, tests

* Refactors

* Updated TODO
This commit is contained in:
Kieran 2025-01-02 15:48:18 -08:00 committed by GitHub
parent 09d1653f4b
commit 9185f075ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 236 additions and 48 deletions

View file

@ -57,25 +57,51 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorkerTest do
:ok
end
test "it indexes the source if it should be indexed" do
test "indexes the source if it should be indexed" do
expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, _opts, _ot, _addl_opts ->
{:ok, ""}
end)
source = source_fixture(index_frequency_minutes: 10)
perform_job(MediaCollectionIndexingWorker, %{id: source.id})
end
test "it indexes the source no matter what if the source has never been indexed before" do
test "indexes the source no matter what if the source has never been indexed before" do
expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, _opts, _ot, _addl_opts ->
{:ok, ""}
end)
source = source_fixture(index_frequency_minutes: 0, last_indexed_at: nil)
perform_job(MediaCollectionIndexingWorker, %{id: source.id})
end
test "it indexes the source no matter what if the 'force' arg is passed" do
test "indexes the source no matter what if the 'force' arg is passed" do
expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, _opts, _ot, _addl_opts ->
{:ok, ""}
end)
source = source_fixture(index_frequency_minutes: 0, last_indexed_at: DateTime.utc_now())
perform_job(MediaCollectionIndexingWorker, %{id: source.id, force: true})
end
test "it does not do any indexing if the source has been indexed and shouldn't be rescheduled" do
test "doesn't use a download archive if the index has been forced" do
  # A channel source that was indexed just now with an index frequency of 0;
  # the job below is run with `force: true`.
  forced_source =
    source_fixture(
      collection_type: :channel,
      index_frequency_minutes: 0,
      last_indexed_at: DateTime.utc_now()
    )

  # The runner must be invoked without either of the download-archive options.
  expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, runner_opts, _ot, _addl_opts ->
    refute :break_on_existing in runner_opts
    refute Keyword.has_key?(runner_opts, :download_archive)

    {:ok, ""}
  end)

  perform_job(MediaCollectionIndexingWorker, %{id: forced_source.id, force: true})
end
test "does not do any indexing if the source has been indexed and shouldn't be rescheduled" do
expect(YtDlpRunnerMock, :run, 0, fn _url, :get_media_attributes_for_collection, _opts, _ot, _addl_opts ->
{:ok, ""}
end)
@ -85,7 +111,7 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorkerTest do
perform_job(MediaCollectionIndexingWorker, %{id: source.id})
end
test "it does not reschedule if the source shouldn't be indexed" do
test "does not reschedule if the source shouldn't be indexed" do
stub(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, _opts, _ot, _addl_opts -> {:ok, ""} end)
source = source_fixture(index_frequency_minutes: -1)
@ -94,7 +120,7 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorkerTest do
refute_enqueued(worker: MediaCollectionIndexingWorker, args: %{"id" => source.id})
end
test "it kicks off a download job for each pending media item" do
test "kicks off a download job for each pending media item" do
expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, _opts, _ot, _addl_opts ->
{:ok, source_attributes_return_fixture()}
end)
@ -105,7 +131,7 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorkerTest do
assert length(all_enqueued(worker: MediaDownloadWorker)) == 3
end
test "it starts a job for any pending media item even if it's from another run" do
test "starts a job for any pending media item even if it's from another run" do
expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, _opts, _ot, _addl_opts ->
{:ok, source_attributes_return_fixture()}
end)
@ -117,7 +143,7 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorkerTest do
assert length(all_enqueued(worker: MediaDownloadWorker)) == 4
end
test "it does not kick off a job for media items that could not be saved" do
test "does not kick off a job for media items that could not be saved" do
expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, _opts, _ot, _addl_opts ->
{:ok, source_attributes_return_fixture()}
end)
@ -130,7 +156,7 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorkerTest do
assert length(all_enqueued(worker: MediaDownloadWorker))
end
test "it reschedules the job based on the index frequency" do
test "reschedules the job based on the index frequency" do
source = source_fixture(index_frequency_minutes: 10)
perform_job(MediaCollectionIndexingWorker, %{id: source.id})
@ -141,7 +167,7 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorkerTest do
)
end
test "it creates a task for the rescheduled job" do
test "creates a task for the rescheduled job" do
source = source_fixture(index_frequency_minutes: 10)
task_count_fetcher = fn ->
@ -153,7 +179,7 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorkerTest do
end)
end
test "it creates a future task for fast indexing if appropriate" do
test "creates a future task for fast indexing if appropriate" do
source = source_fixture(index_frequency_minutes: 10, fast_index: true)
perform_job(MediaCollectionIndexingWorker, %{id: source.id})
@ -164,7 +190,7 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorkerTest do
)
end
test "it deletes existing fast indexing tasks if a new one is created" do
test "deletes existing fast indexing tasks if a new one is created" do
source = source_fixture(index_frequency_minutes: 10, fast_index: true)
{:ok, job} = Oban.insert(FastIndexingWorker.new(%{"id" => source.id}))
task = task_fixture(source_id: source.id, job_id: job.id)
@ -174,14 +200,14 @@ defmodule Pinchflat.SlowIndexing.MediaCollectionIndexingWorkerTest do
assert_raise Ecto.NoResultsError, fn -> Repo.reload!(task) end
end
test "it does not create a task for fast indexing otherwise" do
test "does not create a task for fast indexing otherwise" do
source = source_fixture(index_frequency_minutes: 10, fast_index: false)
perform_job(MediaCollectionIndexingWorker, %{id: source.id})
refute_enqueued(worker: FastIndexingWorker)
end
test "it creates the basic media_item records" do
test "creates the basic media_item records" do
expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, _opts, _ot, _addl_opts ->
{:ok, source_attributes_return_fixture()}
end)

View file

@ -14,6 +14,10 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpersTest do
alias Pinchflat.SlowIndexing.SlowIndexingHelpers
alias Pinchflat.SlowIndexing.MediaCollectionIndexingWorker
# Puts a freshly created source into the test context under `:source`, so tests
# can pattern-match it out with `%{source: source}`.
setup do
  source = source_fixture()

  {:ok, %{source: source}}
end
describe "kickoff_indexing_task/3" do
test "schedules a job" do
source = source_fixture(index_frequency_minutes: 1)
@ -53,6 +57,16 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpersTest do
assert_in_delta DateTime.diff(job.scheduled_at, DateTime.utc_now(), :second), 0, 1
end
test "schedules a job immediately if the user is forcing an index" do
  # Last indexed 5 minutes ago with a 30-minute index frequency.
  recently_indexed = source_fixture(index_frequency_minutes: 30, last_indexed_at: now_minus(5, :minutes))

  assert {:ok, _} = SlowIndexingHelpers.kickoff_indexing_task(recently_indexed, %{force: true})

  # Exactly one indexing job should be enqueued for this source.
  [forced_job] = all_enqueued(worker: MediaCollectionIndexingWorker, args: %{"id" => recently_indexed.id})
  seconds_until_run = DateTime.diff(forced_job.scheduled_at, DateTime.utc_now(), :second)

  # "Immediately" here means scheduled within one second of now.
  assert_in_delta seconds_until_run, 0, 1
end
test "creates and attaches a task" do
source = source_fixture(index_frequency_minutes: 1)
@ -123,12 +137,6 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpersTest do
end
describe "delete_indexing_tasks/2" do
setup do
source = source_fixture()
{:ok, %{source: source}}
end
test "deletes slow indexing tasks for the source", %{source: source} do
{:ok, job} = Oban.insert(MediaCollectionIndexingWorker.new(%{"id" => source.id}))
_task = task_fixture(source_id: source.id, job_id: job.id)
@ -172,13 +180,13 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpersTest do
end
end
describe "index_and_enqueue_download_for_media_items/1" do
describe "index_and_enqueue_download_for_media_items/2" do
setup do
stub(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, _opts, _ot, _addl_opts ->
{:ok, source_attributes_return_fixture()}
end)
{:ok, [source: source_fixture()]}
:ok
end
test "creates a media_item record for each media ID returned", %{source: source} do
@ -315,11 +323,7 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpersTest do
end
end
describe "index_and_enqueue_download_for_media_items/1 when testing file watcher" do
setup do
{:ok, [source: source_fixture()]}
end
describe "index_and_enqueue_download_for_media_items/2 when testing file watcher" do
test "creates a new media item for everything already in the file", %{source: source} do
watcher_poll_interval = Application.get_env(:pinchflat, :file_watcher_poll_interval)
@ -446,4 +450,62 @@ defmodule Pinchflat.SlowIndexing.SlowIndexingHelpersTest do
assert [] = SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source)
end
end
# Checks which options reach the yt-dlp runner when indexing a source: each test
# inspects the `opts` argument passed to the mocked runner for the
# `:break_on_existing` flag and the `:download_archive` entry.
describe "index_and_enqueue_download_for_media_items/2 when testing the download archive" do
  # NOTE(review): uses the source from the module-level setup, so this relies on
  # source_fixture/0 producing a channel by default - confirm against the fixture.
  test "a download archive is used if the source is a channel", %{source: source} do
    expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, opts, _ot, _addl_opts ->
      assert :break_on_existing in opts
      assert Keyword.has_key?(opts, :download_archive)

      {:ok, source_attributes_return_fixture()}
    end)

    SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source)
  end

  test "a download archive is not used if the source is not a channel" do
    source = source_fixture(%{collection_type: :playlist})

    expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, opts, _ot, _addl_opts ->
      refute :break_on_existing in opts
      refute Keyword.has_key?(opts, :download_archive)

      {:ok, source_attributes_return_fixture()}
    end)

    SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source)
  end

  test "a download archive is not used if the index has been forced to run" do
    source = source_fixture(%{collection_type: :channel})

    expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, opts, _ot, _addl_opts ->
      refute :break_on_existing in opts
      refute Keyword.has_key?(opts, :download_archive)

      {:ok, source_attributes_return_fixture()}
    end)

    # `was_forced: true` is the only difference from the channel test above.
    SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source, was_forced: true)
  end

  test "the download archive is formatted correctly and contains the right video", %{source: source} do
    # Items are created with upload dates from 1 day ago down to 21 days ago, so
    # the last element of the list is the one with the oldest upload date.
    media_items =
      Enum.map(1..21, fn n ->
        media_item_fixture(%{source_id: source.id, uploaded_at: now_minus(n, :days)})
      end)

    expect(YtDlpRunnerMock, :run, fn _url, :get_media_attributes_for_collection, opts, _ot, _addl_opts ->
      # fetch!/2 fails with a KeyError naming the missing option, instead of
      # File.read!/1 raising on a nil path.
      archive_file = Keyword.fetch!(opts, :download_archive)
      last_media_item = List.last(media_items)

      # The archive file is expected to hold exactly one "youtube <media_id>" entry.
      assert File.read!(archive_file) == "youtube #{last_media_item.media_id}"

      {:ok, source_attributes_return_fixture()}
    end)

    SlowIndexingHelpers.index_and_enqueue_download_for_media_items(source)
  end
end
end

View file

@ -17,6 +17,12 @@ defmodule Pinchflat.YtDlp.CommandRunnerTest do
assert {:ok, _output} = Runner.run(@media_url, :foo, [], "")
end
test "considers a 101 exit code as being successful" do
  # Mock executable whose name indicates it exits with status 101.
  exit_101_script = "/app/test/support/scripts/yt-dlp-mocks/101_exit_code.sh"

  wrap_executable(exit_101_script, fn ->
    # The runner should still report success rather than an error tuple.
    assert {:ok, _output} = Runner.run(@media_url, :foo, [], "")
  end)
end
test "includes the media url as the first argument" do
assert {:ok, output} = Runner.run(@media_url, :foo, [:ignore_errors], "")

View file

@ -0,0 +1,3 @@
#!/bin/bash
# Test stand-in executable: prints nothing and terminates with exit status 101.
exit 101