[Bugfix] prevent duplicate videos from being downloaded if the video's name changes (#396)

* Added methods for deleting outdated files

* Hooked up outdated file deletion to media download worker
This commit is contained in:
Kieran 2024-09-23 15:59:48 -07:00 committed by GitHub
parent 7fb6fa3af4
commit 0163e85e76
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 310 additions and 2 deletions

View file

@ -12,6 +12,7 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do
alias Pinchflat.Tasks
alias Pinchflat.Repo
alias Pinchflat.Media
alias Pinchflat.Media.FileDeletion
alias Pinchflat.Downloading.MediaDownloader
alias Pinchflat.Lifecycle.UserScripts.CommandRunner, as: UserScriptRunner
@ -85,6 +86,7 @@ defmodule Pinchflat.Downloading.MediaDownloadWorker do
media_redownloaded_at: get_redownloaded_at(is_quality_upgrade)
})
:ok = FileDeletion.delete_outdated_files(media_item, updated_media_item)
run_user_script(:media_downloaded, updated_media_item)
:ok

View file

@ -0,0 +1,54 @@
defmodule Pinchflat.Media.FileDeletion do
@moduledoc """
Functions for deleting files that are no longer needed by media items.
"""
alias Pinchflat.Utils.MapUtils
alias Pinchflat.Media.MediaItem
alias Pinchflat.Utils.FilesystemUtils, as: FSUtils
@doc """
Deletes files that are no longer needed by a media item.
This means that if a media item has been updated, the old and new versions
can be passed and any files that are no longer needed will be deleted.
An example is a video that gets its quality upgraded and its name changes
between original download and re-download. The old file will exist on-disk
with the old name but the database entry will point to the new file. This
function can be used to delete the old file in this case.
Returns :ok
"""
def delete_outdated_files(old_media_item, new_media_item) do
non_subtitle_keys = MediaItem.filepath_attributes() -- [:subtitle_filepaths]
old_non_subtitles = Map.take(old_media_item, non_subtitle_keys)
old_subtitles = MapUtils.from_nested_list(old_media_item.subtitle_filepaths)
new_non_subtitles = Map.take(new_media_item, non_subtitle_keys)
new_subtitles = MapUtils.from_nested_list(new_media_item.subtitle_filepaths)
handle_file_deletion(old_non_subtitles, new_non_subtitles)
handle_file_deletion(old_subtitles, new_subtitles)
:ok
end
defp handle_file_deletion(old_attributes, new_attributes) do
# The logic:
# - A file should only be deleted if it exists and the new file is different
# - The new attributes are the ones we're interested in keeping
# - If the old attributes have a key that doesn't exist in the new attributes, don't touch it.
# This is good for archiving but may be unpopular for other users so this may change.
Enum.each(new_attributes, fn {key, new_filepath} ->
old_filepath = Map.get(old_attributes, key)
files_have_changed = old_filepath && new_filepath && old_filepath != new_filepath
files_exist_on_disk = files_have_changed && File.exists?(old_filepath) && File.exists?(new_filepath)
if files_exist_on_disk && !FSUtils.filepaths_reference_same_file?(old_filepath, new_filepath) do
FSUtils.delete_file_and_remove_empty_directories(old_filepath)
end
end)
end
end

View file

@ -20,6 +20,24 @@ defmodule Pinchflat.Utils.FilesystemUtils do
end
end
@doc """
Checks if two filepaths reference the same file.
Useful if you have a relative and absolute filepath and want to be sure they're the same file.
Also works with symlinks.
Returns boolean()
"""
def filepaths_reference_same_file?(filepath_1, filepath_2) do
{:ok, stat_1} = File.stat(filepath_1)
{:ok, stat_2} = File.stat(filepath_2)
identifier_1 = "#{stat_1.major_device}:#{stat_1.minor_device}:#{stat_1.inode}"
identifier_2 = "#{stat_2.major_device}:#{stat_2.minor_device}:#{stat_2.inode}"
identifier_1 == identifier_2
end
@doc """
Generates a temporary file and returns its path. The file is empty and has the given type.
Generates all the directories in the path if they don't exist.

View file

@ -0,0 +1,17 @@
defmodule Pinchflat.Utils.MapUtils do
@moduledoc """
Utility methods for working with maps
"""
@doc """
Converts a nested list of 2-element tuples or lists into a map.
Returns map()
"""
def from_nested_list(list) do
Enum.reduce(list, %{}, fn
[key, value], acc -> Map.put(acc, key, value)
{key, value}, acc -> Map.put(acc, key, value)
end)
end
end

View file

@ -236,6 +236,26 @@ defmodule Pinchflat.Downloading.MediaDownloadWorkerTest do
perform_job(MediaDownloadWorker, %{id: media_item.id, force: true})
end
test "deletes old files if the media item has been updated" do
expect(YtDlpRunnerMock, :run, 1, fn _url, _opts, _ot, _addl ->
tmp_media_item = media_item_with_attachments()
metadata = render_parsed_metadata(:media_metadata)
metadata = Map.put(metadata, "filepath", tmp_media_item.media_filepath)
{:ok, Phoenix.json_library().encode!(metadata)}
end)
expect(YtDlpRunnerMock, :run, 1, fn _url, _opts, _ot, _addl -> {:ok, ""} end)
old_media_item = media_item_with_attachments()
perform_job(MediaDownloadWorker, %{id: old_media_item.id, force: true})
updated_media_item = Repo.reload(old_media_item)
assert updated_media_item.media_filepath != old_media_item.media_filepath
refute File.exists?(old_media_item.media_filepath)
assert File.exists?(updated_media_item.media_filepath)
end
end
describe "perform/1 when testing user script callbacks" do

View file

@ -0,0 +1,77 @@
defmodule Pinchflat.Media.FileDeletionTest do
use Pinchflat.DataCase
import Pinchflat.MediaFixtures
alias Pinchflat.Media.FileDeletion
describe "delete_outdated_files/2" do
test "deletes outdated non-subtitle files" do
new_media_item = media_item_with_attachments()
old_media_item = media_item_with_attachments()
assert :ok = FileDeletion.delete_outdated_files(old_media_item, new_media_item)
assert File.exists?(new_media_item.media_filepath)
refute File.exists?(old_media_item.media_filepath)
end
test "doesn't delete non-subtitle files if the new file is the same" do
new_media_item = media_item_with_attachments()
old_media_item = media_item_fixture(%{media_filepath: new_media_item.media_filepath})
assert :ok = FileDeletion.delete_outdated_files(old_media_item, new_media_item)
assert File.exists?(new_media_item.media_filepath)
assert File.exists?(old_media_item.media_filepath)
end
test "doesn't delete the old file if the new file is missing that key" do
new_media_item = media_item_fixture(%{media_filepath: nil})
old_media_item = media_item_with_attachments()
assert :ok = FileDeletion.delete_outdated_files(old_media_item, new_media_item)
assert File.exists?(old_media_item.media_filepath)
end
test "deletes outdated subtitle files" do
new_media_item = media_item_with_attachments()
old_media_item = media_item_with_attachments()
assert :ok = FileDeletion.delete_outdated_files(old_media_item, new_media_item)
assert File.exists?(get_subtitle_filepath(new_media_item, "en"))
refute File.exists?(get_subtitle_filepath(old_media_item, "en"))
end
test "keeps old subtitle files if the new file is the same" do
new_media_item = media_item_with_attachments()
old_media_item = media_item_fixture(%{subtitle_filepaths: new_media_item.subtitle_filepaths})
assert :ok = FileDeletion.delete_outdated_files(old_media_item, new_media_item)
assert File.exists?(get_subtitle_filepath(new_media_item, "en"))
assert File.exists?(get_subtitle_filepath(old_media_item, "en"))
end
test "doesn't delete old subtitle files if the new file is missing that key" do
new_media_item = media_item_fixture(%{subtitle_filepaths: []})
old_media_item = media_item_with_attachments()
assert :ok = FileDeletion.delete_outdated_files(old_media_item, new_media_item)
assert File.exists?(get_subtitle_filepath(old_media_item, "en"))
end
end
defp get_subtitle_filepath(media_item, language) do
Enum.reduce_while(media_item.subtitle_filepaths, nil, fn [lang, filepath], acc ->
if lang == language do
{:halt, filepath}
else
{:cont, acc}
end
end)
end
end

View file

@ -37,6 +37,46 @@ defmodule Pinchflat.Utils.FilesystemUtilsTest do
end
end
describe "filepaths_reference_same_file?/2" do
setup do
filepath = FilesystemUtils.generate_metadata_tmpfile(:json)
on_exit(fn -> File.rm!(filepath) end)
{:ok, %{filepath: filepath}}
end
test "returns true if the files are the same", %{filepath: filepath} do
assert FilesystemUtils.filepaths_reference_same_file?(filepath, filepath)
end
test "returns true if different filepaths point to the same file", %{filepath: filepath} do
short_path = Path.expand(filepath)
long_path = Path.join(["/tmp", "..", filepath])
assert short_path != long_path
assert FilesystemUtils.filepaths_reference_same_file?(short_path, long_path)
end
test "returns true if the files are symlinked", %{filepath: filepath} do
tmpfile_directory = Application.get_env(:pinchflat, :tmpfile_directory)
other_filepath = Path.join([tmpfile_directory, "symlink.json"])
:ok = File.ln_s!(filepath, other_filepath)
assert FilesystemUtils.filepaths_reference_same_file?(filepath, other_filepath)
File.rm!(other_filepath)
end
test "returns false if the files are different", %{filepath: filepath} do
other_filepath = FilesystemUtils.generate_metadata_tmpfile(:json)
refute FilesystemUtils.filepaths_reference_same_file?(filepath, other_filepath)
File.rm!(other_filepath)
end
end
describe "generate_metadata_tmpfile/1" do
test "creates a tmpfile and returns its path" do
res = FilesystemUtils.generate_metadata_tmpfile(:json)

View file

@ -0,0 +1,31 @@
defmodule Pinchflat.Utils.MapUtilsTest do
use Pinchflat.DataCase
alias Pinchflat.Utils.MapUtils
describe "from_nested_list/1" do
test "creates a map from a nested 2-element tuple list" do
list = [
{"key1", "value1"},
{"key2", "value2"}
]
assert MapUtils.from_nested_list(list) == %{
"key1" => "value1",
"key2" => "value2"
}
end
test "creates a map from a nested 2-element list of lists" do
list = [
["key1", "value1"],
["key2", "value2"]
]
assert MapUtils.from_nested_list(list) == %{
"key1" => "value1",
"key2" => "value2"
}
end
end
end

View file

@ -0,0 +1,36 @@
1
00:00:00,000 --> 00:00:02,500
Welcome to the Example Subtitle File!
2
00:00:03,000 --> 00:00:06,000
This is a demonstration of SRT subtitles.
3
00:00:07,000 --> 00:00:10,500
You can use SRT files to add subtitles to your videos.
4
00:00:12,000 --> 00:00:15,000
Each subtitle entry consists of a number, a timecode,
and the subtitle text.
5
00:00:16,000 --> 00:00:20,000
The timecode format is hours:minutes:seconds,milliseconds.
6
00:00:21,000 --> 00:00:25,000
You can adjust the timing to match your video.
7
00:00:26,000 --> 00:00:30,000
Make sure the subtitle text is clear and readable.
8
00:00:31,000 --> 00:00:35,000
And that's how you create an SRT subtitle file!
9
00:00:36,000 --> 00:00:40,000
Enjoy adding subtitles to your videos!

View file

@ -73,16 +73,19 @@ defmodule Pinchflat.MediaFixtures do
"#{:rand.uniform(1_000_000)}"
])
stored_media_filepath = Path.join(base_dir, "#media.mp4")
stored_media_filepath = Path.join(base_dir, "media.mp4")
thumbnail_filepath = Path.join(base_dir, "thumbnail.jpg")
subtitle_filepath = Path.join(base_dir, "subtitle.en.srt")
FilesystemUtils.cp_p!(media_filepath_fixture(), stored_media_filepath)
FilesystemUtils.cp_p!(thumbnail_filepath_fixture(), thumbnail_filepath)
FilesystemUtils.cp_p!(subtitle_filepath_fixture(), subtitle_filepath)
merged_attrs =
Map.merge(attrs, %{
media_filepath: stored_media_filepath,
thumbnail_filepath: thumbnail_filepath
thumbnail_filepath: thumbnail_filepath,
subtitle_filepaths: [["en", subtitle_filepath]]
})
media_item_fixture(merged_attrs)
@ -124,6 +127,16 @@ defmodule Pinchflat.MediaFixtures do
])
end
def subtitle_filepath_fixture do
Path.join([
File.cwd!(),
"test",
"support",
"files",
"subtitle.srt"
])
end
def infojson_filepath_fixture do
Path.join([
File.cwd!(),