From 988f488308263978bcf7561007cd5c044e4c3c66 Mon Sep 17 00:00:00 2001 From: Brandon Haney <121782102+Brandon-Haney@users.noreply.github.com> Date: Fri, 5 Jun 2026 16:42:03 -0500 Subject: [PATCH] Scope subtitle/sidecar caching to the owning video in shared folders MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Associated-file discovery decided whether siblings belonged to one video based on how many videos were being cached from a directory, not how many physically exist in it. In a flat folder holding many movies, caching one movie pulled in every other movie's subtitles, which then churned back to the array on the next run. get_media_siblings_grouped() now scans the directory for all physical videos (including .plexcached backups) and gates on that count: - One physical video (per-movie folder): unchanged fast path — every sibling belongs to it, including subtitles named without the video's quality suffix. - Multiple physical videos: assign each sibling to its owning video by boundary-aware name-prefix match. Siblings owned by a video that is not being cached are skipped, and orphan subtitles (no matching video) are skipped rather than handed to the first video, since a subtitle belongs to exactly one video. Generic shared assets (poster.jpg) still go to the first cached video. name_matches_video_stem() requires a non-alphanumeric boundary after the stem so "Movie 10.en.srt" no longer matches the stem "Movie 1". Adds regression tests covering the flat-folder case, the per-movie folder with non-prefixed subtitles, the .plexcached-as-owner case, orphan subtitle skipping, and the boundary matcher. 
--- core/file_operations.py | 151 +++++++++++++++++++++------ tests/test_extension_free_caching.py | 112 ++++++++++++++++++++ 2 files changed, 232 insertions(+), 31 deletions(-) diff --git a/core/file_operations.py b/core/file_operations.py index 9035f415..3e8e39fb 100644 --- a/core/file_operations.py +++ b/core/file_operations.py @@ -115,6 +115,31 @@ def is_directory_level_file(filepath: str, parent_video: str) -> bool: return not os.path.basename(filepath).startswith(video_base) +def name_matches_video_stem(name: str, stem: str) -> bool: + """Check if a sibling filename belongs to a video by its stem (boundary-aware). + + A sidecar belongs to a video when its name is the video stem followed by a + separator and the rest (e.g. ``.en.srt``, ``-fanart.jpg``, + ``.nfo``). A bare ``str.startswith()`` is not enough — it would match + ``"Movie 10.en.srt"`` against the stem ``"Movie 1"``. We require the character + immediately after the stem to be a non-alphanumeric boundary so that two + movies sharing a prefix in a flat directory don't steal each other's + subtitles. + + Args: + name: The sibling file's basename. + stem: A video file's stem (basename without extension). + + Returns: + True if ``name`` is a sidecar of the video identified by ``stem``. + """ + if not name.startswith(stem): + return False + if len(name) == len(stem): + return True + return not name[len(stem)].isalnum() + + def is_season_like_folder(folder_name: str) -> bool: """Check if a folder name looks like a TV season directory. @@ -2839,11 +2864,18 @@ def get_media_siblings_grouped(self, media_files: List[str], files_to_skip: Opti Discovers all non-video, non-hidden files in the same directory as each video. This includes subtitles, artwork, NFOs, and any other sidecar files. 
- When multiple videos share a directory (e.g., 4K + 1080p versions), siblings - are assigned by name-prefix matching: a file named "Movie - [1080P]-FGT-fanart.jpg" - is assigned to "Movie - [1080P]-FGT.mkv", not to "Movie - [2160P]-REMUX.mkv". - Siblings that don't match any video's stem are assigned to the first video - in the directory. + Whether a sibling is shared by the folder or owned by one video is decided + by how many videos *physically* exist in the directory, not how many are + being cached: + + - One physical video (a per-movie folder) → every sibling belongs to it, + including subtitles named without the video's quality suffix. + - Multiple physical videos (4K + 1080p versions, or a flat library folder + holding many movies) → each sibling is assigned to its owning video by + boundary-aware name-prefix matching. A sibling owned by a video that + isn't being cached is skipped, so caching one movie does not drag in + every other movie's subtitles (#182). Non-subtitle siblings that match no video + (e.g. a shared "poster.jpg") go to the first cached video; unmatched subtitles are skipped. Args: media_files: List of media file paths. @@ -2874,40 +2906,53 @@ def get_media_siblings_grouped(self, media_files: List[str], files_to_skip: Opti if not os.path.exists(directory_path): continue - # Get all non-video siblings in this directory once - all_siblings = self._find_sibling_files(directory_path, videos[0]) - # _find_sibling_files excludes the passed video, so re-add filtering for all videos - video_basenames = {os.path.basename(v) for v in videos} - all_siblings = [s for s in all_siblings if os.path.basename(s) not in video_basenames] + # Scan the directory once: count ALL physical videos present (not just + # the ones being cached) and collect non-video sibling files. 
+ video_stems_in_dir, all_siblings = self._scan_directory(directory_path) - if len(videos) == 1: - # Single video in directory — all siblings belong to it (fast path) - result[videos[0]] = all_siblings + if len(video_stems_in_dir) <= 1: + # Per-movie folder — the only video here is the one we're caching, + # so every sibling belongs to it (fast path). Subtitles named + # without the video's quality suffix (e.g. "English.srt") are + # correctly grabbed here. + result[videos[0]] = list(all_siblings) for sib in all_siblings: logging.debug(f"Sibling found: {sib}") else: - # Multiple videos — assign siblings by name-prefix matching - video_stems = {v: os.path.splitext(os.path.basename(v))[0] for v in videos} - unmatched = [] + # Flat/shared folder — multiple movies live together. Assign each + # sibling to its owning video by boundary-aware name-prefix match. + # A sidecar owned by a video that ISN'T being cached is skipped, so + # one movie no longer drags in every other movie's subtitles (#182). + cached_stem_to_path = { + os.path.splitext(os.path.basename(v))[0]: v for v in videos + } + # Longest stem first so a more specific video wins over a shorter + # prefix sibling (e.g. "Star.Trek.V…" beats "Star.Trek"). + sorted_stems = sorted(video_stems_in_dir, key=len, reverse=True) for sib_path in all_siblings: sib_name = os.path.basename(sib_path) - matched_video = None - for video, stem in video_stems.items(): - if sib_name.startswith(stem): - matched_video = video - break - if matched_video: - result[matched_video].append(sib_path) - logging.debug(f"Sibling found: {sib_path} → {os.path.basename(matched_video)}") + owner_stem = next( + (s for s in sorted_stems if name_matches_video_stem(sib_name, s)), + None, + ) + if owner_stem is None: + if is_subtitle_file(sib_path): + # A subtitle is owned by exactly one video, never shared. + # If no video here owns it, the accompanying video isn't + # being cached, so don't attach it to an unrelated movie (#182). 
+ logging.debug(f"Skipping orphan subtitle (no owning video being cached): {sib_path}") + continue + # Non-subtitle directory-level shared asset (poster.jpg, + # fanart.jpg). Assign to the first cached video. + result[videos[0]].append(sib_path) + logging.debug(f"Sibling found (shared, assigned to {os.path.basename(videos[0])}): {sib_path}") + elif owner_stem in cached_stem_to_path: + owner = cached_stem_to_path[owner_stem] + result[owner].append(sib_path) + logging.debug(f"Sibling found: {sib_path} → {os.path.basename(owner)}") else: - unmatched.append(sib_path) - - # Assign unmatched siblings (e.g., generic "poster.jpg") to first video - if unmatched: - result[videos[0]].extend(unmatched) - for sib in unmatched: - logging.debug(f"Sibling found (unmatched, assigned to {os.path.basename(videos[0])}): {sib}") + logging.debug(f"Skipping sibling owned by non-cached video '{owner_stem}': {sib_path}") # TV show root scan: if any video is in a Season-like folder, # also discover show-root assets (poster.jpg, fanart.jpg, etc.) @@ -2961,6 +3006,50 @@ def get_media_subtitles(self, media_files: List[str], files_to_skip: Optional[Se all_files.extend(subs) return all_files + def _scan_directory(self, directory_path: str) -> Tuple[Set[str], List[str]]: + """Scan a directory once, partitioning entries into video stems and siblings. + + Counts *all* physical videos in the directory (including ``.plexcached`` + backups of previously-cached videos), not just the ones being cached. + This lets the caller tell a per-movie folder (one physical video) from a + flat/shared folder (many physical videos) and assign sidecars to the + right owner. + + Args: + directory_path: Directory to scan. + + Returns: + (video_stems, sibling_paths) where video_stems is the set of distinct + video stems physically present, and sibling_paths are full paths to + non-video, non-hidden, non-.plexcached files. 
+ """ + video_stems: Set[str] = set() + sibling_paths: List[str] = [] + try: + for entry in os.scandir(directory_path): + if not entry.is_file(): + continue + name = entry.name + if name.startswith('.'): + continue + if name.endswith(PLEXCACHED_EXTENSION): + # Array backup of a cached file — never a sibling. If it backs + # a video, its stem still counts as a physical video present. + underlying = name[:-len(PLEXCACHED_EXTENSION)] + if is_video_file(underlying): + video_stems.add(os.path.splitext(underlying)[0]) + continue + if is_video_file(name): + video_stems.add(os.path.splitext(name)[0]) + else: + sibling_paths.append(entry.path) + except PermissionError as e: + logging.error(f"Cannot access directory {directory_path}. Permission denied. {type(e).__name__}: {e}") + except OSError as e: + logging.error(f"Cannot access directory {directory_path}. {type(e).__name__}: {e}") + + return video_stems, sibling_paths + def _find_sibling_files(self, directory_path: str, file: str) -> List[str]: """Find all non-video, non-hidden sibling files in a directory. diff --git a/tests/test_extension_free_caching.py b/tests/test_extension_free_caching.py index f829bc9b..540f5984 100644 --- a/tests/test_extension_free_caching.py +++ b/tests/test_extension_free_caching.py @@ -23,6 +23,7 @@ WatchlistTracker, is_video_file, is_directory_level_file, + name_matches_video_stem, is_season_like_folder, _get_file_category, find_matching_plexcached, @@ -241,6 +242,117 @@ def test_multi_version_each_has_own_siblings(self, temp_dir): assert sub_4k not in result[video_1080] assert sub_1080 not in result[video_4k] + def test_flat_folder_does_not_grab_other_movies_subtitles(self, temp_dir): + """A flat library folder: caching one movie must not drag in other movies' + subtitles (#182). 
Seeded from the actual issue screenshots.""" + cached = create_test_file( + os.path.join(temp_dir, "Deadwood - The Movie (2019) 1080p.x265rf22FAST.10bit.mp4"), "v" + ) + cached_sub = create_test_file( + os.path.join(temp_dir, "Deadwood - The Movie (2019) 1080p.x265rf22FAST.10bit.en.srt"), "s" + ) + # Other movies that physically share the flat folder but are NOT being cached. + other_movies = [ + "The Revenant 2015 1080p BluRay x264 DTS-JYK", + "Bombshell.2019.1080p.WEBRip.x264.AAC5.1-[YTS.MX]", + "12 Years A Slave 2013 1080p", + "The.Last.of.the.Mohicans.DC.1992.1080p.BrRip.x264.YIFY", + "Poor.Things.2023.1080p.WEBRip.x265.10bit.AAC5.1-[YTS.MX]", + ] + other_subs = [] + for stem in other_movies: + create_test_file(os.path.join(temp_dir, stem + ".mkv"), "v") + other_subs.append(create_test_file(os.path.join(temp_dir, stem + ".srt"), "s")) + + finder = SiblingFileFinder() + result = finder.get_media_siblings_grouped([cached]) + + # The cached movie keeps only its own subtitle... + assert cached_sub in result[cached] + # ...and none of the unrelated movies' subtitles ride along. 
+ for sub in other_subs: + assert sub not in result[cached] + + def test_flat_folder_subtitles_only_excludes_orphans(self, temp_dir): + """Subtitles-only mode in a flat folder: only the cached movie's SRT.""" + cached = create_test_file(os.path.join(temp_dir, "Toy.Story.1995.PROPER.1080p.BluRay.x265-RARBG.mkv"), "v") + cached_sub = create_test_file(os.path.join(temp_dir, "Toy.Story.1995.PROPER.1080p.BluRay.x265-RARBG.srt"), "s") + create_test_file(os.path.join(temp_dir, "The.Pianist.2002.1080p.BluRay.H264.AAC-RARBG.mkv"), "v") + orphan_sub = create_test_file(os.path.join(temp_dir, "The.Pianist.2002.1080p.BluRay.H264.AAC-RARBG.srt"), "s") + + finder = SiblingFileFinder() + result = finder.get_media_subtitles_grouped([cached]) + + assert cached_sub in result[cached] + assert orphan_sub not in result[cached] + + def test_single_movie_folder_keeps_non_prefixed_subtitle(self, temp_dir): + """Standard per-movie folder: a subtitle without the video's quality suffix + (e.g. 'English.srt') is still cached — the fix must not regress this.""" + video = create_test_file(os.path.join(temp_dir, "Movie (2012) [Bluray-1080p].mkv"), "v") + bare_sub = create_test_file(os.path.join(temp_dir, "English.srt"), "s") + named_sub = create_test_file(os.path.join(temp_dir, "Movie (2012).en.srt"), "s") + + finder = SiblingFileFinder() + result = finder.get_media_siblings_grouped([video]) + + assert bare_sub in result[video] + assert named_sub in result[video] + + def test_flat_folder_counts_plexcached_video_as_owner(self, temp_dir): + """A previously-cached movie's .plexcached backup still counts as a physical + video, so its subtitle is not grabbed by the movie being cached now.""" + cached = create_test_file(os.path.join(temp_dir, "Movie A (2020).mkv"), "v") + # Movie B's video lives only as an array backup (it was cached earlier). 
+ create_test_file(os.path.join(temp_dir, "Movie B (2021).mkv.plexcached"), "v") + orphan_sub = create_test_file(os.path.join(temp_dir, "Movie B (2021).en.srt"), "s") + + finder = SiblingFileFinder() + result = finder.get_media_siblings_grouped([cached]) + + assert orphan_sub not in result[cached] + + def test_flat_folder_orphan_subtitle_is_skipped(self, temp_dir): + """In a flat folder, a subtitle that matches no cached video must not be + handed to an unrelated movie — even if its own video isn't name-matchable.""" + cached = create_test_file(os.path.join(temp_dir, "Movie A (2020).mkv"), "v") + # A second movie shares the folder, plus a generically-named orphan sub + # whose owning video can't be identified by name. + create_test_file(os.path.join(temp_dir, "Movie B (2021).mkv"), "v") + orphan_sub = create_test_file(os.path.join(temp_dir, "English.srt"), "s") + + finder = SiblingFileFinder() + result = finder.get_media_siblings_grouped([cached]) + + assert orphan_sub not in result[cached] + + def test_flat_folder_shared_artwork_still_assigned(self, temp_dir): + """A non-subtitle shared asset (poster.jpg) with no name match is still + assigned to the first cached video, even in a multi-video folder.""" + cached = create_test_file(os.path.join(temp_dir, "Movie A (2020).mkv"), "v") + create_test_file(os.path.join(temp_dir, "Movie B (2021).mkv"), "v") + poster = create_test_file(os.path.join(temp_dir, "poster.jpg"), "img") + + finder = SiblingFileFinder() + result = finder.get_media_siblings_grouped([cached]) + + assert poster in result[cached] + + +class TestNameMatchesVideoStem: + def test_exact_stem_with_extension(self): + assert name_matches_video_stem("Movie (2012).en.srt", "Movie (2012)") + + def test_dash_separated_sidecar(self): + assert name_matches_video_stem("Movie - [1080P]-FGT-fanart.jpg", "Movie - [1080P]-FGT") + + def test_rejects_alphanumeric_boundary(self): + # "Movie 10..." must not match the stem "Movie 1" (#96-style collision). 
+ assert not name_matches_video_stem("Movie 10.en.srt", "Movie 1") + + def test_non_matching_prefix(self): + assert not name_matches_video_stem("The Revenant 2015.srt", "Deadwood - The Movie (2019)") + # ============================================================ # CacheTimestampTracker migration tests