From 752ab309ddfce41f1c8cea1239f9b61f7d18548f Mon Sep 17 00:00:00 2001 From: Brandon Haney <121782102+Brandon-Haney@users.noreply.github.com> Date: Mon, 25 May 2026 20:17:43 -0500 Subject: [PATCH] Fix #169: keep TV episodes contiguous when cache space is tight Two layers were causing the "S04E03 skipped, S04E04+E05 cached" gap pattern when episode sizes varied within a show (e.g. 4K vs SDR mix). 1. Ordering was lost before space-fit. _gather_media_to_cache collected paths into a set(), so OnDeck "current + next N episodes" arrived at _apply_cache_limit in arbitrary hash-bucket order. Switched to a dict-as-ordered-set so insertion order survives: pinned, then OnDeck in fetch order, then siblings, then watchlist. 2. The space-fit loop in _apply_cache_limit was greedy first-fit, so a large episode would be skipped and smaller later episodes of the same show would be packed instead, creating viewing gaps. Now tracks shows whose sequence broke at the space boundary and skips every later file mapped to that show via media_info_map. Movies and other shows still pack independently. Added TestNoGapShowOrdering in tests/test_plexcache_quota.py covering the gap scenario, mid-sequence breaks, cross-show independence, the no-metadata fallback, and the all-fit happy path. --- core/app.py | 75 +++++++++++++---- tests/test_plexcache_quota.py | 152 ++++++++++++++++++++++++++++++++++ 2 files changed, 213 insertions(+), 14 deletions(-) diff --git a/core/app.py b/core/app.py index 2ca9ce37..f670ad60 100644 --- a/core/app.py +++ b/core/app.py @@ -1020,8 +1020,12 @@ def _process_media(self) -> None: logging.info("") logging.info("--- Fetching Media ---") - # Use a set to collect already-modified paths (real source paths) - modified_paths_set = set() + # Ordered dedup of already-modified paths (real source paths). Dict + # preserves insertion order (Python 3.7+), so OnDeck "current + next N + # episodes" survives all the way to _apply_cache_limit. Using a plain + # set() here drops that order and lets the space-fit loop create + # viewing gaps (e.g. cache S04E04+E05 while skipping S04E03). See #169. + modified_paths: Dict[str, None] = {} # Prepare OnDeck tracker for new run (preserves first_seen for retention tracking) # Snapshot the rating_key index before update loop for upgrade detection @@ -1033,7 +1037,7 @@ def _process_media(self) -> None: # a pinned item is also currently on someone's OnDeck. The protection that # consumes pinned_paths_cache lands in subsequent commits (priority manager # in 2b, eviction/move-back in 2c) — this commit only sets up the gathering. - self._process_pinned_media(modified_paths_set) + self._process_pinned_media(modified_paths) if self.should_stop: logging.info("Operation stopped during media processing") @@ -1109,9 +1113,13 @@ def _process_media(self) -> None: # Cleanup entries no longer on any user's OnDeck self.ondeck_tracker.cleanup_unseen() - # Store modified OnDeck items for filtering later + # Store modified OnDeck items for filtering later. self.ondeck_items + # stays a set for fast membership tests elsewhere, but the ordered + # `modified_ondeck` list is what feeds modified_paths so episode + # fetch order (current → next N) is preserved for #169. self.ondeck_items = set(modified_ondeck) - modified_paths_set.update(self.ondeck_items) + for path in modified_ondeck: + modified_paths.setdefault(path, None) # Update priority manager with active ondeck cache paths (for episode position scoring) # When retention is enabled, expired items should not get episode position bonuses @@ -1145,7 +1153,8 @@ def _process_media(self) -> None: sibling_count = sum(len(siblings) for siblings in ondeck_sibling_map.values()) # Add all siblings to the modified paths set for siblings in ondeck_sibling_map.values(): - modified_paths_set.update(siblings) + for sibling in siblings: + modified_paths.setdefault(sibling, None) logging.debug(f"Found {sibling_count} sibling files for OnDeck media") # Track source for OnDeck siblings @@ -1165,7 +1174,8 @@ def _process_media(self) -> None: if watchlist_items: # Store watchlist items (don't override ondeck source for items in both) self.watchlist_items = watchlist_items - modified_paths_set.update(watchlist_items) + for item in watchlist_items: + modified_paths.setdefault(item, None) # Track source for watchlist items (only if not already tracked as ondeck) for item in watchlist_items: @@ -1178,7 +1188,7 @@ def _process_media(self) -> None: # Run modify_file_paths on all collected paths to ensure consistent path format logging.debug("Finalizing media to cache list...") - self.media_to_cache = self.file_path_modifier.modify_file_paths(list(modified_paths_set)) + self.media_to_cache = self.file_path_modifier.modify_file_paths(list(modified_paths)) # Log consolidated summary of skipped disabled libraries self.file_path_modifier.log_disabled_skips_summary() @@ -1231,7 +1241,7 @@ def _process_media(self) -> None: self.file_filter.set_media_info_map(self.media_info_map) self._check_files_to_move_back_to_array() - def _process_pinned_media(self, modified_paths_set: set) -> None: + def _process_pinned_media(self, modified_paths: Dict[str, None]) -> None: """Resolve pinned media to real-path file set and merge into the run. This runs before OnDeck/Watchlist so pinned items "win" the source_map @@ -1245,7 +1255,8 @@ def _process_pinned_media(self, modified_paths_set: set) -> None: self.pinned_paths_cache — cache-form paths self.sibling_map — sibling files for pinned episodes/movies self.source_map — path → "pinned" entries - modified_paths_set — extended with pinned paths + siblings + modified_paths — extended with pinned paths + siblings + (dict-as-ordered-set, preserves order) Orphan pins (rating_keys no longer resolvable in Plex) are removed from the tracker and logged. @@ -1299,7 +1310,10 @@ def _process_pinned_media(self, modified_paths_set: set) -> None: modified_pinned = self.file_path_modifier.modify_file_paths(pinned_plex_paths) pinned_video_paths = set(modified_pinned) - modified_paths_set.update(pinned_video_paths) + # Preserve insertion order from `modified_pinned` (the list form) so + # pinned items keep their priority position in the cache fill queue. + for path in modified_pinned: + modified_paths.setdefault(path, None) # Mark every pinned video real path in the source_map FIRST — so OnDeck # and Watchlist's "if not already tracked" guards don't overwrite us. @@ -1327,7 +1341,7 @@ def _process_pinned_media(self, modified_paths_set: set) -> None: pinned_sibling_paths: Set[str] = set() for siblings in pinned_sibling_map.values(): for sibling in siblings: - modified_paths_set.add(sibling) + modified_paths.setdefault(sibling, None) self.source_map[sibling] = "pinned" pinned_sibling_paths.add(sibling) @@ -2166,7 +2180,16 @@ def _apply_cache_limit(self, media_files: List[str], cache_dir: str) -> List[str """Apply cache size limit, min free space, and plexcache quota, filtering out files that would exceed limits. Returns the list of files that fit within the most restrictive constraint. - Files are prioritized in the order they appear (OnDeck items should come first). + Files are processed in the order they appear (OnDeck "current + next N + episodes" first, then watchlist). Callers are responsible for + delivering an ordered list — _gather_media_to_cache uses a + dict-as-ordered-set to preserve OnDeck fetch order. See #169. + + No-gap guarantee for TV: once an episode of show X doesn't fit, every + subsequent file mapped to show X via media_info_map is skipped too. + This prevents the "S04E03 skipped, E04+E05 cached" gap pattern when + episode sizes vary within a show (e.g. 4K vs SDR mix). Movies and + other shows continue to pack independently into remaining space. """ cache_limit_bytes, limit_readable = self._get_effective_cache_limit(cache_dir) min_free_bytes, min_free_readable = self._get_effective_min_free_space(cache_dir) @@ -2238,16 +2261,37 @@ def _apply_cache_limit(self, media_files: List[str], cache_dir: str) -> List[str skipped_count = 0 skipped_size = 0 inaccessible_files = [] + # Shows whose sequence broke at the space boundary — used to skip + # later episodes of the same show and avoid viewing gaps (#169). + shows_at_capacity: Set[str] = set() + gap_skipped_count = 0 + media_info_map = getattr(self, 'media_info_map', {}) or {} for file in media_files: try: + info = media_info_map.get(file) or {} + episode_info = info.get('episode_info') or {} + show_key = episode_info.get('show') + file_size = os.path.getsize(file) + + # Earlier episode of this show didn't fit — don't cache later + # ones either, even if they'd individually fit. Prevents the + # "skip S04E03 but cache E04/E05" gap (#169). + if show_key and show_key in shows_at_capacity: + skipped_count += 1 + skipped_size += file_size + gap_skipped_count += 1 + continue + if file_size <= available_space: files_to_cache.append(file) available_space -= file_size else: skipped_count += 1 skipped_size += file_size + if show_key: + shows_at_capacity.add(show_key) except (OSError, FileNotFoundError) as e: # File doesn't exist or can't be accessed - track for logging inaccessible_files.append((file, str(e))) @@ -2261,7 +2305,10 @@ def _apply_cache_limit(self, media_files: List[str], cache_dir: str) -> List[str if skipped_count > 0: skipped_gb = skipped_size / (1024**3) constraint_name = quota_readable if bottleneck == "plexcache_quota" else (min_free_readable if bottleneck == "min_free_space" else limit_readable) - logging.warning(f"Cache limit reached: skipped {skipped_count} files ({skipped_gb:.2f}GB) that would exceed the {constraint_name} limit") + msg = f"Cache limit reached: skipped {skipped_count} files ({skipped_gb:.2f}GB) that would exceed the {constraint_name} limit" + if gap_skipped_count: + msg += f" ({gap_skipped_count} skipped to keep show episodes contiguous)" + logging.warning(msg) return files_to_cache diff --git a/tests/test_plexcache_quota.py b/tests/test_plexcache_quota.py index 94166a51..f1586cca 100644 --- a/tests/test_plexcache_quota.py +++ b/tests/test_plexcache_quota.py @@ -212,6 +212,158 @@ def test_all_three_constraints_quota_wins(self, tmp_path): assert len(result) == 1 +class TestNoGapShowOrdering: + """Tests for #169: episodes of the same show stay contiguous when space is tight. + + Once an episode of show X doesn't fit, every later file mapped to show X + via media_info_map is skipped — even if it would individually fit. Movies + and other shows still pack independently into the remaining space. + """ + + def _build_episode_file(self, cache_dir, show, season, episode): + """Create a placeholder file for an episode and return its path.""" + from conftest import create_test_file + path = os.path.join(cache_dir, f"{show}_S{season:02d}E{episode:02d}.mkv") + create_test_file(path, size_bytes=1024) + return path + + def test_oversized_episode_skips_later_episodes_same_show(self, tmp_path): + """S04E03 oversized → E04 and E05 skipped even though each would fit.""" + app, cache_dir, _, disk, _ = _build_app( + tmp_path, quota_bytes=210*GB, # 10GB available + ) + + e3 = self._build_episode_file(cache_dir, "ShowA", 4, 3) + e4 = self._build_episode_file(cache_dir, "ShowA", 4, 4) + e5 = self._build_episode_file(cache_dir, "ShowA", 4, 5) + files = [e3, e4, e5] + + app.media_info_map = { + e3: {"media_type": "episode", "episode_info": {"show": "ShowA", "season": 4, "episode": 3}}, + e4: {"media_type": "episode", "episode_info": {"show": "ShowA", "season": 4, "episode": 4}}, + e5: {"media_type": "episode", "episode_info": {"show": "ShowA", "season": 4, "episode": 5}}, + } + + # E03 is 15GB (won't fit in 10GB), E04+E05 are 2GB each (would fit) + sizes = {e3: 15*GB, e4: 2*GB, e5: 2*GB} + + with patch('core.app.get_disk_usage', return_value=disk), \ + patch.object(app, '_get_plexcache_tracked_size', return_value=(200*GB, [])), \ + patch('os.path.getsize', side_effect=lambda f: sizes[f]): + result = app._apply_cache_limit(files, cache_dir) + + # E03 skipped (too big) → E04, E05 skipped to keep show contiguous + assert result == [] + + def test_first_episode_fits_then_oversized_blocks_rest(self, tmp_path): + """E03 fits, E04 oversized → E05+ skipped even though they'd fit.""" + app, cache_dir, _, disk, _ = _build_app( + tmp_path, quota_bytes=220*GB, # 20GB available + ) + + e3 = self._build_episode_file(cache_dir, "ShowA", 4, 3) + e4 = self._build_episode_file(cache_dir, "ShowA", 4, 4) + e5 = self._build_episode_file(cache_dir, "ShowA", 4, 5) + files = [e3, e4, e5] + + app.media_info_map = { + e3: {"media_type": "episode", "episode_info": {"show": "ShowA", "season": 4, "episode": 3}}, + e4: {"media_type": "episode", "episode_info": {"show": "ShowA", "season": 4, "episode": 4}}, + e5: {"media_type": "episode", "episode_info": {"show": "ShowA", "season": 4, "episode": 5}}, + } + + # E03 = 5GB (fits, 15GB left), E04 = 18GB (won't fit), E05 = 2GB (would fit) + sizes = {e3: 5*GB, e4: 18*GB, e5: 2*GB} + + with patch('core.app.get_disk_usage', return_value=disk), \ + patch.object(app, '_get_plexcache_tracked_size', return_value=(200*GB, [])), \ + patch('os.path.getsize', side_effect=lambda f: sizes[f]): + result = app._apply_cache_limit(files, cache_dir) + + # E03 cached; E04 oversized; E05 skipped (same show as the gap-maker) + assert result == [e3] + + def test_other_shows_still_pack_after_one_show_at_capacity(self, tmp_path): + """A show running out of space doesn't starve other shows or movies.""" + app, cache_dir, _, disk, _ = _build_app( + tmp_path, quota_bytes=215*GB, # 15GB available + ) + + a_e3 = self._build_episode_file(cache_dir, "ShowA", 4, 3) + a_e4 = self._build_episode_file(cache_dir, "ShowA", 4, 4) + b_e1 = self._build_episode_file(cache_dir, "ShowB", 1, 1) + movie = os.path.join(cache_dir, "movie.mkv") + from conftest import create_test_file + create_test_file(movie, size_bytes=1024) + + files = [a_e3, a_e4, b_e1, movie] + + app.media_info_map = { + a_e3: {"media_type": "episode", "episode_info": {"show": "ShowA", "season": 4, "episode": 3}}, + a_e4: {"media_type": "episode", "episode_info": {"show": "ShowA", "season": 4, "episode": 4}}, + b_e1: {"media_type": "episode", "episode_info": {"show": "ShowB", "season": 1, "episode": 1}}, + movie: {"media_type": "movie", "episode_info": None}, + } + + # ShowA E03 = 20GB (won't fit), ShowA E04 = 3GB, ShowB E01 = 4GB, movie = 5GB + sizes = {a_e3: 20*GB, a_e4: 3*GB, b_e1: 4*GB, movie: 5*GB} + + with patch('core.app.get_disk_usage', return_value=disk), \ + patch.object(app, '_get_plexcache_tracked_size', return_value=(200*GB, [])), \ + patch('os.path.getsize', side_effect=lambda f: sizes[f]): + result = app._apply_cache_limit(files, cache_dir) + + # ShowA E03 skipped → ShowA E04 also skipped (gap protection). + # ShowB E01 (4GB) fits → 11GB left. Movie (5GB) fits → 6GB left. + assert result == [b_e1, movie] + + def test_files_without_show_metadata_pack_normally(self, tmp_path): + """Non-episode files (movies, siblings) without show keys keep first-fit behavior.""" + app, cache_dir, _, disk, _ = _build_app( + tmp_path, quota_bytes=215*GB, # 15GB available + ) + + big = os.path.join(cache_dir, "big.mkv") + small1 = os.path.join(cache_dir, "small1.mkv") + small2 = os.path.join(cache_dir, "small2.mkv") + from conftest import create_test_file + for p in (big, small1, small2): + create_test_file(p, size_bytes=1024) + files = [big, small1, small2] + + # No media_info_map entries → no show grouping → first-fit semantics + app.media_info_map = {} + sizes = {big: 20*GB, small1: 5*GB, small2: 5*GB} + + with patch('core.app.get_disk_usage', return_value=disk), \ + patch.object(app, '_get_plexcache_tracked_size', return_value=(200*GB, [])), \ + patch('os.path.getsize', side_effect=lambda f: sizes[f]): + result = app._apply_cache_limit(files, cache_dir) + + # Without show metadata, the greedy fit keeps the small ones + assert result == [small1, small2] + + def test_in_order_episodes_all_fit(self, tmp_path): + """When everything fits in order, no episodes are skipped.""" + app, cache_dir, _, disk, _ = _build_app( + tmp_path, quota_bytes=300*GB, # 100GB available + ) + + eps = [self._build_episode_file(cache_dir, "ShowA", 1, i) for i in range(1, 5)] + app.media_info_map = { + eps[i]: {"media_type": "episode", "episode_info": {"show": "ShowA", "season": 1, "episode": i + 1}} + for i in range(4) + } + sizes = {ep: 5*GB for ep in eps} + + with patch('core.app.get_disk_usage', return_value=disk), \ + patch.object(app, '_get_plexcache_tracked_size', return_value=(200*GB, [])), \ + patch('os.path.getsize', side_effect=lambda f: sizes[f]): + result = app._apply_cache_limit(eps, cache_dir) + + assert result == eps + + class TestPlexcacheQuotaConfig: """Tests for plexcache_quota config parsing."""