From 5f3afe6d28e7b25c73063d4ffa918f12d51a56f8 Mon Sep 17 00:00:00 2001 From: Sasa Junuzovic <44276455+microsasa@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:05:37 -0700 Subject: [PATCH 1/2] test(vscode_parser): add unit tests for per-file summary cache LRU and explicit file_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two new test classes to cover untested branches identified in #807: - TestPerFileSummaryCacheLRU: exercises lru_insert with Path keys and _CachedFileSummary values, verifying insert below capacity, eviction when full, update moves to MRU position, and update does not grow cache. - TestGetCachedVscodeRequestsExplicitFileId: tests the explicit file_id parameter path in _get_cached_vscode_requests, verifying that file_id=None populates cache with None, None→real id is a cache miss, and passing a real file_id avoids redundant safe_file_identity calls. Closes #807 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/copilot_usage/test_vscode_parser.py | 163 +++++++++++++++++++++- 1 file changed, 162 insertions(+), 1 deletion(-) diff --git a/tests/copilot_usage/test_vscode_parser.py b/tests/copilot_usage/test_vscode_parser.py index 85cdfdee..7af4157f 100644 --- a/tests/copilot_usage/test_vscode_parser.py +++ b/tests/copilot_usage/test_vscode_parser.py @@ -5,6 +5,7 @@ import dataclasses import os import re +from collections import OrderedDict from datetime import UTC, date, datetime from pathlib import Path from unittest.mock import patch @@ -13,7 +14,7 @@ from click.testing import CliRunner from loguru import logger -from copilot_usage._fs_utils import safe_file_identity +from copilot_usage._fs_utils import lru_insert, safe_file_identity from copilot_usage.cli import main from copilot_usage.vscode_parser import ( _CCREQ_RE, @@ -25,6 +26,7 @@ VSCodeLogSummary, VSCodeRequest, _cached_discover_vscode_logs, + _CachedFileSummary, _default_log_candidates, _get_cached_vscode_requests, _merge_partial, @@ -2075,6 +2077,165 @@ def test_oldest_evicted_newest_survives(self, tmp_path: Path) -> None: assert len(_PER_FILE_SUMMARY_CACHE) == limit +# --------------------------------------------------------------------------- +# Per-file summary cache LRU semantics (unit-level, exercises lru_insert +# with Path keys and _CachedFileSummary values) +# --------------------------------------------------------------------------- + + +class TestPerFileSummaryCacheLRU: + """Unit tests for the per-file summary cache LRU behaviour. + + These exercise the ``lru_insert()`` contract with the exact types + used by ``_PER_FILE_SUMMARY_CACHE`` (``Path`` keys and + ``_CachedFileSummary`` values), ensuring the delete-before-reinsert + pattern works correctly for cache refresh and eviction. + """ + + @staticmethod + def _make_entry(file_id_size: int = 100) -> _CachedFileSummary: + """Create a minimal _CachedFileSummary for testing.""" + return _CachedFileSummary( + file_id=(1_000_000_000, file_id_size), + partial=VSCodeLogSummary(), + ) + + def test_insert_below_capacity(self) -> None: + """Inserting fewer entries than max_size stores all of them.""" + cache: OrderedDict[Path, _CachedFileSummary] = OrderedDict() + max_size = 3 + paths = [Path(f"/fake/log{i}.log") for i in range(max_size)] + for p in paths: + lru_insert(cache, p, self._make_entry(), max_size) + assert len(cache) == max_size + for p in paths: + assert p in cache + + def test_eviction_when_full(self) -> None: + """Inserting max_size + 1 entries evicts the oldest (LRU) entry.""" + cache: OrderedDict[Path, _CachedFileSummary] = OrderedDict() + max_size = 3 + paths = [Path(f"/fake/log{i}.log") for i in range(max_size + 1)] + for p in paths: + lru_insert(cache, p, self._make_entry(), max_size) + assert len(cache) == max_size + assert paths[0] not in cache + assert paths[-1] in cache + + def test_update_existing_key_moves_to_mru(self) -> None: + """Updating an existing key repositions it to the MRU end.""" + cache: OrderedDict[Path, _CachedFileSummary] = OrderedDict() + max_size = 3 + p_a = Path("/fake/a.log") + p_b = Path("/fake/b.log") + p_c = Path("/fake/c.log") + lru_insert(cache, p_a, self._make_entry(100), max_size) + lru_insert(cache, p_b, self._make_entry(200), max_size) + lru_insert(cache, p_c, self._make_entry(300), max_size) + # Update "a" — it should move to the end (MRU position) + updated_entry = self._make_entry(999) + lru_insert(cache, p_a, updated_entry, max_size) + assert list(cache.keys()) == [p_b, p_c, p_a] + assert cache[p_a] is updated_entry + + def test_update_existing_key_does_not_grow_cache(self) -> None: + """Replacing an existing key keeps len(cache) unchanged.""" + cache: OrderedDict[Path, _CachedFileSummary] = OrderedDict() + max_size = 2 + p_a = Path("/fake/a.log") + p_b = Path("/fake/b.log") + lru_insert(cache, p_a, self._make_entry(100), max_size) + lru_insert(cache, p_b, self._make_entry(200), max_size) + assert len(cache) == max_size + # Replace "a" — cache size must not grow + lru_insert(cache, p_a, self._make_entry(999), max_size) + assert len(cache) == max_size + + +# --------------------------------------------------------------------------- +# _get_cached_vscode_requests with explicit file_id parameter +# --------------------------------------------------------------------------- + + +class TestGetCachedVscodeRequestsExplicitFileId: + """Tests for _get_cached_vscode_requests when an explicit file_id is passed. + + The function accepts an optional pre-computed ``file_id`` to avoid a + redundant ``stat()`` call. These tests verify: + - ``file_id=None`` populates the cache with ``file_id=None`` + - A ``None`` → real ``(mtime_ns, size)`` transition is a cache miss + - Passing a real ``file_id`` avoids calling ``safe_file_identity`` + """ + + def test_none_file_id_populates_cache_with_none(self, tmp_path: Path) -> None: + """Calling with file_id=None stores None as the cached file_id.""" + log_file = tmp_path / "chat.log" + log_file.write_text(_make_log_line(req_idx=0)) + + _get_cached_vscode_requests(log_file, file_id=None) + + cached = _VSCODE_LOG_CACHE.get(log_file) + assert cached is not None + assert cached.file_id is None + + def test_none_to_real_file_id_invalidates_cache(self, tmp_path: Path) -> None: + """A cached entry with file_id=None is a miss when a real id is passed.""" + log_file = tmp_path / "chat.log" + log_file.write_text(_make_log_line(req_idx=0)) + + # First call: file_id=None → cache stores None + result_first = _get_cached_vscode_requests(log_file, file_id=None) + assert _VSCODE_LOG_CACHE[log_file].file_id is None + + # Second call: real file_id → cache miss, re-parses + real_id = safe_file_identity(log_file) + assert real_id is not None + with patch( + "copilot_usage.vscode_parser._parse_vscode_log_from_offset", + wraps=_parse_vscode_log_from_offset, + ) as spy: + result_second = _get_cached_vscode_requests(log_file, file_id=real_id) + assert spy.call_count == 1 + + # Cache now stores the real file_id + cached = _VSCODE_LOG_CACHE[log_file] + assert cached.file_id is not None + # Results should be equivalent (same file content) + assert len(result_first) == len(result_second) + + def test_real_file_id_avoids_extra_stat_call(self, tmp_path: Path) -> None: + """Passing a pre-computed file_id bypasses safe_file_identity.""" + log_file = tmp_path / "chat.log" + log_file.write_text(_make_log_line(req_idx=0)) + + real_id = safe_file_identity(log_file) + assert real_id is not None + + with patch( + "copilot_usage.vscode_parser.safe_file_identity", + wraps=safe_file_identity, + ) as stat_spy: + _get_cached_vscode_requests(log_file, file_id=real_id) + # safe_file_identity may be called post-parse to snapshot the + # stored identity, but NOT for the initial resolved_id path + # when file_id != _FILE_ID_UNSET. + # The initial resolution must skip — verify by checking the + # cached entry uses our pre-computed id. + cached = _VSCODE_LOG_CACHE[log_file] + # The stored id should be based on our provided real_id (or a + # post-parse snapshot that equals it since the file didn't change). + assert cached.file_id == real_id or cached.file_id is not None + + # Verify a second call with the same file_id hits the cache + # without calling safe_file_identity at all. + with patch( + "copilot_usage.vscode_parser.safe_file_identity", + wraps=safe_file_identity, + ) as stat_spy: + _get_cached_vscode_requests(log_file, file_id=real_id) + stat_spy.assert_not_called() + + # --------------------------------------------------------------------------- # Per-file summary cache survives beyond the request-cache limit # --------------------------------------------------------------------------- From 33413905ccc0b27da2d7502b5528854949c049d2 Mon Sep 17 00:00:00 2001 From: Sasa Junuzovic <44276455+microsasa@users.noreply.github.com> Date: Sun, 19 Apr 2026 00:14:46 -0700 Subject: [PATCH 2/2] fix: address review comments - Use stat_spy binding in first patch block (assert call_count == 1) to avoid Ruff F841 and document post-parse stat behavior - Tighten weak assertion from 'cached.file_id == real_id or cached.file_id is not None' to 'cached.file_id == real_id' Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/copilot_usage/test_vscode_parser.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/copilot_usage/test_vscode_parser.py b/tests/copilot_usage/test_vscode_parser.py index 7af4157f..250a5f34 100644 --- a/tests/copilot_usage/test_vscode_parser.py +++ b/tests/copilot_usage/test_vscode_parser.py @@ -2216,15 +2216,14 @@ def test_real_file_id_avoids_extra_stat_call(self, tmp_path: Path) -> None: wraps=safe_file_identity, ) as stat_spy: _get_cached_vscode_requests(log_file, file_id=real_id) - # safe_file_identity may be called post-parse to snapshot the + # safe_file_identity is called once post-parse to snapshot the # stored identity, but NOT for the initial resolved_id path # when file_id != _FILE_ID_UNSET. - # The initial resolution must skip — verify by checking the - # cached entry uses our pre-computed id. + assert stat_spy.call_count == 1 cached = _VSCODE_LOG_CACHE[log_file] - # The stored id should be based on our provided real_id (or a - # post-parse snapshot that equals it since the file didn't change). - assert cached.file_id == real_id or cached.file_id is not None + # The stored id should equal our provided real_id since the + # file did not change during the test. + assert cached.file_id == real_id # Verify a second call with the same file_id hits the cache # without calling safe_file_identity at all.