From 2cbfcd6369b0db7fad34916f0a273a7a6a7c7972 Mon Sep 17 00:00:00 2001 From: bhavana-giri Date: Fri, 3 Apr 2026 16:09:02 +0530 Subject: [PATCH 01/10] Handle missing tiktoken encoding data in api token counting --- agent_memory_server/api.py | 57 ++++++++++++++++++++++++++------- tests/test_issue_237.py | 65 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 110 insertions(+), 12 deletions(-) create mode 100644 tests/test_issue_237.py diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index b5be13dc..85b2babf 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -54,6 +54,8 @@ logger = get_logger(__name__) +_TIKTOKEN_ENCODING_CACHE: Any | None = None +_TIKTOKEN_ENCODING_LOAD_ATTEMPTED = False router = APIRouter() @@ -102,15 +104,47 @@ def _get_effective_token_limit( def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int: """Calculate total token count for a list of messages.""" - encoding = tiktoken.get_encoding("cl100k_base") - total_tokens = 0 + return sum(_estimate_message_token_count(msg) for msg in messages) + + +def _get_tiktoken_encoding() -> Any | None: + """Load the tokenizer encoding once and fall back safely if unavailable.""" + global _TIKTOKEN_ENCODING_CACHE, _TIKTOKEN_ENCODING_LOAD_ATTEMPTED + + if _TIKTOKEN_ENCODING_CACHE is not None: + return _TIKTOKEN_ENCODING_CACHE + if _TIKTOKEN_ENCODING_LOAD_ATTEMPTED: + return None + + _TIKTOKEN_ENCODING_LOAD_ATTEMPTED = True + try: + _TIKTOKEN_ENCODING_CACHE = tiktoken.get_encoding("cl100k_base") + except Exception as exc: + logger.warning( + "tiktoken encoding unavailable, using character-based token estimate", + error=str(exc), + ) + return None + + return _TIKTOKEN_ENCODING_CACHE + + +def _estimate_text_token_count(text: str) -> int: + """Estimate token count when tiktoken is unavailable.""" + return max(1, (len(text) + 3) // 4) + + +def _count_text_tokens(text: str) -> int: + """Count tokens accurately when possible, otherwise fall back to estimation.""" + encoding = _get_tiktoken_encoding() + if encoding is None: + return _estimate_text_token_count(text) + return len(encoding.encode(text)) - for msg in messages: - msg_str = f"{msg.role}: {msg.content}" - msg_tokens = len(encoding.encode(msg_str)) - total_tokens += msg_tokens - return total_tokens +def _estimate_message_token_count(message: MemoryMessage) -> int: + """Count tokens for a single working-memory message.""" + return _count_text_tokens(f"{message.role}: {message.content}") def _calculate_context_usage_percentages( @@ -250,7 +284,6 @@ async def _summarize_working_memory( buffer_tokens = min(max(230, summarization_max_tokens // 100), 1000) max_message_tokens = summarization_max_tokens - summary_max_tokens - buffer_tokens - encoding = tiktoken.get_encoding("cl100k_base") total_tokens = 0 messages_to_summarize = [] @@ -266,7 +299,7 @@ async def _summarize_working_memory( for i in range(len(memory.messages) - 1, -1, -1): msg = memory.messages[i] msg_str = f"{msg.role}: {msg.content}" - msg_tokens = len(encoding.encode(msg_str)) + msg_tokens = _count_text_tokens(msg_str) if recent_messages_tokens + msg_tokens <= target_remaining_tokens: recent_messages_tokens += msg_tokens @@ -281,12 +314,12 @@ async def _summarize_working_memory( for msg in messages_to_check: msg_str = f"{msg.role}: {msg.content}" - msg_tokens = len(encoding.encode(msg_str)) + msg_tokens = _count_text_tokens(msg_str) # Handle oversized messages if msg_tokens > max_message_tokens: - msg_str = msg_str[: max_message_tokens // 2] - msg_tokens = len(encoding.encode(msg_str)) + msg_str = msg_str[: max(max_message_tokens * 4, 1)] + msg_tokens = _count_text_tokens(msg_str) if total_tokens + msg_tokens <= max_message_tokens: total_tokens += msg_tokens diff --git a/tests/test_issue_237.py b/tests/test_issue_237.py new file mode 100644 index 00000000..9d28dd52 --- /dev/null +++ b/tests/test_issue_237.py @@ -0,0 +1,65 @@ +"""Tests for GitHub issue #237: safe token counting when tiktoken is unavailable.""" + +from unittest.mock import patch + +import pytest + +from agent_memory_server.api import _calculate_messages_token_count +from agent_memory_server.models import MemoryMessage + + +class TestIssue237TiktokenFallback: + def test_calculate_messages_token_count_falls_back_when_tiktoken_unavailable( + self, + ): + """Token counting should degrade gracefully when the encoding cannot load.""" + messages = [MemoryMessage(role="user", content="Hello world")] + + with ( + patch("agent_memory_server.api._TIKTOKEN_ENCODING_CACHE", None), + patch("agent_memory_server.api._TIKTOKEN_ENCODING_LOAD_ATTEMPTED", False), + patch( + "agent_memory_server.api.tiktoken.get_encoding", + side_effect=Exception("Could not download encoding data"), + ), + ): + token_count = _calculate_messages_token_count(messages) + + assert token_count > 0 + + @pytest.mark.asyncio + async def test_get_working_memory_uses_fallback_when_tiktoken_unavailable( + self, client + ): + """GET should return session data instead of a 500 when tokenization fails.""" + if client is None: + pytest.skip("Client not available") + + session_id = "issue-237-api" + + put_response = await client.put( + f"/v1/working-memory/{session_id}", + json={ + "messages": [{"role": "user", "content": "Hello from issue 237"}], + "user_id": "alice", + "namespace": "demo", + }, + ) + assert put_response.status_code == 200 + + with ( + patch("agent_memory_server.api._TIKTOKEN_ENCODING_CACHE", None), + patch("agent_memory_server.api._TIKTOKEN_ENCODING_LOAD_ATTEMPTED", False), + patch( + "agent_memory_server.api.tiktoken.get_encoding", + side_effect=Exception("Could not download encoding data"), + ), + ): + get_response = await client.get( + f"/v1/working-memory/{session_id}?model_name=gpt-4o" + ) + + assert get_response.status_code == 200, get_response.text + data = get_response.json() + assert data["session_id"] == session_id + assert len(data["messages"]) == 1 From 38f0396ea1a4167ec6595ee92a5c06cfe8f5c130 Mon Sep 17 00:00:00 2001 From: bhavana-giri Date: Fri, 3 Apr 2026 16:24:29 +0530 Subject: [PATCH 02/10] Tighten oversized message truncation --- agent_memory_server/api.py | 22 +++++++++++++++++++++- tests/test_issue_237.py | 23 ++++++++++++++++++++++- 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index 85b2babf..cc8ada22 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -147,6 +147,26 @@ def _estimate_message_token_count(message: MemoryMessage) -> int: return _count_text_tokens(f"{message.role}: {message.content}") +def _truncate_text_to_token_budget(text: str, max_tokens: int) -> str: + """Trim text so its token count fits within the requested budget.""" + if max_tokens <= 0: + return "" + + if _count_text_tokens(text) <= max_tokens: + return text + + low = 0 + high = len(text) + while low < high: + mid = (low + high + 1) // 2 + if _count_text_tokens(text[:mid]) <= max_tokens: + low = mid + else: + high = mid - 1 + + return text[:low] + + def _calculate_context_usage_percentages( messages: list[MemoryMessage], model_name: ModelNameLiteral | None, @@ -318,7 +338,7 @@ async def _summarize_working_memory( # Handle oversized messages if msg_tokens > max_message_tokens: - msg_str = msg_str[: max(max_message_tokens * 4, 1)] + msg_str = _truncate_text_to_token_budget(msg_str, max_message_tokens) msg_tokens = _count_text_tokens(msg_str) if total_tokens + msg_tokens <= max_message_tokens: diff --git a/tests/test_issue_237.py b/tests/test_issue_237.py index 9d28dd52..fc8dc8bb 100644 --- a/tests/test_issue_237.py +++ b/tests/test_issue_237.py @@ -4,7 +4,10 @@ import pytest -from agent_memory_server.api import _calculate_messages_token_count +from agent_memory_server.api import ( + _calculate_messages_token_count, + _truncate_text_to_token_budget, +) from agent_memory_server.models import MemoryMessage @@ -63,3 +66,21 @@ async def test_get_working_memory_uses_fallback_when_tiktoken_unavailable( data = get_response.json() assert data["session_id"] == session_id assert len(data["messages"]) == 1 + + def test_truncate_text_to_token_budget_respects_actual_token_limit(self): + """Oversized messages should be trimmed until they fit the target budget.""" + + class FakeEncoding: + def encode(self, text: str) -> list[int]: + # Treat every character as a token so naive char*4 truncation would fail. + return [0] * len(text) + + long_text = "user: " + ("x" * 100) + + with ( + patch("agent_memory_server.api._TIKTOKEN_ENCODING_CACHE", FakeEncoding()), + patch("agent_memory_server.api._TIKTOKEN_ENCODING_LOAD_ATTEMPTED", True), + ): + truncated = _truncate_text_to_token_budget(long_text, 10) + + assert len(truncated) == 10 From 0ef76f6ff552baf67fa2fbd9e80e13047fb6dcb3 Mon Sep 17 00:00:00 2001 From: bhavana-giri Date: Fri, 3 Apr 2026 16:26:58 +0530 Subject: [PATCH 03/10] Keep issue 237 fix narrowly scoped --- agent_memory_server/api.py | 22 +--------------------- tests/test_issue_237.py | 23 +---------------------- 2 files changed, 2 insertions(+), 43 deletions(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index cc8ada22..24e4f2a7 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -147,26 +147,6 @@ def _estimate_message_token_count(message: MemoryMessage) -> int: return _count_text_tokens(f"{message.role}: {message.content}") -def _truncate_text_to_token_budget(text: str, max_tokens: int) -> str: - """Trim text so its token count fits within the requested budget.""" - if max_tokens <= 0: - return "" - - if _count_text_tokens(text) <= max_tokens: - return text - - low = 0 - high = len(text) - while low < high: - mid = (low + high + 1) // 2 - if _count_text_tokens(text[:mid]) <= max_tokens: - low = mid - else: - high = mid - 1 - - return text[:low] - - def _calculate_context_usage_percentages( messages: list[MemoryMessage], model_name: ModelNameLiteral | None, @@ -338,7 +318,7 @@ async def _summarize_working_memory( # Handle oversized messages if msg_tokens > max_message_tokens: - msg_str = _truncate_text_to_token_budget(msg_str, max_message_tokens) + msg_str = msg_str[: max_message_tokens // 2] msg_tokens = _count_text_tokens(msg_str) if total_tokens + msg_tokens <= max_message_tokens: diff --git a/tests/test_issue_237.py b/tests/test_issue_237.py index fc8dc8bb..9d28dd52 100644 --- a/tests/test_issue_237.py +++ b/tests/test_issue_237.py @@ -4,10 +4,7 @@ import pytest -from agent_memory_server.api import ( - _calculate_messages_token_count, - _truncate_text_to_token_budget, -) +from agent_memory_server.api import _calculate_messages_token_count from agent_memory_server.models import MemoryMessage @@ -66,21 +63,3 @@ async def test_get_working_memory_uses_fallback_when_tiktoken_unavailable( data = get_response.json() assert data["session_id"] == session_id assert len(data["messages"]) == 1 - - def test_truncate_text_to_token_budget_respects_actual_token_limit(self): - """Oversized messages should be trimmed until they fit the target budget.""" - - class FakeEncoding: - def encode(self, text: str) -> list[int]: - # Treat every character as a token so naive char*4 truncation would fail. - return [0] * len(text) - - long_text = "user: " + ("x" * 100) - - with ( - patch("agent_memory_server.api._TIKTOKEN_ENCODING_CACHE", FakeEncoding()), - patch("agent_memory_server.api._TIKTOKEN_ENCODING_LOAD_ATTEMPTED", True), - ): - truncated = _truncate_text_to_token_budget(long_text, 10) - - assert len(truncated) == 10 From 2947bbceea396279b69b5e1cc1bf3b6e3a5fa587 Mon Sep 17 00:00:00 2001 From: bhavana-giri Date: Fri, 3 Apr 2026 18:14:33 +0530 Subject: [PATCH 04/10] Use lower-case names for tiktoken state --- agent_memory_server/api.py | 18 +++++++++--------- tests/test_issue_237.py | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index 24e4f2a7..0e9fc9df 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -54,8 +54,8 @@ logger = get_logger(__name__) -_TIKTOKEN_ENCODING_CACHE: Any | None = None -_TIKTOKEN_ENCODING_LOAD_ATTEMPTED = False +_tiktoken_encoding: Any | None = None +_tiktoken_encoding_load_attempted = False router = APIRouter() @@ -109,16 +109,16 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int: def _get_tiktoken_encoding() -> Any | None: """Load the tokenizer encoding once and fall back safely if unavailable.""" - global _TIKTOKEN_ENCODING_CACHE, _TIKTOKEN_ENCODING_LOAD_ATTEMPTED + global _tiktoken_encoding, _tiktoken_encoding_load_attempted - if _TIKTOKEN_ENCODING_CACHE is not None: - return _TIKTOKEN_ENCODING_CACHE - if _TIKTOKEN_ENCODING_LOAD_ATTEMPTED: + if _tiktoken_encoding is not None: + return _tiktoken_encoding + if _tiktoken_encoding_load_attempted: return None - _TIKTOKEN_ENCODING_LOAD_ATTEMPTED = True + _tiktoken_encoding_load_attempted = True try: - _TIKTOKEN_ENCODING_CACHE = tiktoken.get_encoding("cl100k_base") + _tiktoken_encoding = tiktoken.get_encoding("cl100k_base") except Exception as exc: logger.warning( "tiktoken encoding unavailable, using character-based token estimate", @@ -126,7 +126,7 @@ def _get_tiktoken_encoding() -> Any | None: ) return None - return _TIKTOKEN_ENCODING_CACHE + return _tiktoken_encoding def _estimate_text_token_count(text: str) -> int: diff --git a/tests/test_issue_237.py b/tests/test_issue_237.py index 9d28dd52..52f54e6f 100644 --- a/tests/test_issue_237.py +++ b/tests/test_issue_237.py @@ -16,8 +16,8 @@ def test_calculate_messages_token_count_falls_back_when_tiktoken_unavailable( messages = [MemoryMessage(role="user", content="Hello world")] with ( - patch("agent_memory_server.api._TIKTOKEN_ENCODING_CACHE", None), - patch("agent_memory_server.api._TIKTOKEN_ENCODING_LOAD_ATTEMPTED", False), + patch("agent_memory_server.api._tiktoken_encoding", None), + patch("agent_memory_server.api._tiktoken_encoding_load_attempted", False), patch( "agent_memory_server.api.tiktoken.get_encoding", side_effect=Exception("Could not download encoding data"), @@ -48,8 +48,8 @@ async def test_get_working_memory_uses_fallback_when_tiktoken_unavailable( assert put_response.status_code == 200 with ( - patch("agent_memory_server.api._TIKTOKEN_ENCODING_CACHE", None), - patch("agent_memory_server.api._TIKTOKEN_ENCODING_LOAD_ATTEMPTED", False), + patch("agent_memory_server.api._tiktoken_encoding", None), + patch("agent_memory_server.api._tiktoken_encoding_load_attempted", False), patch( "agent_memory_server.api.tiktoken.get_encoding", side_effect=Exception("Could not download encoding data"), From 05d3c9535aef16400ed499817305c96d77af2a2c Mon Sep 17 00:00:00 2001 From: bhavana-giri Date: Fri, 3 Apr 2026 22:30:17 +0530 Subject: [PATCH 05/10] Address Copilot review cleanups --- agent_memory_server/api.py | 4 ++-- tests/test_issue_237.py | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index 0e9fc9df..af1564b0 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -104,7 +104,7 @@ def _get_effective_token_limit( def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int: """Calculate total token count for a list of messages.""" - return sum(_estimate_message_token_count(msg) for msg in messages) + return sum(_count_message_tokens(msg) for msg in messages) def _get_tiktoken_encoding() -> Any | None: @@ -142,7 +142,7 @@ def _count_text_tokens(text: str) -> int: return len(encoding.encode(text)) -def _estimate_message_token_count(message: MemoryMessage) -> int: +def _count_message_tokens(message: MemoryMessage) -> int: """Count tokens for a single working-memory message.""" return _count_text_tokens(f"{message.role}: {message.content}") diff --git a/tests/test_issue_237.py b/tests/test_issue_237.py index 52f54e6f..0efbd9c9 100644 --- a/tests/test_issue_237.py +++ b/tests/test_issue_237.py @@ -32,9 +32,6 @@ async def test_get_working_memory_uses_fallback_when_tiktoken_unavailable( self, client ): """GET should return session data instead of a 500 when tokenization fails.""" - if client is None: - pytest.skip("Client not available") - session_id = "issue-237-api" put_response = await client.put( From 84472124b40d6c59526c7fd7869981ecd0017465 Mon Sep 17 00:00:00 2001 From: bhavana-giri Date: Fri, 3 Apr 2026 22:47:29 +0530 Subject: [PATCH 06/10] Simplify tiktoken encoding cache state --- agent_memory_server/api.py | 6 +----- tests/test_issue_237.py | 2 -- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index af1564b0..575ecf08 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -55,7 +55,6 @@ logger = get_logger(__name__) _tiktoken_encoding: Any | None = None -_tiktoken_encoding_load_attempted = False router = APIRouter() @@ -109,14 +108,11 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int: def _get_tiktoken_encoding() -> Any | None: """Load the tokenizer encoding once and fall back safely if unavailable.""" - global _tiktoken_encoding, _tiktoken_encoding_load_attempted + global _tiktoken_encoding if _tiktoken_encoding is not None: return _tiktoken_encoding - if _tiktoken_encoding_load_attempted: - return None - _tiktoken_encoding_load_attempted = True try: _tiktoken_encoding = tiktoken.get_encoding("cl100k_base") except Exception as exc: diff --git a/tests/test_issue_237.py b/tests/test_issue_237.py index 0efbd9c9..7eaf7b56 100644 --- a/tests/test_issue_237.py +++ b/tests/test_issue_237.py @@ -17,7 +17,6 @@ def test_calculate_messages_token_count_falls_back_when_tiktoken_unavailable( with ( patch("agent_memory_server.api._tiktoken_encoding", None), - patch("agent_memory_server.api._tiktoken_encoding_load_attempted", False), patch( "agent_memory_server.api.tiktoken.get_encoding", side_effect=Exception("Could not download encoding data"), @@ -46,7 +45,6 @@ async def test_get_working_memory_uses_fallback_when_tiktoken_unavailable( with ( patch("agent_memory_server.api._tiktoken_encoding", None), - patch("agent_memory_server.api._tiktoken_encoding_load_attempted", False), patch( "agent_memory_server.api.tiktoken.get_encoding", side_effect=Exception("Could not download encoding data"), From 1acdc0e3343187fcdb736304ffff0b35a185a4fb Mon Sep 17 00:00:00 2001 From: bhavana-giri Date: Fri, 3 Apr 2026 23:00:58 +0530 Subject: [PATCH 07/10] Memoize failed tiktoken initialization --- agent_memory_server/api.py | 6 +++++- tests/test_issue_237.py | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index 575ecf08..af1564b0 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -55,6 +55,7 @@ logger = get_logger(__name__) _tiktoken_encoding: Any | None = None +_tiktoken_encoding_load_attempted = False router = APIRouter() @@ -108,11 +109,14 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int: def _get_tiktoken_encoding() -> Any | None: """Load the tokenizer encoding once and fall back safely if unavailable.""" - global _tiktoken_encoding + global _tiktoken_encoding, _tiktoken_encoding_load_attempted if _tiktoken_encoding is not None: return _tiktoken_encoding + if _tiktoken_encoding_load_attempted: + return None + _tiktoken_encoding_load_attempted = True try: _tiktoken_encoding = tiktoken.get_encoding("cl100k_base") except Exception as exc: diff --git a/tests/test_issue_237.py b/tests/test_issue_237.py index 7eaf7b56..0efbd9c9 100644 --- a/tests/test_issue_237.py +++ b/tests/test_issue_237.py @@ -17,6 +17,7 @@ def test_calculate_messages_token_count_falls_back_when_tiktoken_unavailable( with ( patch("agent_memory_server.api._tiktoken_encoding", None), + patch("agent_memory_server.api._tiktoken_encoding_load_attempted", False), patch( "agent_memory_server.api.tiktoken.get_encoding", side_effect=Exception("Could not download encoding data"), @@ -45,6 +46,7 @@ async def test_get_working_memory_uses_fallback_when_tiktoken_unavailable( with ( patch("agent_memory_server.api._tiktoken_encoding", None), + patch("agent_memory_server.api._tiktoken_encoding_load_attempted", False), patch( "agent_memory_server.api.tiktoken.get_encoding", side_effect=Exception("Could not download encoding data"), From e4dc8ff6132acd2685957eadaea49d47c311fc8a Mon Sep 17 00:00:00 2001 From: bhavana-giri Date: Fri, 3 Apr 2026 23:20:51 +0530 Subject: [PATCH 08/10] Add tiktoken retry backoff --- agent_memory_server/api.py | 17 ++++++++++---- tests/test_issue_237.py | 48 ++++++++++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index af1564b0..cc0794b6 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -1,3 +1,4 @@ +import time from typing import Any import tiktoken @@ -55,7 +56,8 @@ logger = get_logger(__name__) _tiktoken_encoding: Any | None = None -_tiktoken_encoding_load_attempted = False +_tiktoken_encoding_last_failed_at: float | None = None +_TIKTOKEN_ENCODING_RETRY_INTERVAL_SECONDS = 300 router = APIRouter() @@ -109,17 +111,24 @@ def _calculate_messages_token_count(messages: list[MemoryMessage]) -> int: def _get_tiktoken_encoding() -> Any | None: """Load the tokenizer encoding once and fall back safely if unavailable.""" - global _tiktoken_encoding, _tiktoken_encoding_load_attempted + global _tiktoken_encoding, _tiktoken_encoding_last_failed_at if _tiktoken_encoding is not None: return _tiktoken_encoding - if _tiktoken_encoding_load_attempted: + + now = time.monotonic() + if ( + _tiktoken_encoding_last_failed_at is not None + and now - _tiktoken_encoding_last_failed_at + < _TIKTOKEN_ENCODING_RETRY_INTERVAL_SECONDS + ): return None - _tiktoken_encoding_load_attempted = True try: _tiktoken_encoding = tiktoken.get_encoding("cl100k_base") + _tiktoken_encoding_last_failed_at = None except Exception as exc: + _tiktoken_encoding_last_failed_at = now logger.warning( "tiktoken encoding unavailable, using character-based token estimate", error=str(exc), diff --git a/tests/test_issue_237.py b/tests/test_issue_237.py index 0efbd9c9..067bd5e3 100644 --- a/tests/test_issue_237.py +++ b/tests/test_issue_237.py @@ -1,10 +1,13 @@ """Tests for GitHub issue #237: safe token counting when tiktoken is unavailable.""" -from unittest.mock import patch +from unittest.mock import Mock, patch import pytest -from agent_memory_server.api import _calculate_messages_token_count +from agent_memory_server.api import ( + _calculate_messages_token_count, + _get_tiktoken_encoding, +) from agent_memory_server.models import MemoryMessage @@ -17,7 +20,7 @@ def test_calculate_messages_token_count_falls_back_when_tiktoken_unavailable( with ( patch("agent_memory_server.api._tiktoken_encoding", None), - patch("agent_memory_server.api._tiktoken_encoding_load_attempted", False), + patch("agent_memory_server.api._tiktoken_encoding_last_failed_at", None), patch( "agent_memory_server.api.tiktoken.get_encoding", side_effect=Exception("Could not download encoding data"), @@ -46,7 +49,7 @@ async def test_get_working_memory_uses_fallback_when_tiktoken_unavailable( with ( patch("agent_memory_server.api._tiktoken_encoding", None), - patch("agent_memory_server.api._tiktoken_encoding_load_attempted", False), + patch("agent_memory_server.api._tiktoken_encoding_last_failed_at", None), patch( "agent_memory_server.api.tiktoken.get_encoding", side_effect=Exception("Could not download encoding data"), @@ -60,3 +63,40 @@ async def test_get_working_memory_uses_fallback_when_tiktoken_unavailable( data = get_response.json() assert data["session_id"] == session_id assert len(data["messages"]) == 1 + + def test_get_tiktoken_encoding_skips_retries_within_backoff_window(self): + """Repeated calls should not re-attempt loading within the retry interval.""" + mock_get_encoding = Mock(side_effect=Exception("Could not download encoding")) + + with ( + patch("agent_memory_server.api._tiktoken_encoding", None), + patch("agent_memory_server.api._tiktoken_encoding_last_failed_at", None), + patch("agent_memory_server.api.time.monotonic", side_effect=[100.0, 101.0]), + patch("agent_memory_server.api.tiktoken.get_encoding", mock_get_encoding), + ): + assert _get_tiktoken_encoding() is None + assert _get_tiktoken_encoding() is None + + assert mock_get_encoding.call_count == 1 + + def test_get_tiktoken_encoding_retries_after_backoff_window(self): + """A later call should retry loading once the backoff window has passed.""" + + class FakeEncoding: + def encode(self, text: str) -> list[int]: + return [1] * len(text) + + mock_get_encoding = Mock( + side_effect=[Exception("temporary failure"), FakeEncoding()] + ) + + with ( + patch("agent_memory_server.api._tiktoken_encoding", None), + patch("agent_memory_server.api._tiktoken_encoding_last_failed_at", None), + patch("agent_memory_server.api.time.monotonic", side_effect=[100.0, 401.0]), + patch("agent_memory_server.api.tiktoken.get_encoding", mock_get_encoding), + ): + assert _get_tiktoken_encoding() is None + assert _get_tiktoken_encoding() is not None + + assert mock_get_encoding.call_count == 2 From 705e950fd800fd5e5ce3621980c3a4b33c67f7ff Mon Sep 17 00:00:00 2001 From: bhavana-giri Date: Sat, 4 Apr 2026 09:55:19 +0530 Subject: [PATCH 09/10] Refactor tiktoken retry window check --- agent_memory_server/api.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index cc0794b6..0bba96f9 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -113,15 +113,18 @@ def _get_tiktoken_encoding() -> Any | None: """Load the tokenizer encoding once and fall back safely if unavailable.""" global _tiktoken_encoding, _tiktoken_encoding_last_failed_at + now = time.monotonic() + last_failed_at = _tiktoken_encoding_last_failed_at + if _tiktoken_encoding is not None: return _tiktoken_encoding - now = time.monotonic() - if ( - _tiktoken_encoding_last_failed_at is not None - and now - _tiktoken_encoding_last_failed_at - < _TIKTOKEN_ENCODING_RETRY_INTERVAL_SECONDS - ): + retry_after = ( + None + if last_failed_at is None + else last_failed_at + _TIKTOKEN_ENCODING_RETRY_INTERVAL_SECONDS + ) + if retry_after is not None and now < retry_after: return None try: From d1bddbcc330e89985ae60a80906b5b5dd81c5eed Mon Sep 17 00:00:00 2001 From: Brian Sam-Bodden Date: Mon, 6 Apr 2026 08:04:28 -0700 Subject: [PATCH 10/10] Update agent_memory_server/api.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- agent_memory_server/api.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/agent_memory_server/api.py b/agent_memory_server/api.py index 0bba96f9..6f84fe08 100644 --- a/agent_memory_server/api.py +++ b/agent_memory_server/api.py @@ -113,12 +113,11 @@ def _get_tiktoken_encoding() -> Any | None: """Load the tokenizer encoding once and fall back safely if unavailable.""" global _tiktoken_encoding, _tiktoken_encoding_last_failed_at - now = time.monotonic() - last_failed_at = _tiktoken_encoding_last_failed_at - if _tiktoken_encoding is not None: return _tiktoken_encoding + now = time.monotonic() + last_failed_at = _tiktoken_encoding_last_failed_at retry_after = ( None if last_failed_at is None