From 73b8ca30ac514c8fccb84340e933c1bac34c2f23 Mon Sep 17 00:00:00 2001 From: Will Date: Fri, 20 Mar 2026 00:58:25 -0400 Subject: [PATCH] Cache Canvas module lookups --- README.md | 2 + tests/test_agent.py | 57 +++++++++++++++++++++++++++ tools/canvas_tools.py | 90 ++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 144 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index db2c0d5..4083403 100644 --- a/README.md +++ b/README.md @@ -59,11 +59,13 @@ Set values in .env: - COURSE_CONTEXT_CHROMA_PATH (optional; defaults to `.chroma`) - COURSE_CONTEXT_COLLECTION (optional; defaults to `course-context`) - COURSE_CONTEXT_DEFAULT_LIMIT (optional; defaults to `5`) +- CANVAS_MODULE_CACHE_TTL_SECONDS (optional; defaults to `300`) Notes: - CANVAS_USE_MCP=true uses the Smithery-hosted Canvas MCP server. - CANVAS_USE_MCP=false uses direct Canvas REST calls and is recommended for headless server deployments. +- CANVAS_MODULE_CACHE_TTL_SECONDS sets how long, in seconds, Canvas module lookups are kept in a lightweight in-process cache to reduce repeated API requests; `0` (or any negative value) disables the cache. 
## Usage diff --git a/tests/test_agent.py b/tests/test_agent.py index 9fd9eb8..8a56379 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -544,6 +544,63 @@ def test_get_course_modules_uses_direct_api(self): assert result == [{'id': 9, 'name': 'Week 1', 'items': []}] direct_mock.assert_called_once_with(123) + def test_get_course_modules_uses_cache_on_repeat_calls(self): + """Repeated module listing calls should reuse the in-process cache.""" + from tools.canvas_tools import CanvasTools + + CanvasTools.clear_shared_cache() + try: + with patch.dict('os.environ', { + 'CANVAS_API_URL': 'https://test.canvas.com', + 'CANVAS_API_TOKEN': 'test_token', + 'CANVAS_MODULE_CACHE_TTL_SECONDS': '300', + }): + tools = CanvasTools() + + with patch('tools.canvas_tools.requests.get') as get_mock: + response_mock = Mock() + response_mock.raise_for_status.return_value = None + response_mock.json.return_value = [ + {'id': 9, 'name': 'Week 1', 'items': []}, + ] + get_mock.return_value = response_mock + + first = asyncio.run(tools.get_course_modules(123)) + second = asyncio.run(tools.get_course_modules(123)) + + assert first == second + assert get_mock.call_count == 1 + finally: + CanvasTools.clear_shared_cache() + + def test_module_item_context_uses_cache_on_repeat_calls(self): + """Repeated module item reads should not refetch cached page content.""" + from tools.canvas_tools import CanvasTools + + CanvasTools.clear_shared_cache() + try: + with patch.dict('os.environ', { + 'CANVAS_API_URL': 'https://test.canvas.com', + 'CANVAS_API_TOKEN': 'test_token', + 'CANVAS_MODULE_CACHE_TTL_SECONDS': '300', + }): + tools = CanvasTools() + module = {'id': 9, 'name': 'Week 1'} + item = {'id': 11, 'title': 'Bayes page', 'type': 'Page', 'page_url': 'bayes-page'} + + with patch.object( + CanvasTools, + '_direct_get_page', + return_value={'title': 'Bayes page', 'body': '

Posterior update explanation.

'}, + ) as page_mock: + first = tools._module_item_to_context(123, module, item) + second = tools._module_item_to_context(123, module, item) + + assert first == second + page_mock.assert_called_once_with(123, 'bayes-page') + finally: + CanvasTools.clear_shared_cache() + def test_search_course_module_context_ranks_relevant_items(self): """Module context search should return only relevant ranked items.""" from tools.canvas_tools import CanvasTools diff --git a/tools/canvas_tools.py b/tools/canvas_tools.py index c937000..c7e5d1c 100644 --- a/tools/canvas_tools.py +++ b/tools/canvas_tools.py @@ -5,6 +5,7 @@ import os import json import re +import time import requests from typing import Dict, List, Optional, Any from mcp import ClientSession, StdioServerParameters @@ -16,6 +17,8 @@ class CanvasTools: """Tools for interacting with Canvas LMS via the Smithery Canvas MCP server.""" + + _shared_cache: dict[str, tuple[float, Any]] = {} def __init__(self): self.canvas_url = os.getenv("CANVAS_API_URL", "https://canvas.instructure.com") @@ -23,6 +26,37 @@ def __init__(self): self.use_mcp = os.getenv("CANVAS_USE_MCP", "true").strip().lower() in { "1", "true", "yes", "on" } + self.module_cache_ttl_seconds = max( + int(os.getenv("CANVAS_MODULE_CACHE_TTL_SECONDS", "300") or "300"), + 0, + ) + + @classmethod + def clear_shared_cache(cls) -> None: + """Clear the in-process cache used for repeated module lookups.""" + cls._shared_cache.clear() + + def _cache_key(self, *parts: Any) -> str: + return "::".join(str(part) for part in (self.canvas_url.rstrip("/"), *parts)) + + def _cache_get(self, key: str) -> Any: + if self.module_cache_ttl_seconds <= 0: + return None + + cached = self._shared_cache.get(key) + if not cached: + return None + + stored_at, value = cached + if time.monotonic() - stored_at > self.module_cache_ttl_seconds: + self._shared_cache.pop(key, None) + return None + return value + + def _cache_set(self, key: str, value: Any) -> Any: + if self.module_cache_ttl_seconds > 
0: + self._shared_cache[key] = (time.monotonic(), value) + return value def _canvas_headers(self) -> Dict[str, str]: if not self.canvas_token: @@ -94,6 +128,11 @@ def _normalize_module(self, module: Dict[str, Any]) -> Dict[str, Any]: } def _direct_get_course_modules(self, course_id: int) -> List[Dict[str, Any]]: + cache_key = self._cache_key("modules", course_id) + cached = self._cache_get(cache_key) + if cached is not None: + return cached + response = requests.get( f"{self.canvas_url.rstrip('/')}/api/v1/courses/{course_id}/modules", headers=self._canvas_headers(), @@ -102,34 +141,52 @@ def _direct_get_course_modules(self, course_id: int) -> List[Dict[str, Any]]: ) response.raise_for_status() modules = response.json() - return [self._normalize_module(module) for module in modules] + return self._cache_set( + cache_key, + [self._normalize_module(module) for module in modules], + ) def _direct_get_assignment(self, course_id: int, assignment_id: int) -> Dict[str, Any]: + cache_key = self._cache_key("assignment", course_id, assignment_id) + cached = self._cache_get(cache_key) + if cached is not None: + return cached + response = requests.get( f"{self.canvas_url.rstrip('/')}/api/v1/courses/{course_id}/assignments/{assignment_id}", headers=self._canvas_headers(), timeout=30, ) response.raise_for_status() - return self._normalize_assignment(response.json()) + return self._cache_set(cache_key, self._normalize_assignment(response.json())) def _direct_get_page(self, course_id: int, page_url: str) -> Dict[str, Any]: + cache_key = self._cache_key("page", course_id, page_url) + cached = self._cache_get(cache_key) + if cached is not None: + return cached + response = requests.get( f"{self.canvas_url.rstrip('/')}/api/v1/courses/{course_id}/pages/{page_url}", headers=self._canvas_headers(), timeout=30, ) response.raise_for_status() - return response.json() + return self._cache_set(cache_key, response.json()) def _direct_get_discussion_topic(self, course_id: int, topic_id: int) -> 
Dict[str, Any]: + cache_key = self._cache_key("discussion", course_id, topic_id) + cached = self._cache_get(cache_key) + if cached is not None: + return cached + response = requests.get( f"{self.canvas_url.rstrip('/')}/api/v1/courses/{course_id}/discussion_topics/{topic_id}", headers=self._canvas_headers(), timeout=30, ) response.raise_for_status() - return response.json() + return self._cache_set(cache_key, response.json()) @staticmethod def _query_terms(query: str) -> list[str]: @@ -172,6 +229,21 @@ def _module_item_to_context( module: Dict[str, Any], item: Dict[str, Any], ) -> Optional[Dict[str, Any]]: + cache_key = self._cache_key( + "module-item-context", + course_id, + module.get("id"), + item.get("id"), + item.get("type"), + item.get("content_id"), + item.get("page_url"), + item.get("external_url"), + item.get("url"), + ) + cached = self._cache_get(cache_key) + if cached is not None: + return cached + title = item.get("title") or item.get("page_url") or item.get("type", "Module Item") item_type = item.get("type", "Unknown") text_parts = [ @@ -212,7 +284,15 @@ def _module_item_to_context( text = "\n\n".join(part.strip() for part in text_parts if part and part.strip()) if not text.strip(): return None - return self._build_module_context_entry(course_id=course_id, module=module, item=item_with_title, text=text) + return self._cache_set( + cache_key, + self._build_module_context_entry( + course_id=course_id, + module=module, + item=item_with_title, + text=text, + ), + ) def _score_module_context(self, query_terms: list[str], entry: Dict[str, Any]) -> int: haystack = " ".join(