From 0b02721cc928ac26d32da2dd66bb2bca0cdb9de8 Mon Sep 17 00:00:00 2001 From: rajkumarsakthivel Date: Thu, 11 Jun 2026 14:35:03 +0100 Subject: [PATCH 1/2] feat: show dollar savings in status line, session resume, and MCP status Every developer touchpoint now shows estimated cost saved alongside token percentages. The dollar amount is calculated from the configured pricing model (default: opus). This surfaces in: - cce status --oneline (session start hook) - Session resume context (memory hooks) - MCP index_status response The goal is to make savings tangible and shareable. --- src/context_engine/cli.py | 9 ++++++++- src/context_engine/integration/mcp_server.py | 20 ++++++++++++++++++-- src/context_engine/memory/hooks.py | 15 ++++++++++++++- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/src/context_engine/cli.py b/src/context_engine/cli.py index 1e3e3ef..5e17929 100644 --- a/src/context_engine/cli.py +++ b/src/context_engine/cli.py @@ -1011,7 +1011,14 @@ def status(ctx: click.Context, output_json: bool, oneline: bool) -> None: served = stats.get("served_tokens", 0) if q > 0 and full > 0: pct = int((full - served) / full * 100) - savings = f" · {pct}% saved over {q} queries" + tokens_saved = full - served + from context_engine.pricing import get_model_pricing + model = config.pricing_model.lower() + all_pricing = get_model_pricing() + rate = all_pricing.get(model, all_pricing.get("opus", {"input": 15.0})) + cost = tokens_saved * rate["input"] / 1_000_000 + cost_str = f"${cost:.2f}" if cost >= 0.01 else "<$0.01" + savings = f" · {pct}% saved over {q} queries ({cost_str} saved)" except Exception: pass click.echo( diff --git a/src/context_engine/integration/mcp_server.py b/src/context_engine/integration/mcp_server.py index 8a14897..781f324 100644 --- a/src/context_engine/integration/mcp_server.py +++ b/src/context_engine/integration/mcp_server.py @@ -1432,6 +1432,20 @@ def _handle_session_event(self, args): body = self._apply_output_compression(body) return [TextContent(type="text", text=body)] + def _fmt_cost_saved(self, tokens_saved: int) -> str: + """Format cost savings as a short string, e.g. ', $4.37 saved'.""" + try: + from context_engine.pricing import get_model_pricing + model = self._config.pricing_model.lower() + pricing = get_model_pricing() + rate = pricing.get(model, pricing.get("opus", {"input": 15.0})) + cost = tokens_saved * rate["input"] / 1_000_000 + if cost >= 0.01: + return f", ${cost:.2f} saved" + except Exception: + pass + return "" + async def _handle_index_status(self): queries = self._stats["queries"] raw = self._stats["raw_tokens"] @@ -1450,15 +1464,17 @@ async def _handle_index_status(self): if full_file > 0: full_saved = full_file - served full_pct = int(full_saved / full_file * 100) + cost_note = self._fmt_cost_saved(full_saved) status_parts.append( f"Token savings ({queries} queries): " f"{full_file:,} full-file baseline → {served:,} served " - f"({full_pct}% saved)" + f"({full_pct}% saved{cost_note})" ) else: + cost_note = self._fmt_cost_saved(saved) status_parts.append( f"Token savings ({queries} queries): {raw:,} raw → {served:,} served " - f"({saved:,} saved, {pct}%)" + f"({saved:,} saved, {pct}%{cost_note})" ) else: status_parts.append( diff --git a/src/context_engine/memory/hooks.py b/src/context_engine/memory/hooks.py index de384ad..bfe94e4 100644 --- a/src/context_engine/memory/hooks.py +++ b/src/context_engine/memory/hooks.py @@ -85,9 +85,22 @@ def _fmt_k(n: int) -> str: return f"{n / 1_000:.1f}k" return str(n) + tokens_saved = total_baseline - total_served + cost_str = "" + try: + from context_engine.pricing import get_model_pricing + pricing = get_model_pricing() + # Use opus as default; the session hook doesn't have config access + rate = pricing.get("opus", {"input": 15.0})["input"] + cost = tokens_saved * rate / 1_000_000 + if cost >= 0.01: + cost_str = f", ${cost:.2f} saved" + except Exception: + pass + return ( f"CCE saved {saved_pct:.0f}% of input tokens across {total_queries} queries " - f"({_fmt_k(total_baseline)} baseline, {_fmt_k(total_served)} served)" + f"({_fmt_k(total_baseline)} baseline, {_fmt_k(total_served)} served{cost_str})" ) From a1abaa896ef138d0431fef3805e2b4d2ee3758c6 Mon Sep 17 00:00:00 2001 From: rajkumarsakthivel Date: Mon, 15 Jun 2026 19:00:02 +0100 Subject: [PATCH 2/2] fix: clamp savings to non-negative, avoid network fetch on hot paths - Clamp tokens_saved to max(0, ...) in hooks, MCP index_status, and CLI oneline to prevent negative percentages and dollar amounts - Use static pricing (no network) in session hook and CLI oneline paths to avoid latency on session start / status queries - Guard _fmt_cost_saved early return when tokens_saved <= 0 --- src/context_engine/cli.py | 18 +++++++++--------- src/context_engine/integration/mcp_server.py | 8 +++++--- src/context_engine/memory/hooks.py | 14 ++++++++------ 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/src/context_engine/cli.py b/src/context_engine/cli.py index 5e17929..20bd4a0 100644 --- a/src/context_engine/cli.py +++ b/src/context_engine/cli.py @@ -1010,15 +1010,15 @@ def status(ctx: click.Context, output_json: bool, oneline: bool) -> None: full = stats.get("full_file_tokens", 0) served = stats.get("served_tokens", 0) if q > 0 and full > 0: - pct = int((full - served) / full * 100) - tokens_saved = full - served - from context_engine.pricing import get_model_pricing - model = config.pricing_model.lower() - all_pricing = get_model_pricing() - rate = all_pricing.get(model, all_pricing.get("opus", {"input": 15.0})) - cost = tokens_saved * rate["input"] / 1_000_000 - cost_str = f"${cost:.2f}" if cost >= 0.01 else "<$0.01" - savings = f" · {pct}% saved over {q} queries ({cost_str} saved)" + tokens_saved = max(0, full - served) + if tokens_saved > 0: + pct = int(tokens_saved / full * 100) + from context_engine.pricing import _STATIC_PRICING + model = config.pricing_model.lower() + rate = _STATIC_PRICING.get(model, _STATIC_PRICING.get("opus", {"input": 15.0})) + cost = tokens_saved * rate["input"] / 1_000_000 + cost_str = f"${cost:.2f}" if cost >= 0.01 else "<$0.01" + savings = f" · {pct}% saved over {q} queries ({cost_str} saved)" except Exception: pass click.echo( diff --git a/src/context_engine/integration/mcp_server.py b/src/context_engine/integration/mcp_server.py index 781f324..2e4341b 100644 --- a/src/context_engine/integration/mcp_server.py +++ b/src/context_engine/integration/mcp_server.py @@ -1434,6 +1434,8 @@ def _handle_session_event(self, args): def _fmt_cost_saved(self, tokens_saved: int) -> str: """Format cost savings as a short string, e.g. ', $4.37 saved'.""" + if tokens_saved <= 0: + return "" try: from context_engine.pricing import get_model_pricing model = self._config.pricing_model.lower() @@ -1451,7 +1453,7 @@ async def _handle_index_status(self): raw = self._stats["raw_tokens"] served = self._stats["served_tokens"] full_file = self._stats.get("full_file_tokens", 0) - saved = raw - served + saved = max(0, raw - served) pct = int(saved / raw * 100) if raw > 0 else 0 status_parts = [ @@ -1462,8 +1464,8 @@ async def _handle_index_status(self): if queries > 0: # Show full-file baseline savings (the headline number) if full_file > 0: - full_saved = full_file - served - full_pct = int(full_saved / full_file * 100) + full_saved = max(0, full_file - served) + full_pct = int(full_saved / full_file * 100) if full_saved > 0 else 0 cost_note = self._fmt_cost_saved(full_saved) status_parts.append( f"Token savings ({queries} queries): " diff --git a/src/context_engine/memory/hooks.py b/src/context_engine/memory/hooks.py index bfe94e4..3b4163d 100644 --- a/src/context_engine/memory/hooks.py +++ b/src/context_engine/memory/hooks.py @@ -76,7 +76,11 @@ def _build_savings_line(conn: sqlite3.Connection) -> str: if total_baseline <= 0 or total_queries <= 0: return "" - saved_pct = (1 - total_served / total_baseline) * 100 + tokens_saved = max(0, total_baseline - total_served) + if tokens_saved == 0: + return "" + + saved_pct = tokens_saved / total_baseline * 100 def _fmt_k(n: int) -> str: if n >= 1_000_000: @@ -85,13 +89,11 @@ def _fmt_k(n: int) -> str: return f"{n / 1_000:.1f}k" return str(n) - tokens_saved = total_baseline - total_served cost_str = "" try: - from context_engine.pricing import get_model_pricing - pricing = get_model_pricing() - # Use opus as default; the session hook doesn't have config access - rate = pricing.get("opus", {"input": 15.0})["input"] + from context_engine.pricing import _STATIC_PRICING + # Use static opus pricing to avoid network fetch on session start + rate = _STATIC_PRICING.get("opus", {"input": 15.0})["input"] cost = tokens_saved * rate / 1_000_000 if cost >= 0.01: cost_str = f", ${cost:.2f} saved"