diff --git a/src/context_engine/cli.py b/src/context_engine/cli.py index 1cb21c5..103c829 100644 --- a/src/context_engine/cli.py +++ b/src/context_engine/cli.py @@ -1023,8 +1023,15 @@ def status(ctx: click.Context, output_json: bool, oneline: bool) -> None: full = stats.get("full_file_tokens", 0) served = stats.get("served_tokens", 0) if q > 0 and full > 0: - pct = int((full - served) / full * 100) - savings = f" · {pct}% saved over {q} queries" + tokens_saved = max(0, full - served) + if tokens_saved > 0: + pct = int(tokens_saved / full * 100) + from context_engine.pricing import _STATIC_PRICING + model = config.pricing_model.lower() + rate = _STATIC_PRICING.get(model, _STATIC_PRICING.get("opus", {"input": 15.0})) + cost = tokens_saved * rate["input"] / 1_000_000 + cost_str = f"${cost:.2f}" if cost >= 0.01 else "<$0.01" + savings = f" · {pct}% saved over {q} queries ({cost_str} saved)" except Exception: pass click.echo( diff --git a/src/context_engine/integration/mcp_server.py b/src/context_engine/integration/mcp_server.py index 6d49ba4..b4b9a69 100644 --- a/src/context_engine/integration/mcp_server.py +++ b/src/context_engine/integration/mcp_server.py @@ -1432,12 +1432,28 @@ def _handle_session_event(self, args): body = self._apply_output_compression(body) return [TextContent(type="text", text=body)] + def _fmt_cost_saved(self, tokens_saved: int) -> str: + """Format cost savings as a short string, e.g. ', $4.37 saved'.""" + if tokens_saved <= 0: + return "" + try: + from context_engine.pricing import get_model_pricing + model = self._config.pricing_model.lower() + pricing = get_model_pricing() + rate = pricing.get(model, pricing.get("opus", {"input": 15.0})) + cost = tokens_saved * rate["input"] / 1_000_000 + if cost >= 0.01: + return f", ${cost:.2f} saved" + except Exception: + pass + return "" + async def _handle_index_status(self): queries = self._stats["queries"] raw = self._stats["raw_tokens"] served = self._stats["served_tokens"] full_file = self._stats.get("full_file_tokens", 0) - saved = raw - served + saved = max(0, raw - served) pct = int(saved / raw * 100) if raw > 0 else 0 status_parts = [ @@ -1448,17 +1464,19 @@ async def _handle_index_status(self): if queries > 0: # Show full-file baseline savings (the headline number) if full_file > 0: - full_saved = full_file - served - full_pct = int(full_saved / full_file * 100) + full_saved = max(0, full_file - served) + full_pct = int(full_saved / full_file * 100) if full_saved > 0 else 0 + cost_note = self._fmt_cost_saved(full_saved) status_parts.append( f"Token savings ({queries} queries): " f"{full_file:,} full-file baseline → {served:,} served " - f"({full_pct}% saved)" + f"({full_pct}% saved{cost_note})" ) else: + cost_note = self._fmt_cost_saved(saved) status_parts.append( f"Token savings ({queries} queries): {raw:,} raw → {served:,} served " - f"({saved:,} saved, {pct}%)" + f"({saved:,} saved, {pct}%{cost_note})" ) else: status_parts.append( diff --git a/src/context_engine/memory/hooks.py b/src/context_engine/memory/hooks.py index de384ad..3b4163d 100644 --- a/src/context_engine/memory/hooks.py +++ b/src/context_engine/memory/hooks.py @@ -76,7 +76,11 @@ def _build_savings_line(conn: sqlite3.Connection) -> str: if total_baseline <= 0 or total_queries <= 0: return "" - saved_pct = (1 - total_served / total_baseline) * 100 + tokens_saved = max(0, total_baseline - total_served) + if tokens_saved == 0: + return "" + + saved_pct = tokens_saved / total_baseline * 100 def _fmt_k(n: int) -> str: if n >= 1_000_000: @@ -85,9 +89,20 @@ def _fmt_k(n: int) -> str: return f"{n / 1_000:.1f}k" return str(n) + cost_str = "" + try: + from context_engine.pricing import _STATIC_PRICING + # Use static opus pricing to avoid network fetch on session start + rate = _STATIC_PRICING.get("opus", {"input": 15.0})["input"] + cost = tokens_saved * rate / 1_000_000 + if cost >= 0.01: + cost_str = f", ${cost:.2f} saved" + except Exception: + pass + return ( f"CCE saved {saved_pct:.0f}% of input tokens across {total_queries} queries " - f"({_fmt_k(total_baseline)} baseline, {_fmt_k(total_served)} served)" + f"({_fmt_k(total_baseline)} baseline, {_fmt_k(total_served)} served{cost_str})" )