diff --git a/docs/images/dashboard-savings.png b/docs/images/dashboard-savings.png new file mode 100644 index 0000000..8768bba Binary files /dev/null and b/docs/images/dashboard-savings.png differ diff --git a/src/context_engine/cli.py b/src/context_engine/cli.py index b481ccc..1db7cf7 100644 --- a/src/context_engine/cli.py +++ b/src/context_engine/cli.py @@ -1431,17 +1431,14 @@ def _load_buckets(project_dir: Path) -> tuple[dict, dict]: return empty, {} from context_engine.cli_style import dim, bold - from context_engine.pricing import get_model_pricing + from context_engine.pricing import resolve_pricing - _all_pricing = get_model_pricing() - _pricing_model = config.pricing_model.lower() - _default = _all_pricing.get("opus", {"input": 15.0, "output": 75.0}) - _model_pricing = _all_pricing.get(_pricing_model, _default) + _model_label, _model_pricing = resolve_pricing(config) _input_price_per_m = _model_pricing["input"] _output_price_per_m = _model_pricing["output"] _INPUT_COST = _input_price_per_m / 1_000_000 _OUTPUT_COST = _output_price_per_m / 1_000_000 - _model_label = _pricing_model.capitalize() + _model_label = _model_label.capitalize() _GRID_COLS = 10 _FILLED = "⛁" _EMPTY = "⛶" diff --git a/src/context_engine/config.py b/src/context_engine/config.py index 2309762..f3b13d0 100644 --- a/src/context_engine/config.py +++ b/src/context_engine/config.py @@ -95,6 +95,8 @@ class Config: # Pricing (for savings estimates) pricing_model: str = "opus" + pricing_input: float | None = None # $/1M input tokens override + pricing_output: float | None = None # $/1M output tokens override # Storage storage_path: str = str(_CCE_HOME / "projects") @@ -137,6 +139,8 @@ def _deep_merge(base: dict, override: dict) -> dict: "audit_log_enabled": bool, "storage_path": str, "pricing_model": str, + "pricing_input": (int, float, type(None)), + "pricing_output": (int, float, type(None)), } @@ -159,6 +163,8 @@ def _apply_dict_to_config(config: Config, data: dict) -> None: ("audit", 
"enabled"): "audit_log_enabled", ("storage", "path"): "storage_path", ("pricing", "model"): "pricing_model", + ("pricing", "input"): "pricing_input", + ("pricing", "output"): "pricing_output", } for (section, key), attr in mapping.items(): if section in data and isinstance(data[section], dict) and key in data[section]: diff --git a/src/context_engine/dashboard/_page.py b/src/context_engine/dashboard/_page.py index ff75c0f..bd868f1 100644 --- a/src/context_engine/dashboard/_page.py +++ b/src/context_engine/dashboard/_page.py @@ -768,7 +768,14 @@ +
+
+
Est. cost saved
+
\u2014
+
+
+
@@ -1423,11 +1430,19 @@ var pct = d.savings_pct || 0; var usedPct = baseline > 0 ? Math.round(served/baseline*100) : 0; + var costSaved = d.cost_saved || 0; + var pricingModel = d.pricing_model || 'opus'; + var inputPrice = d.input_price_per_m || 0; + var outputPrice = d.output_price_per_m || 0; + // Stat cards document.getElementById('sv-queries').textContent = fmt(queries); document.getElementById('sv-saved').textContent = fmtK(saved); document.getElementById('sv-pct').textContent = pct+'%'; + document.getElementById('sv-cost').textContent = costSaved < 0.01 && costSaved > 0 ? '<$0.01' : '$'+costSaved.toFixed(2); drawMiniRing('sv-ring', pct, 'var(--purple)'); + document.getElementById('sv-pricing-note').textContent = + 'Cost estimate based on '+pricingModel+' pricing (input $'+inputPrice+'/1M, output $'+outputPrice+'/1M). Configure via pricing.model, pricing.input, pricing.output in ~/.cce/config.yaml or .context-engine.yaml.'; // Big donut if (baseline > 0) { diff --git a/src/context_engine/dashboard/server.py b/src/context_engine/dashboard/server.py index d555b3c..59b5118 100644 --- a/src/context_engine/dashboard/server.py +++ b/src/context_engine/dashboard/server.py @@ -331,12 +331,24 @@ async def get_savings() -> dict: baseline = full_file if full_file > 0 else raw saved = max(0, baseline - served) pct = int(saved / baseline * 100) if baseline > 0 else 0 + + from context_engine.pricing import resolve_pricing, list_available_models + + label, model_pricing = resolve_pricing(config, fetch_live=False) + input_cost = saved * model_pricing["input"] / 1_000_000 + cost_saved = input_cost + return { "queries": stats.get("queries", 0), "baseline_tokens": baseline, "served_tokens": served, "tokens_saved": saved, "savings_pct": pct, + "pricing_model": label, + "input_price_per_m": model_pricing["input"], + "output_price_per_m": model_pricing["output"], + "cost_saved": round(cost_saved, 2), + "available_models": list_available_models(), } # ── action routes 
────────────────────────────────────────────────────── diff --git a/src/context_engine/pricing.py b/src/context_engine/pricing.py index 3dc26d8..9606609 100644 --- a/src/context_engine/pricing.py +++ b/src/context_engine/pricing.py @@ -1,9 +1,18 @@ -"""Dynamic model pricing — fetched from Anthropic docs, cached locally.""" +"""Model pricing for savings estimates. + +Anthropic pricing is fetched from docs and cached. Other providers use +static fallbacks that are updated with releases. +""" +from __future__ import annotations + import json import re import time from pathlib import Path -from typing import TypedDict +from typing import TYPE_CHECKING, TypedDict + +if TYPE_CHECKING: + from context_engine.config import Config _CCE_HOME = Path.home() / ".cce" _CACHE_PATH = _CCE_HOME / "pricing_cache.json" @@ -16,20 +25,39 @@ class ModelPricing(TypedDict): output: float # $/1M output tokens -# Used only when fetch fails and no cache exists -_FALLBACK: dict[str, ModelPricing] = { +# Anthropic fallback (used when fetch fails and no cache exists) +_ANTHROPIC_FALLBACK: dict[str, ModelPricing] = { "opus": {"input": 15.0, "output": 75.0}, "sonnet": {"input": 3.0, "output": 15.0}, "haiku": {"input": 0.80, "output": 4.0}, } -# Flat input-only fallback kept for backward compat with existing cache files -_FALLBACK_INPUT: dict[str, float] = { - "opus": 15.0, - "sonnet": 3.0, - "haiku": 0.80, +# Static pricing for non-Anthropic models. Updated with releases. +# Keys are lowercase, matched against config pricing.model. 
+_STATIC_PRICING: dict[str, ModelPricing] = {
+    # OpenAI
+    "gpt-4o": {"input": 2.50, "output": 10.0},
+    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
+    "gpt-4.1": {"input": 2.0, "output": 8.0},
+    "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
+    "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
+    "o3": {"input": 2.0, "output": 8.0},
+    "o3-mini": {"input": 1.10, "output": 4.40},
+    "o4-mini": {"input": 1.10, "output": 4.40},
+    "codex-mini": {"input": 1.50, "output": 6.0},
+    # Google
+    "gemini-2.5-pro": {"input": 1.25, "output": 10.0},
+    "gemini-2.5-flash": {"input": 0.15, "output": 0.60},
+    "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
+    # Anthropic (duplicated here so static lookup works without fetching)
+    "opus": {"input": 15.0, "output": 75.0},
+    "sonnet": {"input": 3.0, "output": 15.0},
+    "haiku": {"input": 0.80, "output": 4.0},
 }
 
+# Backward compat alias
+_FALLBACK = _ANTHROPIC_FALLBACK
+
 
 def _parse_html(html: str) -> dict[str, ModelPricing] | None:
     """Parse per-family input + output pricing from Anthropic docs HTML table."""
@@ -136,13 +164,71 @@ def _save_cache(pricing: dict[str, ModelPricing]) -> None:
         pass
 
 
-def get_model_pricing() -> dict[str, ModelPricing]:
-    """Return {family: {input, output}} pricing per 1M tokens. Cached 7 days."""
-    cached = _load_cache()
-    if cached:
-        return cached
-    fetched = _fetch()
-    if fetched:
-        _save_cache(fetched)
-        return fetched
-    return dict(_FALLBACK)
+def get_model_pricing(*, fetch_live: bool = True) -> dict[str, ModelPricing]:
+    """Return {model: {input, output}} pricing per 1M tokens.
+
+    Merges static pricing for all providers with live Anthropic pricing
+    (fetched from docs, cached 7 days). Live data wins for Anthropic models.
+
+    When *fetch_live* is False, only static + cached pricing is used (no
+    network request). Use this on hot paths like dashboard requests.
+    """
+    result = dict(_STATIC_PRICING)
+    live = _load_cache()
+    if not live and fetch_live:
+        # Cache miss: refresh from the network and persist for next time.
+        live = _fetch()
+        if live:
+            _save_cache(live)
+    if live:
+        result.update(live)
+    return result
+
+
+def list_available_models() -> list[str]:
+    """Return sorted list of all model keys with known pricing.
+
+    Delegates to ``get_model_pricing(fetch_live=False)`` so the static +
+    cached merge lives in one place and no network fetch is triggered;
+    this keeps it safe for dashboard/CLI hot paths.
+    """
+    return sorted(get_model_pricing(fetch_live=False))
+
+
+def resolve_pricing(
+    config: Config, *, fetch_live: bool = True
+) -> tuple[str, ModelPricing]:
+    """Return (model_label, {input, output}) respecting config overrides.
+
+    Each rate is resolved independently, in this priority order:
+      1. Explicit pricing.input / pricing.output in config, when set
+      2. Lookup by pricing.model in the merged pricing table
+      3. Fallback to Opus when pricing.model is unknown or blank
+
+    The label is "<model> (custom)" when either rate is overridden,
+    "opus (fallback from <model>)" when the model is unknown, and the
+    plain model name otherwise.
+
+    When *fetch_live* is False, skip network fetch (use static + cache only).
+    """
+    # A blank model would otherwise be labelled "opus (fallback from )".
+    model = config.pricing_model.strip().lower() or "opus"
+    all_pricing = get_model_pricing(fetch_live=fetch_live)
+    opus_default: ModelPricing = {"input": 15.0, "output": 75.0}
+    default = all_pricing.get("opus", opus_default)
+    base = all_pricing.get(model, default)
+
+    resolved: ModelPricing = {
+        "input": config.pricing_input if config.pricing_input is not None else base["input"],
+        "output": config.pricing_output if config.pricing_output is not None else base["output"],
+    }
+
+    # Label reflects whether user overrode rates or fell back
+    if config.pricing_input is not None or config.pricing_output is not None:
+        label = f"{model} (custom)"
+    elif model not in all_pricing:
+        label = f"opus (fallback from {model})"
+    else:
+        label = model
+
+    return label, resolved
diff --git a/tests/dashboard/test_server.py b/tests/dashboard/test_server.py
index 17c2e32..74a7d61 100644
--- a/tests/dashboard/test_server.py
+++ b/tests/dashboard/test_server.py
@@ -153,7 +153,9 @@ def test_sessions_returns_persisted(tmp_path):
     assert len(data[0]["decisions"]) == 1
 
 
-def test_savings_no_data(tmp_path):
+@patch("context_engine.pricing._fetch", return_value=None)
+@patch("context_engine.pricing._load_cache", return_value=None)
+def test_savings_no_data(mock_cache, mock_fetch, tmp_path):
     client, _ = _make_client(tmp_path)
     r = client.get("/api/savings")
     assert r.status_code == 200
@@ -161,9 +163,14 @@ def test_savings_no_data(tmp_path):
     assert data["queries"] == 0
     assert data["tokens_saved"] == 0
     assert data["savings_pct"] == 0
+    assert "pricing_model" in data
+    assert "available_models" in data
+    assert isinstance(data["available_models"], list)
 
 
-def test_savings_with_data(tmp_path):
+@patch("context_engine.pricing._fetch", return_value=None)
+@patch("context_engine.pricing._load_cache", return_value=None)
+def test_savings_with_data(mock_cache, mock_fetch, tmp_path):
     client, storage_base = _make_client(tmp_path)
     stats = {"queries": 38, "full_file_tokens": 48000, "served_tokens": 14200, "raw_tokens": 14200}
     (storage_base / "stats.json").write_text(json.dumps(stats))
@@ -174,6 +181,8 @@ def test_savings_with_data(tmp_path):
     assert data["baseline_tokens"] == 48000
     assert data["tokens_saved"] == 33800
     assert data["savings_pct"] == 70
+    assert data["pricing_model"] == "opus"
+    assert data["cost_saved"] == round(33800 * 15.0 / 1_000_000, 2)
 
 
 def test_export_returns_combined(tmp_path):
diff --git a/tests/test_cli_smoke.py b/tests/test_cli_smoke.py
index 02d6236..88ca34a 100644
--- a/tests/test_cli_smoke.py
+++ b/tests/test_cli_smoke.py
@@ -256,14 +256,20 @@ def test_pricing_fetch_and_fallback():
 
 
 def test_pricing_fallback_on_network_error():
-    """When fetch fails, fallback pricing is returned."""
-    from context_engine.pricing import get_model_pricing, _FALLBACK, _CACHE_PATH
-    # Clear cache so it tries to fetch
-    if _CACHE_PATH.exists():
-        _CACHE_PATH.unlink()
-    with patch("context_engine.pricing._fetch", return_value=None):
-        pricing = get_model_pricing()
-    assert pricing == _FALLBACK
+    """When fetch fails, static pricing for all providers is returned."""
+    from context_engine.pricing import get_model_pricing, _STATIC_PRICING
+    # Simulate a missing cache by patching the loader rather than deleting
+    # the developer's real ~/.cce/pricing_cache.json.
+    with patch("context_engine.pricing._load_cache", return_value=None):
+        with patch("context_engine.pricing._fetch", return_value=None):
+            pricing = get_model_pricing()
+    assert pricing == _STATIC_PRICING
+    # Anthropic models present
+    assert "opus" in pricing
+    assert "sonnet" in pricing
+    # Non-Anthropic models present
+    assert "gpt-4o" in pricing
+    assert "gemini-2.5-pro" in pricing
 
 
 def test_pricing_shown_in_savings_output(runner, storage):