diff --git a/docs/images/dashboard-savings.png b/docs/images/dashboard-savings.png
new file mode 100644
index 0000000..8768bba
Binary files /dev/null and b/docs/images/dashboard-savings.png differ
diff --git a/src/context_engine/cli.py b/src/context_engine/cli.py
index b481ccc..1db7cf7 100644
--- a/src/context_engine/cli.py
+++ b/src/context_engine/cli.py
@@ -1431,17 +1431,14 @@ def _load_buckets(project_dir: Path) -> tuple[dict, dict]:
return empty, {}
from context_engine.cli_style import dim, bold
- from context_engine.pricing import get_model_pricing
+ from context_engine.pricing import resolve_pricing
- _all_pricing = get_model_pricing()
- _pricing_model = config.pricing_model.lower()
- _default = _all_pricing.get("opus", {"input": 15.0, "output": 75.0})
- _model_pricing = _all_pricing.get(_pricing_model, _default)
+ _model_label, _model_pricing = resolve_pricing(config)
_input_price_per_m = _model_pricing["input"]
_output_price_per_m = _model_pricing["output"]
_INPUT_COST = _input_price_per_m / 1_000_000
_OUTPUT_COST = _output_price_per_m / 1_000_000
- _model_label = _pricing_model.capitalize()
+ _model_label = _model_label.capitalize()
_GRID_COLS = 10
_FILLED = "⛁"
_EMPTY = "⛶"
diff --git a/src/context_engine/config.py b/src/context_engine/config.py
index 2309762..f3b13d0 100644
--- a/src/context_engine/config.py
+++ b/src/context_engine/config.py
@@ -95,6 +95,8 @@ class Config:
# Pricing (for savings estimates)
pricing_model: str = "opus"
+ pricing_input: float | None = None # $/1M input tokens override
+ pricing_output: float | None = None # $/1M output tokens override
# Storage
storage_path: str = str(_CCE_HOME / "projects")
@@ -137,6 +139,8 @@ def _deep_merge(base: dict, override: dict) -> dict:
"audit_log_enabled": bool,
"storage_path": str,
"pricing_model": str,
+ "pricing_input": (int, float, type(None)),
+ "pricing_output": (int, float, type(None)),
}
@@ -159,6 +163,8 @@ def _apply_dict_to_config(config: Config, data: dict) -> None:
("audit", "enabled"): "audit_log_enabled",
("storage", "path"): "storage_path",
("pricing", "model"): "pricing_model",
+ ("pricing", "input"): "pricing_input",
+ ("pricing", "output"): "pricing_output",
}
for (section, key), attr in mapping.items():
if section in data and isinstance(data[section], dict) and key in data[section]:
diff --git a/src/context_engine/dashboard/_page.py b/src/context_engine/dashboard/_page.py
index ff75c0f..bd868f1 100644
--- a/src/context_engine/dashboard/_page.py
+++ b/src/context_engine/dashboard/_page.py
@@ -768,7 +768,14 @@
+
+
+
Est. cost saved
+
\u2014
+
+
+
@@ -1423,11 +1430,19 @@
var pct = d.savings_pct || 0;
var usedPct = baseline > 0 ? Math.round(served/baseline*100) : 0;
+ var costSaved = d.cost_saved || 0;
+ var pricingModel = d.pricing_model || 'opus';
+ var inputPrice = d.input_price_per_m || 0;
+ var outputPrice = d.output_price_per_m || 0;
+
// Stat cards
document.getElementById('sv-queries').textContent = fmt(queries);
document.getElementById('sv-saved').textContent = fmtK(saved);
document.getElementById('sv-pct').textContent = pct+'%';
+ document.getElementById('sv-cost').textContent = costSaved < 0.01 && costSaved > 0 ? '<$0.01' : '$'+costSaved.toFixed(2);
drawMiniRing('sv-ring', pct, 'var(--purple)');
+ document.getElementById('sv-pricing-note').textContent =
+ 'Cost estimate based on '+pricingModel+' pricing (input $'+inputPrice+'/1M, output $'+outputPrice+'/1M). Configure via pricing.model, pricing.input, pricing.output in ~/.cce/config.yaml or .context-engine.yaml.';
// Big donut
if (baseline > 0) {
diff --git a/src/context_engine/dashboard/server.py b/src/context_engine/dashboard/server.py
index d555b3c..59b5118 100644
--- a/src/context_engine/dashboard/server.py
+++ b/src/context_engine/dashboard/server.py
@@ -331,12 +331,24 @@ async def get_savings() -> dict:
baseline = full_file if full_file > 0 else raw
saved = max(0, baseline - served)
pct = int(saved / baseline * 100) if baseline > 0 else 0
+
+ from context_engine.pricing import resolve_pricing, list_available_models
+
+ label, model_pricing = resolve_pricing(config, fetch_live=False)
+ input_cost = saved * model_pricing["input"] / 1_000_000
+ cost_saved = input_cost
+
return {
"queries": stats.get("queries", 0),
"baseline_tokens": baseline,
"served_tokens": served,
"tokens_saved": saved,
"savings_pct": pct,
+ "pricing_model": label,
+ "input_price_per_m": model_pricing["input"],
+ "output_price_per_m": model_pricing["output"],
+ "cost_saved": round(cost_saved, 2),
+ "available_models": list_available_models(),
}
# ── action routes ──────────────────────────────────────────────────────
diff --git a/src/context_engine/pricing.py b/src/context_engine/pricing.py
index 3dc26d8..9606609 100644
--- a/src/context_engine/pricing.py
+++ b/src/context_engine/pricing.py
@@ -1,9 +1,18 @@
-"""Dynamic model pricing — fetched from Anthropic docs, cached locally."""
+"""Model pricing for savings estimates.
+
+Anthropic pricing is fetched from docs and cached. Other providers use
+static fallbacks that are updated with releases.
+"""
+from __future__ import annotations
+
import json
import re
import time
from pathlib import Path
-from typing import TypedDict
+from typing import TYPE_CHECKING, TypedDict
+
+if TYPE_CHECKING:
+ from context_engine.config import Config
_CCE_HOME = Path.home() / ".cce"
_CACHE_PATH = _CCE_HOME / "pricing_cache.json"
@@ -16,20 +25,39 @@ class ModelPricing(TypedDict):
output: float # $/1M output tokens
-# Used only when fetch fails and no cache exists
-_FALLBACK: dict[str, ModelPricing] = {
+# Anthropic fallback (used when fetch fails and no cache exists)
+_ANTHROPIC_FALLBACK: dict[str, ModelPricing] = {
"opus": {"input": 15.0, "output": 75.0},
"sonnet": {"input": 3.0, "output": 15.0},
"haiku": {"input": 0.80, "output": 4.0},
}
-# Flat input-only fallback kept for backward compat with existing cache files
-_FALLBACK_INPUT: dict[str, float] = {
- "opus": 15.0,
- "sonnet": 3.0,
- "haiku": 0.80,
+# Static pricing for every provider, Anthropic included. Updated with releases.
+# Keys are lowercase, matched against config pricing.model.
+_STATIC_PRICING: dict[str, ModelPricing] = {
+ # OpenAI
+ "gpt-4o": {"input": 2.50, "output": 10.0},
+ "gpt-4o-mini": {"input": 0.15, "output": 0.60},
+ "gpt-4.1": {"input": 2.0, "output": 8.0},
+ "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
+ "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
+ "o3": {"input": 2.0, "output": 8.0},
+ "o3-mini": {"input": 1.10, "output": 4.40},
+ "o4-mini": {"input": 1.10, "output": 4.40},
+ "codex-mini": {"input": 1.50, "output": 6.0},
+ # Google
+ "gemini-2.5-pro": {"input": 1.25, "output": 10.0},
+ "gemini-2.5-flash": {"input": 0.15, "output": 0.60},
+ "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
+ # Anthropic (duplicated here so static lookup works without fetching)
+ "opus": {"input": 15.0, "output": 75.0},
+ "sonnet": {"input": 3.0, "output": 15.0},
+ "haiku": {"input": 0.80, "output": 4.0},
}
+# Backward compat alias
+_FALLBACK = _ANTHROPIC_FALLBACK
+
def _parse_html(html: str) -> dict[str, ModelPricing] | None:
"""Parse per-family input + output pricing from Anthropic docs HTML table."""
@@ -136,13 +164,71 @@ def _save_cache(pricing: dict[str, ModelPricing]) -> None:
pass
-def get_model_pricing() -> dict[str, ModelPricing]:
- """Return {family: {input, output}} pricing per 1M tokens. Cached 7 days."""
+def get_model_pricing(*, fetch_live: bool = True) -> dict[str, ModelPricing]:
+    """Build the {model: {input, output}} table of prices per 1M tokens.
+
+    The static table covering every provider is the base layer; live
+    Anthropic pricing (cached, or freshly fetched and then cached) is laid
+    on top, so live values replace static ones for any key they share.
+
+    With *fetch_live* set to False no network request is made and only
+    static + cached pricing is used, which suits hot paths such as
+    dashboard requests.
+    """
+    pricing = dict(_STATIC_PRICING)
+    live = _load_cache()
+    if not live and fetch_live:
+        # Nothing usable in the cache: try the network and keep what we get.
+        live = _fetch()
+        if live:
+            _save_cache(live)
+    if live:
+        pricing.update(live)
+    return pricing
+
+
+def list_available_models() -> list[str]:
+ """Return sorted list of all model keys with known pricing.
+
+ Uses static pricing plus any cached live pricing without triggering
+ a network fetch, so this is safe for dashboard/CLI hot paths.
+
+ Performs the same static-then-cache merge that
+ get_model_pricing(fetch_live=False) returns, then keeps only its keys.
+ """
+ result = dict(_STATIC_PRICING)
cached = _load_cache()
if cached:
- return cached
- fetched = _fetch()
- if fetched:
- _save_cache(fetched)
- return fetched
- return dict(_FALLBACK)
+ result.update(cached)
+ return sorted(result.keys())
+
+
+def resolve_pricing(
+    config: Config, *, fetch_live: bool = True
+) -> tuple[str, ModelPricing]:
+    """Return (model_label, {input, output}) respecting config overrides.
+
+    Each rate is resolved independently, first match wins:
+      1. config.pricing_input / config.pricing_output, when not None
+      2. The config.pricing_model entry in the merged pricing table
+      3. The Opus entry, when the model is unknown
+
+    The label gets a "(custom)" suffix when a rate is overridden, and names
+    the Opus fallback whenever a returned rate actually came from Opus.
+    When *fetch_live* is False, skip network fetch (use static + cache only).
+    """
+    model = config.pricing_model.strip().lower()
+    all_pricing = get_model_pricing(fetch_live=fetch_live)
+    known = model in all_pricing
+    opus: ModelPricing = all_pricing.get("opus", {"input": 15.0, "output": 75.0})
+    base = all_pricing[model] if known else opus
+    rate_in, rate_out = config.pricing_input, config.pricing_output
+    resolved: ModelPricing = {
+        "input": base["input"] if rate_in is None else rate_in,
+        "output": base["output"] if rate_out is None else rate_out,
+    }
+
+    # Opus rates leak into the result unless both rates are overridden.
+    uses_fallback = not known and (rate_in is None or rate_out is None)
+    label = f"opus (fallback from {model})" if uses_fallback else model
+    if rate_in is not None or rate_out is not None:
+        label += " (custom)"
+
+    return label, resolved
diff --git a/tests/dashboard/test_server.py b/tests/dashboard/test_server.py
index 17c2e32..74a7d61 100644
--- a/tests/dashboard/test_server.py
+++ b/tests/dashboard/test_server.py
@@ -153,7 +153,9 @@ def test_sessions_returns_persisted(tmp_path):
assert len(data[0]["decisions"]) == 1
-def test_savings_no_data(tmp_path):
+@patch("context_engine.pricing._fetch", return_value=None)
+@patch("context_engine.pricing._load_cache", return_value=None)
+def test_savings_no_data(mock_cache, mock_fetch, tmp_path):
client, _ = _make_client(tmp_path)
r = client.get("/api/savings")
assert r.status_code == 200
@@ -161,9 +163,14 @@ def test_savings_no_data(tmp_path):
assert data["queries"] == 0
assert data["tokens_saved"] == 0
assert data["savings_pct"] == 0
+ assert "pricing_model" in data
+ assert "available_models" in data
+ assert isinstance(data["available_models"], list)
-def test_savings_with_data(tmp_path):
+@patch("context_engine.pricing._fetch", return_value=None)
+@patch("context_engine.pricing._load_cache", return_value=None)
+def test_savings_with_data(mock_cache, mock_fetch, tmp_path):
client, storage_base = _make_client(tmp_path)
stats = {"queries": 38, "full_file_tokens": 48000, "served_tokens": 14200, "raw_tokens": 14200}
(storage_base / "stats.json").write_text(json.dumps(stats))
@@ -174,6 +181,8 @@ def test_savings_with_data(tmp_path):
assert data["baseline_tokens"] == 48000
assert data["tokens_saved"] == 33800
assert data["savings_pct"] == 70
+ assert data["pricing_model"] == "opus"
+ assert data["cost_saved"] == round(33800 * 15.0 / 1_000_000, 2)
def test_export_returns_combined(tmp_path):
diff --git a/tests/test_cli_smoke.py b/tests/test_cli_smoke.py
index 02d6236..88ca34a 100644
--- a/tests/test_cli_smoke.py
+++ b/tests/test_cli_smoke.py
@@ -256,14 +256,20 @@ def test_pricing_fetch_and_fallback():
def test_pricing_fallback_on_network_error():
-    """When fetch fails, fallback pricing is returned."""
-    from context_engine.pricing import get_model_pricing, _FALLBACK, _CACHE_PATH
-    # Clear cache so it tries to fetch
-    if _CACHE_PATH.exists():
-        _CACHE_PATH.unlink()
-    with patch("context_engine.pricing._fetch", return_value=None):
-        pricing = get_model_pricing()
-    assert pricing == _FALLBACK
+    """With no cache and a failed fetch, static pricing for all providers is returned."""
+    from context_engine.pricing import get_model_pricing, _STATIC_PRICING
+    # Stub the cache lookup instead of deleting the real ~/.cce/pricing_cache.json;
+    # an empty cache is what sends get_model_pricing() down the fetch path.
+    with patch("context_engine.pricing._load_cache", return_value=None):
+        with patch("context_engine.pricing._fetch", return_value=None):
+            pricing = get_model_pricing()
+    assert pricing == _STATIC_PRICING
+    # Anthropic models present
+    assert "opus" in pricing
+    assert "sonnet" in pricing
+    # Non-Anthropic models present
+    assert "gpt-4o" in pricing
+    assert "gemini-2.5-pro" in pricing
def test_pricing_shown_in_savings_output(runner, storage):