elara-labs · rajkumarsakthivel · Jun 15, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
@@ -1431,17 +1431,14 @@ def _load_buckets(project_dir: Path) -> tuple[dict, dict]:
         return empty, {}
 
     from context_engine.cli_style import dim, bold
-    from context_engine.pricing import get_model_pricing
+    from context_engine.pricing import resolve_pricing
 
-    _all_pricing = get_model_pricing()
-    _pricing_model = config.pricing_model.lower()
-    _default = _all_pricing.get("opus", {"input": 15.0, "output": 75.0})
-    _model_pricing = _all_pricing.get(_pricing_model, _default)
+    _model_label, _model_pricing = resolve_pricing(config)
     _input_price_per_m = _model_pricing["input"]
     _output_price_per_m = _model_pricing["output"]
     _INPUT_COST = _input_price_per_m / 1_000_000
     _OUTPUT_COST = _output_price_per_m / 1_000_000
-    _model_label = _pricing_model.capitalize()
+    _model_label = _model_label.capitalize()
     _GRID_COLS = 10
     _FILLED = "⛁"
     _EMPTY = "⛶"

@@ -95,6 +95,8 @@ class Config:
 
     # Pricing (for savings estimates)
     pricing_model: str = "opus"
+    pricing_input: float | None = None   # $/1M input tokens override
+    pricing_output: float | None = None  # $/1M output tokens override
 
     # Storage
     storage_path: str = str(_CCE_HOME / "projects")
@@ -137,6 +139,8 @@ def _deep_merge(base: dict, override: dict) -> dict:
     "audit_log_enabled": bool,
     "storage_path": str,
     "pricing_model": str,
+    "pricing_input": (int, float, type(None)),
+    "pricing_output": (int, float, type(None)),
 }
 
 
@@ -159,6 +163,8 @@ def _apply_dict_to_config(config: Config, data: dict) -> None:
         ("audit", "enabled"): "audit_log_enabled",
         ("storage", "path"): "storage_path",
         ("pricing", "model"): "pricing_model",
+        ("pricing", "input"): "pricing_input",
+        ("pricing", "output"): "pricing_output",
     }
     for (section, key), attr in mapping.items():
         if section in data and isinstance(data[section], dict) and key in data[section]:

@@ -768,7 +768,14 @@
           </div>
           <svg width="32" height="32" viewBox="0 0 32 32" id="sv-ring" style="flex-shrink:0;margin-left:8px"></svg>
         </div>
+        <div class="stat-card blue">
+          <div class="stat-left">
+            <div class="stat-label">Est. cost saved</div>
+            <div class="stat-num blue" id="sv-cost">\u2014</div>
+          </div>
+        </div>
       </div>
+      <div style="padding:0 0 8px 4px;font-size:11px;font-family:var(--mono);color:var(--text3)" id="sv-pricing-note"></div>
 
       <!-- Chart row: big donut + stacked breakdown -->
       <div class="panel-row" style="margin-bottom:10px">
@@ -1423,11 +1430,19 @@
     var pct      = d.savings_pct    || 0;
     var usedPct  = baseline > 0 ? Math.round(served/baseline*100) : 0;
 
+    var costSaved   = d.cost_saved      || 0;
+    var pricingModel = d.pricing_model  || 'opus';
+    var inputPrice  = d.input_price_per_m  || 0;
+    var outputPrice = d.output_price_per_m || 0;
+
     // Stat cards
     document.getElementById('sv-queries').textContent = fmt(queries);
     document.getElementById('sv-saved').textContent   = fmtK(saved);
     document.getElementById('sv-pct').textContent     = pct+'%';
+    document.getElementById('sv-cost').textContent    = costSaved < 0.01 && costSaved > 0 ? '<$0.01' : '$'+costSaved.toFixed(2);
     drawMiniRing('sv-ring', pct, 'var(--purple)');
+    document.getElementById('sv-pricing-note').textContent =
+      'Cost estimate based on '+pricingModel+' pricing (input $'+inputPrice+'/1M, output $'+outputPrice+'/1M). Configure via pricing.model, pricing.input, pricing.output in ~/.cce/config.yaml or .context-engine.yaml.';
 
     // Big donut
     if (baseline > 0) {

@@ -331,12 +331,24 @@ async def get_savings() -> dict:
         baseline = full_file if full_file > 0 else raw
         saved = max(0, baseline - served)
         pct = int(saved / baseline * 100) if baseline > 0 else 0
+
+        from context_engine.pricing import resolve_pricing, list_available_models
+
+        label, model_pricing = resolve_pricing(config, fetch_live=False)
+        input_cost = saved * model_pricing["input"] / 1_000_000
+        cost_saved = input_cost
+
         return {
             "queries": stats.get("queries", 0),
             "baseline_tokens": baseline,
             "served_tokens": served,
             "tokens_saved": saved,
             "savings_pct": pct,
+            "pricing_model": label,
+            "input_price_per_m": model_pricing["input"],
+            "output_price_per_m": model_pricing["output"],
+            "cost_saved": round(cost_saved, 2),
+            "available_models": list_available_models(),
         }
 
     # ── action routes ──────────────────────────────────────────────────────

@@ -1,9 +1,18 @@
-"""Dynamic model pricing — fetched from Anthropic docs, cached locally."""
+"""Model pricing for savings estimates.
+
+Anthropic pricing is fetched from docs and cached. Other providers use
+static fallbacks that are updated with releases.
+"""
+from __future__ import annotations
+
 import json
 import re
 import time
 from pathlib import Path
-from typing import TypedDict
+from typing import TYPE_CHECKING, TypedDict
+
+if TYPE_CHECKING:
+    from context_engine.config import Config
 
 _CCE_HOME = Path.home() / ".cce"
 _CACHE_PATH = _CCE_HOME / "pricing_cache.json"
@@ -16,20 +25,39 @@ class ModelPricing(TypedDict):
     output: float  # $/1M output tokens
 
 
-# Used only when fetch fails and no cache exists
-_FALLBACK: dict[str, ModelPricing] = {
+# Anthropic fallback (used when fetch fails and no cache exists)
+_ANTHROPIC_FALLBACK: dict[str, ModelPricing] = {
     "opus": {"input": 15.0, "output": 75.0},
     "sonnet": {"input": 3.0, "output": 15.0},
     "haiku": {"input": 0.80, "output": 4.0},
 }
 
-# Flat input-only fallback kept for backward compat with existing cache files
-_FALLBACK_INPUT: dict[str, float] = {
-    "opus": 15.0,
-    "sonnet": 3.0,
-    "haiku": 0.80,
+# Static pricing for every supported provider, in $ per 1M tokens.
+# Keys are lowercase and are matched against the config's pricing.model.
+# These are point-in-time list prices and must be refreshed with releases.
+_STATIC_PRICING: dict[str, ModelPricing] = {
+    # OpenAI
+    "gpt-4o": {"input": 2.50, "output": 10.0},
+    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
+    "gpt-4.1": {"input": 2.0, "output": 8.0},
+    "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
+    "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
+    "o3": {"input": 2.0, "output": 8.0},
+    "o3-mini": {"input": 1.10, "output": 4.40},
+    "o4-mini": {"input": 1.10, "output": 4.40},
+    "codex-mini": {"input": 1.50, "output": 6.0},
+    # Google
+    "gemini-2.5-pro": {"input": 1.25, "output": 10.0},
+    # NOTE(review): GA list price; the previous 0.15/0.60 figure looks like the
+    # preview-era rate -- confirm against the provider's pricing page.
+    "gemini-2.5-flash": {"input": 0.30, "output": 2.50},
+    "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
+    # Anthropic: reuse the fallback table instead of repeating its numbers, so
+    # the two cannot drift apart and static lookup still works without fetching.
+    **_ANTHROPIC_FALLBACK,
 }
 
+# Backward compat alias
+_FALLBACK = _ANTHROPIC_FALLBACK
+
 
 def _parse_html(html: str) -> dict[str, ModelPricing] | None:
     """Parse per-family input + output pricing from Anthropic docs HTML table."""
@@ -136,13 +164,71 @@ def _save_cache(pricing: dict[str, ModelPricing]) -> None:
         pass
 
 
-def get_model_pricing() -> dict[str, ModelPricing]:
-    """Return {family: {input, output}} pricing per 1M tokens. Cached 7 days."""
+def get_model_pricing(*, fetch_live: bool = True) -> dict[str, ModelPricing]:
+    """Build the {model: {input, output}} price table, in $ per 1M tokens.
+
+    The table starts as a copy of the static pricing for all providers and is
+    then overlaid with live pricing, so live entries win on key collisions.
+    Live pricing is taken from the local cache when one is available;
+    otherwise, and only if *fetch_live* is true, it is fetched and the cache
+    is refreshed with the result.
+
+    Pass ``fetch_live=False`` on hot paths (e.g. dashboard requests) to rule
+    out a network request: only static and cached pricing are consulted.
+    """
+    table = dict(_STATIC_PRICING)
+    live = _load_cache()
+    if not live and fetch_live:
+        live = _fetch()
+        if live:
+            _save_cache(live)
+    if live:
+        table.update(live)
+    return table
+
+
+def list_available_models() -> list[str]:
+    """Return sorted list of all model keys with known pricing.
+
+    Uses static pricing plus any cached live pricing without triggering
+    a network fetch, so this is safe for dashboard/CLI hot paths.
+    """
+    # Work on a copy so merging cached entries never mutates the static table.
+    result = dict(_STATIC_PRICING)
     cached = _load_cache()
     if cached:
-        return cached
-    fetched = _fetch()
-    if fetched:
-        _save_cache(fetched)
-        return fetched
-    return dict(_FALLBACK)
+        # Cached models are added to (or replace) the static keys.
+        result.update(cached)
+    return sorted(result.keys())
+
+
+def resolve_pricing(
+    config: Config, *, fetch_live: bool = True
+) -> tuple[str, ModelPricing]:
+    """Return (model_label, {input, output}) respecting config overrides.
+
+    Each rate is resolved independently, in $ per 1M tokens:
+    1. ``config.pricing_input`` / ``config.pricing_output`` when set (not None)
+    2. The entry for ``config.pricing_model`` in the merged pricing table
+    3. The Opus entry when the model is unknown
+
+    The label tells the user where the rates came from:
+    - ``"<model>"`` -- table rates for a known model
+    - ``"<model> (custom)"`` -- at least one rate overridden, and no rate
+      had to be borrowed from Opus
+    - ``"opus (fallback from <model>)"`` -- unknown model, Opus rates used
+    - ``"opus (fallback from <model>) (custom)"`` -- unknown model with only
+      one rate overridden, so the other rate is still the Opus one
+
+    When *fetch_live* is False, skip network fetch (use static + cache only).
+    """
+    model = config.pricing_model.strip().lower()
+    all_pricing = get_model_pricing(fetch_live=fetch_live)
+    opus_default: ModelPricing = {"input": 15.0, "output": 75.0}
+    known = model in all_pricing
+    base = all_pricing[model] if known else all_pricing.get("opus", opus_default)
+
+    input_override = config.pricing_input
+    output_override = config.pricing_output
+    resolved: ModelPricing = {
+        "input": base["input"] if input_override is None else input_override,
+        "output": base["output"] if output_override is None else output_override,
+    }
+
+    overridden = input_override is not None or output_override is not None
+    fully_overridden = input_override is not None and output_override is not None
+    # Opus rates reach the result only when the model is unknown AND at least
+    # one rate was left to the table. Say so even if the other rate is custom,
+    # otherwise the label would hide where that rate came from.
+    if known or fully_overridden:
+        label = model
+    else:
+        label = f"opus (fallback from {model})"
+    if overridden:
+        label = f"{label} (custom)"
+
+    return label, resolved
@@ -153,17 +153,24 @@ def test_sessions_returns_persisted(tmp_path):
     assert len(data[0]["decisions"]) == 1
 
 
-def test_savings_no_data(tmp_path):
+# Decorators apply bottom-up, so the _load_cache mock is the first argument.
+@patch("context_engine.pricing._fetch", return_value=None)
+@patch("context_engine.pricing._load_cache", return_value=None)
+def test_savings_no_data(mock_cache, mock_fetch, tmp_path):
+    """Without a stats file, /api/savings reports zeros plus pricing metadata."""
     client, _ = _make_client(tmp_path)
     r = client.get("/api/savings")
     assert r.status_code == 200
     data = r.json()
     assert data["queries"] == 0
     assert data["tokens_saved"] == 0
     assert data["savings_pct"] == 0
+    # Pricing fields are checked for presence and type only.
+    assert "pricing_model" in data
+    assert "available_models" in data
+    assert isinstance(data["available_models"], list)
 
 
-def test_savings_with_data(tmp_path):
+@patch("context_engine.pricing._fetch", return_value=None)
+@patch("context_engine.pricing._load_cache", return_value=None)
+def test_savings_with_data(mock_cache, mock_fetch, tmp_path):
     client, storage_base = _make_client(tmp_path)
     stats = {"queries": 38, "full_file_tokens": 48000, "served_tokens": 14200, "raw_tokens": 14200}
     (storage_base / "stats.json").write_text(json.dumps(stats))
@@ -174,6 +181,8 @@ def test_savings_with_data(tmp_path):
     assert data["baseline_tokens"] == 48000
     assert data["tokens_saved"] == 33800
     assert data["savings_pct"] == 70
+    assert data["pricing_model"] == "opus"
+    assert data["cost_saved"] == round(33800 * 15.0 / 1_000_000, 2)
 
 
 def test_export_returns_combined(tmp_path):

@@ -256,14 +256,20 @@ def test_pricing_fetch_and_fallback():
 
 
 def test_pricing_fallback_on_network_error():
-    """When fetch fails, fallback pricing is returned."""
-    from context_engine.pricing import get_model_pricing, _FALLBACK, _CACHE_PATH
-    # Clear cache so it tries to fetch
-    if _CACHE_PATH.exists():
-        _CACHE_PATH.unlink()
-    with patch("context_engine.pricing._fetch", return_value=None):
+    """Without cache or network, static pricing for all providers is returned."""
+    from context_engine.pricing import get_model_pricing, _STATIC_PRICING
+    # Simulate a missing cache by patching the loader instead of unlinking
+    # _CACHE_PATH: that path lives under the user's home directory, so deleting
+    # it would destroy real state on the machine running the tests.
+    with patch("context_engine.pricing._load_cache", return_value=None), \
+         patch("context_engine.pricing._fetch", return_value=None):
         pricing = get_model_pricing()
-    assert pricing == _FALLBACK
+    assert pricing == _STATIC_PRICING
+    # Anthropic models present
+    assert "opus" in pricing
+    assert "sonnet" in pricing
+    # Non-Anthropic models present
+    assert "gpt-4o" in pricing
+    assert "gemini-2.5-pro" in pricing
 
 
 def test_pricing_shown_in_savings_output(runner, storage):