Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added docs/images/dashboard-savings.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
9 changes: 3 additions & 6 deletions src/context_engine/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -1431,17 +1431,14 @@ def _load_buckets(project_dir: Path) -> tuple[dict, dict]:
return empty, {}

from context_engine.cli_style import dim, bold
from context_engine.pricing import get_model_pricing
from context_engine.pricing import resolve_pricing

_all_pricing = get_model_pricing()
_pricing_model = config.pricing_model.lower()
_default = _all_pricing.get("opus", {"input": 15.0, "output": 75.0})
_model_pricing = _all_pricing.get(_pricing_model, _default)
_model_label, _model_pricing = resolve_pricing(config)
_input_price_per_m = _model_pricing["input"]
_output_price_per_m = _model_pricing["output"]
_INPUT_COST = _input_price_per_m / 1_000_000
_OUTPUT_COST = _output_price_per_m / 1_000_000
_model_label = _pricing_model.capitalize()
_model_label = _model_label.capitalize()
_GRID_COLS = 10
_FILLED = "⛁"
_EMPTY = "⛶"
Expand Down
6 changes: 6 additions & 0 deletions src/context_engine/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ class Config:

# Pricing (for savings estimates)
pricing_model: str = "opus"
pricing_input: float | None = None # $/1M input tokens override
pricing_output: float | None = None # $/1M output tokens override

# Storage
storage_path: str = str(_CCE_HOME / "projects")
Expand Down Expand Up @@ -137,6 +139,8 @@ def _deep_merge(base: dict, override: dict) -> dict:
"audit_log_enabled": bool,
"storage_path": str,
"pricing_model": str,
"pricing_input": (int, float, type(None)),
"pricing_output": (int, float, type(None)),
}


Expand All @@ -159,6 +163,8 @@ def _apply_dict_to_config(config: Config, data: dict) -> None:
("audit", "enabled"): "audit_log_enabled",
("storage", "path"): "storage_path",
("pricing", "model"): "pricing_model",
("pricing", "input"): "pricing_input",
("pricing", "output"): "pricing_output",
}
for (section, key), attr in mapping.items():
if section in data and isinstance(data[section], dict) and key in data[section]:
Expand Down
15 changes: 15 additions & 0 deletions src/context_engine/dashboard/_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,14 @@
</div>
<svg width="32" height="32" viewBox="0 0 32 32" id="sv-ring" style="flex-shrink:0;margin-left:8px"></svg>
</div>
<div class="stat-card blue">
<div class="stat-left">
<div class="stat-label">Est. cost saved</div>
<div class="stat-num blue" id="sv-cost">\u2014</div>
</div>
</div>
</div>
<div style="padding:0 0 8px 4px;font-size:11px;font-family:var(--mono);color:var(--text3)" id="sv-pricing-note"></div>

<!-- Chart row: big donut + stacked breakdown -->
<div class="panel-row" style="margin-bottom:10px">
Expand Down Expand Up @@ -1423,11 +1430,19 @@
var pct = d.savings_pct || 0;
var usedPct = baseline > 0 ? Math.round(served/baseline*100) : 0;

var costSaved = d.cost_saved || 0;
var pricingModel = d.pricing_model || 'opus';
var inputPrice = d.input_price_per_m || 0;
var outputPrice = d.output_price_per_m || 0;

// Stat cards
document.getElementById('sv-queries').textContent = fmt(queries);
document.getElementById('sv-saved').textContent = fmtK(saved);
document.getElementById('sv-pct').textContent = pct+'%';
document.getElementById('sv-cost').textContent = costSaved < 0.01 && costSaved > 0 ? '<$0.01' : '$'+costSaved.toFixed(2);
drawMiniRing('sv-ring', pct, 'var(--purple)');
document.getElementById('sv-pricing-note').textContent =
'Cost estimate based on '+pricingModel+' pricing (input $'+inputPrice+'/1M, output $'+outputPrice+'/1M). Configure via pricing.model, pricing.input, pricing.output in ~/.cce/config.yaml or .context-engine.yaml.';

// Big donut
if (baseline > 0) {
Expand Down
12 changes: 12 additions & 0 deletions src/context_engine/dashboard/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,12 +331,24 @@ async def get_savings() -> dict:
baseline = full_file if full_file > 0 else raw
saved = max(0, baseline - served)
pct = int(saved / baseline * 100) if baseline > 0 else 0

from context_engine.pricing import resolve_pricing, list_available_models

label, model_pricing = resolve_pricing(config, fetch_live=False)
input_cost = saved * model_pricing["input"] / 1_000_000
cost_saved = input_cost
Comment thread
rajkumarsakthivel marked this conversation as resolved.

return {
"queries": stats.get("queries", 0),
"baseline_tokens": baseline,
"served_tokens": served,
"tokens_saved": saved,
"savings_pct": pct,
"pricing_model": label,
"input_price_per_m": model_pricing["input"],
"output_price_per_m": model_pricing["output"],
"cost_saved": round(cost_saved, 2),
"available_models": list_available_models(),
}

# ── action routes ──────────────────────────────────────────────────────
Expand Down
120 changes: 103 additions & 17 deletions src/context_engine/pricing.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
"""Dynamic model pricing — fetched from Anthropic docs, cached locally."""
"""Model pricing for savings estimates.

Anthropic pricing is fetched from docs and cached. Other providers use
static fallbacks that are updated with releases.
"""
from __future__ import annotations

import json
import re
import time
from pathlib import Path
from typing import TypedDict
from typing import TYPE_CHECKING, TypedDict

if TYPE_CHECKING:
from context_engine.config import Config

_CCE_HOME = Path.home() / ".cce"
_CACHE_PATH = _CCE_HOME / "pricing_cache.json"
Expand All @@ -16,20 +25,39 @@ class ModelPricing(TypedDict):
output: float # $/1M output tokens


# Used only when fetch fails and no cache exists
_FALLBACK: dict[str, ModelPricing] = {
# Anthropic fallback (used when fetch fails and no cache exists)
_ANTHROPIC_FALLBACK: dict[str, ModelPricing] = {
"opus": {"input": 15.0, "output": 75.0},
"sonnet": {"input": 3.0, "output": 15.0},
"haiku": {"input": 0.80, "output": 4.0},
}

# Flat input-only fallback kept for backward compat with existing cache files
_FALLBACK_INPUT: dict[str, float] = {
"opus": 15.0,
"sonnet": 3.0,
"haiku": 0.80,
# Static pricing for non-Anthropic models. Updated with releases.
# Keys are lowercase, matched against config pricing.model.
_STATIC_PRICING: dict[str, ModelPricing] = {
# OpenAI
"gpt-4o": {"input": 2.50, "output": 10.0},
"gpt-4o-mini": {"input": 0.15, "output": 0.60},
"gpt-4.1": {"input": 2.0, "output": 8.0},
"gpt-4.1-mini": {"input": 0.40, "output": 1.60},
"gpt-4.1-nano": {"input": 0.10, "output": 0.40},
"o3": {"input": 2.0, "output": 8.0},
"o3-mini": {"input": 1.10, "output": 4.40},
"o4-mini": {"input": 1.10, "output": 4.40},
"codex-mini": {"input": 1.50, "output": 6.0},
# Google
"gemini-2.5-pro": {"input": 1.25, "output": 10.0},
"gemini-2.5-flash": {"input": 0.15, "output": 0.60},
"gemini-2.0-flash": {"input": 0.10, "output": 0.40},
# Anthropic (duplicated here so static lookup works without fetching)
"opus": {"input": 15.0, "output": 75.0},
"sonnet": {"input": 3.0, "output": 15.0},
"haiku": {"input": 0.80, "output": 4.0},
}

# Backward compat alias
_FALLBACK = _ANTHROPIC_FALLBACK


def _parse_html(html: str) -> dict[str, ModelPricing] | None:
"""Parse per-family input + output pricing from Anthropic docs HTML table."""
Expand Down Expand Up @@ -136,13 +164,71 @@ def _save_cache(pricing: dict[str, ModelPricing]) -> None:
pass


def get_model_pricing() -> dict[str, ModelPricing]:
"""Return {family: {input, output}} pricing per 1M tokens. Cached 7 days."""
def get_model_pricing(*, fetch_live: bool = True) -> dict[str, ModelPricing]:
    """Build the {model: {input, output}} price table ($ per 1M tokens).

    The table starts as a copy of the static, all-provider pricing. Cached
    live pricing, when present, is layered on top and wins for any model key
    it contains. Only when no cache is available, and *fetch_live* is True,
    is a live fetch attempted; a successful fetch is saved to the cache and
    layered on top in the same way.

    Pass ``fetch_live=False`` on hot paths (e.g. dashboard requests) to
    guarantee that this call never triggers a fetch.
    """
    pricing = dict(_STATIC_PRICING)  # copy so the module-level table is never mutated

    cached = _load_cache()
    if cached:
        # A usable cache short-circuits the fetch entirely.
        pricing.update(cached)
    elif fetch_live:
        fetched = _fetch()
        if fetched:
            _save_cache(fetched)
            pricing.update(fetched)

    return pricing


def list_available_models() -> list[str]:
"""Return sorted list of all model keys with known pricing.

Uses static pricing plus any cached live pricing without triggering
a network fetch, so this is safe for dashboard/CLI hot paths.
"""
result = dict(_STATIC_PRICING)
cached = _load_cache()
if cached:
return cached
fetched = _fetch()
if fetched:
_save_cache(fetched)
return fetched
return dict(_FALLBACK)
result.update(cached)
return sorted(result.keys())


def resolve_pricing(
    config: Config, *, fetch_live: bool = True
) -> tuple[str, ModelPricing]:
    """Resolve the display label and per-1M-token rates for *config*.

    Resolution order:
      1. ``config.pricing_input`` / ``config.pricing_output``, when not None,
         replace the corresponding rate.
      2. Otherwise the rate comes from the entry for ``config.pricing_model``
         (stripped and lowercased) in the merged pricing table.
      3. If that model is not in the table, the Opus entry is used.

    *fetch_live* is forwarded unchanged to ``get_model_pricing``.

    Returns:
        ``(label, rates)`` where ``rates`` has ``"input"`` and ``"output"``
        keys and ``label`` is one of ``"<model> (custom)"``,
        ``"opus (fallback from <model>)"`` or ``"<model>"``.
    """
    requested = config.pricing_model.strip().lower()
    table = get_model_pricing(fetch_live=fetch_live)
    known = requested in table

    if known:
        base = table[requested]
    else:
        # Unknown model: use the table's Opus entry, or hard-coded Opus rates
        # if the table has no "opus" key.
        base = table.get("opus", {"input": 15.0, "output": 75.0})

    input_override = config.pricing_input
    output_override = config.pricing_output
    rates: ModelPricing = {
        "input": base["input"] if input_override is None else input_override,
        "output": base["output"] if output_override is None else output_override,
    }

    # The label tells the user whether rates were overridden or fell back.
    if input_override is not None or output_override is not None:
        label = f"{requested} (custom)"
    elif known:
        label = requested
    else:
        label = f"opus (fallback from {requested})"

    return label, rates
13 changes: 11 additions & 2 deletions tests/dashboard/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,24 @@ def test_sessions_returns_persisted(tmp_path):
assert len(data[0]["decisions"]) == 1


def test_savings_no_data(tmp_path):
@patch("context_engine.pricing._fetch", return_value=None)
@patch("context_engine.pricing._load_cache", return_value=None)
def test_savings_no_data(mock_cache, mock_fetch, tmp_path):
client, _ = _make_client(tmp_path)
r = client.get("/api/savings")
assert r.status_code == 200
data = r.json()
assert data["queries"] == 0
assert data["tokens_saved"] == 0
assert data["savings_pct"] == 0
assert "pricing_model" in data
assert "available_models" in data
assert isinstance(data["available_models"], list)
Comment thread
rajkumarsakthivel marked this conversation as resolved.


def test_savings_with_data(tmp_path):
@patch("context_engine.pricing._fetch", return_value=None)
@patch("context_engine.pricing._load_cache", return_value=None)
def test_savings_with_data(mock_cache, mock_fetch, tmp_path):
client, storage_base = _make_client(tmp_path)
stats = {"queries": 38, "full_file_tokens": 48000, "served_tokens": 14200, "raw_tokens": 14200}
(storage_base / "stats.json").write_text(json.dumps(stats))
Expand All @@ -174,6 +181,8 @@ def test_savings_with_data(tmp_path):
assert data["baseline_tokens"] == 48000
assert data["tokens_saved"] == 33800
assert data["savings_pct"] == 70
assert data["pricing_model"] == "opus"
assert data["cost_saved"] == round(33800 * 15.0 / 1_000_000, 2)
Comment thread
rajkumarsakthivel marked this conversation as resolved.


def test_export_returns_combined(tmp_path):
Expand Down
12 changes: 9 additions & 3 deletions tests/test_cli_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,14 +256,20 @@ def test_pricing_fetch_and_fallback():


def test_pricing_fallback_on_network_error():
"""When fetch fails, fallback pricing is returned."""
from context_engine.pricing import get_model_pricing, _FALLBACK, _CACHE_PATH
"""When fetch fails, static pricing for all providers is returned."""
from context_engine.pricing import get_model_pricing, _STATIC_PRICING, _CACHE_PATH
# Clear cache so it tries to fetch
if _CACHE_PATH.exists():
_CACHE_PATH.unlink()
with patch("context_engine.pricing._fetch", return_value=None):
pricing = get_model_pricing()
assert pricing == _FALLBACK
assert pricing == _STATIC_PRICING
# Anthropic models present
assert "opus" in pricing
assert "sonnet" in pricing
# Non-Anthropic models present
assert "gpt-4o" in pricing
assert "gemini-2.5-pro" in pricing


def test_pricing_shown_in_savings_output(runner, storage):
Expand Down
Loading