diff --git a/proxy.py b/proxy.py index 72aa099..8f6cb81 100644 --- a/proxy.py +++ b/proxy.py @@ -666,6 +666,18 @@ def transform_messages_body(raw: bytes): model_before = body.get("model") route = {} + # 1M context window: Claude Code appends a "[1m]" suffix to a model id to ask + # the client for the 1M window (it also sends the context-1m beta header; see + # the launchers' UC_FORCE_1M / [1m] handling). That suffix is a client-side + # convention, not an Anthropic model id, so it must not reach routing (it would + # not match a configured route or an orchestrator/worker pick) or the upstream. + # Strip it up front so "<model>[1m]" behaves exactly like "<model>" everywhere below; + # the 1M window is unaffected because it rides the beta header, left untouched. + if isinstance(model_before, str) and model_before.endswith("[1m]"): + model_before = model_before[:-len("[1m]")] + body["model"] = model_before + changed = True + # Orchestrator/Worker: classify tier and remap the model id to the selected # orchestrator (heavy) or worker (fast) model. This also captures the dynamic # workflow's stock-model background traffic so it follows your pick. diff --git a/test_proxy.py b/test_proxy.py index 3df40b0..702f608 100755 --- a/test_proxy.py +++ b/test_proxy.py @@ -263,6 +263,21 @@ def main(): assert up._expand_env("Bearer ${MOCK_KEY}") == "Bearer secret123" print("[ok] ${ENV} expansion in route auth") + # PR #8 companion: Claude Code's [1m] context-window suffix on the model id + # is stripped before routing, so "<model>[1m]" still matches "<model>"'s route + # (the 1M window itself rides the context-1m beta header, not the id). A + # naive exact-match lookup would otherwise miss the route once the launcher + # appends [1m] to a 1M-capable Claude pick. 
+ _saved_slots = up.UC_SLOT_MAP + up.UC_SLOT_MAP = {"claude-big": {"type": "openai_compat", "model": "big-real", + "upstream": mock + "/v1", "auth": "Bearer ${MOCK_KEY}"}} + out_1m, _ = up.transform_messages_body(json.dumps({ + "model": "claude-big[1m]", "max_tokens": 8, + "messages": [{"role": "user", "content": "hi"}]}).encode()) + assert json.loads(out_1m)["model"] == "big-real", json.loads(out_1m)["model"] + up.UC_SLOT_MAP = _saved_slots + print("[ok] 1M [1m] window suffix stripped before routing") + # Stock Claude models: the built-in fallback so real Claude stays in # /model even with no upstream list. Toggle + override are honored, and # every advertised id obeys Claude Code's /^(claude|anthropic)/i rule.