# uc_status: print which models the running proxy currently uses for the
# orchestrator and worker tiers, as reported by GET $BASE_URL/healthz under
# the "orchestrator_worker" key.
#
# Exits 1 with a hint on stderr when health_ok reports the proxy is down.
# Otherwise returns the exit status of the embedded Python: 0 on success,
# 1 when /healthz cannot be fetched or is not a JSON object.
uc_status() {
  if ! health_ok; then
    echo "UltraCode proxy is not running on $BASE_URL" >&2
    echo "Start it with: ultracode" >&2
    exit 1
  fi
  "$PY" - "$BASE_URL" <<'PY'
import json,sys,urllib.request
base=sys.argv[1].rstrip('/')
try:
    with urllib.request.urlopen(base+'/healthz',timeout=3) as r:
        h=json.loads(r.read().decode())
except (OSError,ValueError) as e:
    # The proxy can stop (or answer with non-JSON) between health_ok and this
    # request; report that in one line instead of a traceback.
    print('Could not read '+base+'/healthz:',e,file=sys.stderr)
    sys.exit(1)
if not isinstance(h,dict):
    print('Unexpected /healthz payload from '+base,file=sys.stderr)
    sys.exit(1)
ow=h.get('orchestrator_worker') or {}
orch=ow.get('orchestrator') or {}
worker=ow.get('worker') or {}
print('UltraCode proxy:', base)
print('Orchestrator/worker routing:', 'on' if ow.get('enabled') else 'off')
if ow.get('enabled'):
    o=orch.get('display_name') or orch.get('id') or '(not set)'
    w=worker.get('display_name') or worker.get('id') or '(not set)'
    # Only append "(id)" when an id is actually set.
    print('  Orchestrator:', o, '(%s)' % orch.get('id') if orch.get('id') else '')
    print('  Worker:      ', w, '(%s)' % worker.get('id') if worker.get('id') else '')
    if ow.get('worker_explicit'):
        print('    (worker was set explicitly — plain /model picks change orchestrator only)')
    elif ow.get('same_model'):
        print('    (same model runs orchestrator and all workers)')
print('Live detail: curl -s', base+'/healthz', '| python3 -m json.tool')
PY
}

# `ultracode status`: report and stop here, before any proxy start-up work.
# Propagate uc_status's result so a failed report is not masked as success.
if [[ "${1:-}" == "status" ]]; then
  uc_status
  exit $?
fi
While the proxy is running: + +- `ultracode status` (or `.\windows\Start-UltraCode.ps1 -Status` on Windows) +- `GET /healthz` → `orchestrator_worker` +- `GET /uc/select` → `active` + +If a worker model hits a rate limit mid-task, pick **`Worker → MODEL`** in +`/model` — only the worker tier changes. Role-targeted slash commands like +`/model worker` are not available (that's Claude Code's picker, not the proxy). + The selection lives in the proxy process (one `claude` session), guarded by a lock, and resets when the proxy restarts. Disable tier routing with `UC_ORCH_WORKER=0` (then a pick routes 1:1 and stock ids pass through untouched). diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md index 56948b9..3654d3f 100644 --- a/docs/TROUBLESHOOTING.md +++ b/docs/TROUBLESHOOTING.md @@ -81,6 +81,50 @@ the rest of `settings.json` is left intact. - **Keep a pick for this session only without saving (even with the guard off):** press `s` in the `/model` picker instead of Enter. +### Which model is orchestrator vs worker right now? + +Orchestrator/worker routing is sticky inside the proxy process, but Claude Code's +UI doesn't show the two tiers separately. + +- **Quick status:** `ultracode status` (mac/Linux/WSL) or + `.\windows\Start-UltraCode.ps1 -Status` (Windows). Shows the active + orchestrator and worker ids + display names. +- **JSON:** `curl -s http://127.0.0.1:8141/healthz | python3 -m json.tool` → + `orchestrator_worker`. +- **Also:** `curl -s http://127.0.0.1:8141/uc/select` returns the same + `active` block while the proxy is running. + +**Changing models mid-session:** + +| What you pick in `/model` | What changes | +|---------------------------|--------------| +| A plain model (e.g. 
`claude-minimax-m3`) | **Both** orchestrator and worker → that model runs everything | +| `Worker → MODEL` | **Worker only** — orchestrator stays as-is | +| Stock ids (`claude-opus-4-8`, sonnet, haiku) | **Neither tier** — they're remapped to your picks for background traffic | + +**Worker hit a rate limit mid-task?** Open `/model`, pick `Worker → MODEL`. +The orchestrator tier is unchanged; only parallel workers/sub-agents switch. + +**`/model orchestrator` / `/model worker`?** Not available — `/model` is Claude +Code's built-in picker; the proxy only sees the resulting model id on the next +request. Use the plain vs `Worker →` entries above. + +### OpenAI-compat backend errors on long sessions (context length / 400) + +The proxy forwards the **entire** Anthropic transcript to `openai_compat` backends +with no automatic trimming. On long multi-tool workflows a backend may return +`context length exceeded`, `maximum context`, or similar 400s. + +- **First:** compact the session (`/compact` in Claude Code) or start a fresh + session and carry over only what you need. +- **Switch worker only:** if the orchestrator is fine but workers are failing, + `/model` → `Worker → MODEL`. +- **Proxy hint:** when the upstream error looks context-related, the proxy log + and error message include a short note explaining that the full history was sent. + +Strict backends also require `content: null` (not `""`) on tool-only assistant +turns; the proxy handles that automatically. + ### The pre-launch selector doesn't open / says it cannot reach `/uc/select` - **Proxy not healthy yet or wrong port.** The launcher starts the proxy before diff --git a/proxy.py b/proxy.py index 0e7fa9a..977b42f 100644 --- a/proxy.py +++ b/proxy.py @@ -64,6 +64,10 @@ UC_MODEL_MAP optional JSON, e.g. {"claude-opus-4-8":"my-model"} UC_LOG optional log file path (default stderr) UC_VERBOSE default 0 + UC_BROWSER_UA User-Agent for openai_compat upstreams (default: modern + Chrome UA). 
def _display_name_for_id(mid):
    """Return the display name registered for model id *mid*.

    ``UC_MODELS`` is searched first, then ``_stock_models()``. A matching
    entry without a ``display_name`` key, or an id found in neither list,
    falls back to *mid* itself. A falsy *mid* yields ``None``.
    """
    if not mid:
        return None
    for m in UC_MODELS:
        if m.get("id") == mid:
            return m.get("display_name", mid)
    # Only consult the stock list when no custom model matched.
    for m in _stock_models():
        if m.get("id") == mid:
            return m.get("display_name", mid)
    return mid


def _orchestrator_worker_status():
    """Return a JSON-serialisable snapshot of the orchestrator/worker tiers.

    ``_ACTIVE`` is copied while holding ``_SEL_LOCK`` and every field is then
    derived from that copy. Keys: ``enabled`` (the ``ORCH_WORKER`` flag),
    ``orchestrator`` / ``worker`` (each ``{"id", "display_name"}``),
    ``worker_explicit`` (defaults to ``False``) and ``same_model`` (true only
    when both tiers are set and hold the same id).
    """
    with _SEL_LOCK:
        active = dict(_ACTIVE)
    orch = active.get("orch")
    worker = active.get("worker")
    return {
        "enabled": ORCH_WORKER,
        "orchestrator": {"id": orch, "display_name": _display_name_for_id(orch)},
        "worker": {"id": worker, "display_name": _display_name_for_id(worker)},
        "worker_explicit": active.get("worker_explicit", False),
        "same_model": bool(orch and worker and orch == worker),
    }


# Lower-case substrings that mark an upstream error as "conversation too big".
# A bare "token" is deliberately NOT listed: it also matches authentication
# failures ("invalid API token", "token expired"), which would then get a
# misleading context-length hint appended.
_CONTEXT_ERROR_MARKERS = (
    "context",
    "too many tokens",
    "token limit",
    "tokens exceed",
    "too long",
    "length exceeded",
)


def _context_length_hint(detail):
    """Return an explanatory suffix when *detail* looks context-length related.

    *detail* is the upstream error text (``None`` or empty is accepted). The
    match is a case-insensitive substring test against
    ``_CONTEXT_ERROR_MARKERS``. Returns the hint, with a leading space so it
    can be appended directly to an error message, or ``""`` when nothing
    matches.
    """
    low = (detail or "").lower()
    if any(marker in low for marker in _CONTEXT_ERROR_MARKERS):
        return (" (This backend rejected the full conversation history — the proxy "
                "forwards the entire transcript with no trimming. Try compacting the "
                "session, switching to a backend with a larger context window, or "
                "starting a fresh session.)")
    return ""
c.get("cost")} for c in _router_available_candidates()], }, + "orchestrator_worker": _orchestrator_worker_status(), "custom_models": [{"id": _advertise_id(m), "display_name": m["display_name"]} for m in UC_MODELS], "stock_models": [{"id": m["id"], "display_name": m["display_name"]} @@ -2041,6 +2099,7 @@ def _handle_models(self) -> bool: fwd_headers = {k: v for k, v in self.headers.items() if k.lower() not in _HOP_BY_HOP} fwd_headers["Accept-Encoding"] = "identity" + fwd_headers.setdefault("User-Agent", BROWSER_UA) url = UPSTREAM + self.path base = {"data": [], "has_more": False, "first_id": None, "last_id": None} try: @@ -2126,6 +2185,7 @@ def _proxy(self, method: str): for hk, hv in (route.get("headers") or {}).items(): fwd_headers[hk] = hv fwd_headers["Accept-Encoding"] = "identity" + fwd_headers.setdefault("User-Agent", BROWSER_UA) if body: fwd_headers["Content-Length"] = str(len(body)) req = urllib.request.Request(url, data=body or None, @@ -2183,6 +2243,8 @@ def _handle_openai_compat(self, body: bytes, route: dict): "Content-Type": "application/json", "Accept": "text/event-stream" if want_stream else "application/json", "Content-Length": str(len(payload)), + "User-Agent": BROWSER_UA, + "Accept-Language": "en-US,en;q=0.9", } auth_override = route.get("auth") if auth_override and auth_override != "passthrough": @@ -2204,9 +2266,11 @@ def _mk_events(): detail = e.read().decode("utf-8", "replace")[:800] except Exception: pass + hint = _context_length_hint(detail) log("openai_compat upstream HTTP %s for %s: %s" % (e.code, url, detail)) yield {"type": "error", "status": e.code, - "message": "openai_compat upstream %s: %s" % (e.code, detail)} + "message": "openai_compat upstream %s: %s%s" + % (e.code, detail, hint)} return except Exception as e: log("openai_compat upstream error %s for %s" % (e, url)) diff --git a/test_proxy.py b/test_proxy.py index 79922a6..73e5442 100755 --- a/test_proxy.py +++ b/test_proxy.py @@ -492,6 +492,20 @@ def _pin(text): _saved[0], 
# -Status: report which models the running proxy uses for the orchestrator
# and worker tiers (the "orchestrator_worker" object of GET $BaseUrl/healthz),
# then exit without starting anything. Exits 1 when the proxy is not healthy.
if ($Status) {
    if (-not (Test-ProxyHealthy)) {
        Write-Error "UltraCode proxy is not running on $BaseUrl. Start it with: .\windows\Start-UltraCode.ps1"
        exit 1
    }
    $health = Invoke-RestMethod -Uri "$BaseUrl/healthz" -TimeoutSec 3
    $ow = $health.orchestrator_worker
    Write-Host "UltraCode proxy: $BaseUrl"
    if ($ow.enabled) {
        # Render one tier as "name (id)". Mirrors `ultracode status`: fall back
        # to the id, then to "(not set)", and omit the parentheses when no id
        # is set, instead of printing an empty " ()".
        $describe = {
            param($tier)
            $id = $tier.id
            $name = if ($tier.display_name) { $tier.display_name } elseif ($id) { $id } else { '(not set)' }
            if ($id) { "{0} ({1})" -f $name, $id } else { $name }
        }
        Write-Host "Orchestrator/worker routing: on"
        Write-Host ("  Orchestrator: {0}" -f (& $describe $ow.orchestrator))
        Write-Host ("  Worker:       {0}" -f (& $describe $ow.worker))
        if ($ow.worker_explicit) {
            Write-Host "    (worker set explicitly — plain /model picks change orchestrator only)"
        } elseif ($ow.same_model) {
            Write-Host "    (same model runs orchestrator and all workers)"
        }
    } else {
        Write-Host "Orchestrator/worker routing: off"
    }
    Write-Host "Live detail: curl -s $BaseUrl/healthz | python -m json.tool"
    exit 0
}