OnlyTerp · OnlyTerp · Jun 11, 2026 · Jun 11, 2026 · devin-ai-integration · Jun 11, 2026
diff --git a/bin/ultracode b/bin/ultracode
@@ -175,6 +175,40 @@ PY
   rm -f "$SAVED_MODEL_FILE"
 }
 
+uc_status() {  # "ultracode status": print the live orchestrator/worker tiers read from /healthz
+  if ! health_ok; then  # proxy not reachable: explain how to start it and exit 1
+    echo "UltraCode proxy is not running on $BASE_URL" >&2
+    echo "Start it with: ultracode" >&2
+    exit 1
+  fi
+  "$PY" - "$BASE_URL" <<'PY'
+import json,sys,urllib.request
+base=sys.argv[1].rstrip('/')  # base URL arrives as argv[1]; drop any trailing slash
+with urllib.request.urlopen(base+'/healthz',timeout=3) as r:
+    h=json.loads(r.read().decode())
+ow=h.get('orchestrator_worker') or {}  # "or {}": tolerate a missing or null block
+orch=ow.get('orchestrator') or {}
+worker=ow.get('worker') or {}
+print('UltraCode proxy:', base)
+print('Orchestrator/worker routing:', 'on' if ow.get('enabled') else 'off')
+if ow.get('enabled'):
+    o=orch.get('display_name') or orch.get('id') or '(not set)'  # display name, then raw id
+    w=worker.get('display_name') or worker.get('id') or '(not set)'
+    print('  Orchestrator:', o, ('('+orch.get('id')+')') if orch.get('id') else '')  # "(id)" only when set
+    print('  Worker:      ', w, ('('+worker.get('id')+')') if worker.get('id') else '')
+    if ow.get('worker_explicit'):
+        print('  (worker was set explicitly — plain /model picks change orchestrator only)')
+    elif ow.get('same_model'):
+        print('  (same model runs orchestrator and all workers)')
+print('Live detail: curl -s', base+'/healthz', '| python3 -m json.tool')
+PY
+}
+
+if [[ "${1:-}" == "status" ]]; then  # status subcommand: report, then exit before the rest of the launcher runs
+  uc_status
+  exit 0
+fi
+
 start_proxy() {
   mkdir -p "$REF_DIR"
   : > "$OWNER_REF"

diff --git a/docs/HOW_IT_WORKS.md b/docs/HOW_IT_WORKS.md
@@ -141,6 +141,17 @@ Selection rules:
   background traffic) never change the selection; they're **remapped** to it. That
   is what makes "use MiniMax" mean MiniMax for the whole workflow.
 
+**Seeing the active tiers.** Claude Code's UI doesn't show orchestrator vs worker
+separately. While the proxy is running:
+
+- `ultracode status` (or `.\windows\Start-UltraCode.ps1 -Status` on Windows)
+- `GET /healthz` → `orchestrator_worker`
+- `GET /uc/select` → `active`
+
+If a worker model hits a rate limit mid-task, pick **`Worker → <other>`** in
+`/model` — only the worker tier changes. Role-targeted slash commands like
+`/model worker` are not available (that's Claude Code's picker, not the proxy).
+
 The selection lives in the proxy process (one `claude` session), guarded by a
 lock, and resets when the proxy restarts. Disable tier routing with
 `UC_ORCH_WORKER=0` (then a pick routes 1:1 and stock ids pass through untouched).

diff --git a/docs/TROUBLESHOOTING.md b/docs/TROUBLESHOOTING.md
@@ -81,6 +81,50 @@ the rest of `settings.json` is left intact.
 - **Keep a pick for this session only without saving (even with the guard off):**
   press `s` in the `/model` picker instead of Enter.
 
+### Which model is orchestrator vs worker right now?
+
+Orchestrator/worker routing is sticky inside the proxy process, but Claude Code's
+UI doesn't show the two tiers separately.
+
+- **Quick status:** `ultracode status` (mac/Linux/WSL) or
+  `.\windows\Start-UltraCode.ps1 -Status` (Windows). Shows the active
+  orchestrator and worker ids + display names.
+- **JSON:** `curl -s http://127.0.0.1:8141/healthz | python3 -m json.tool` →
+  `orchestrator_worker`.
+- **Also:** `curl -s http://127.0.0.1:8141/uc/select` returns the same
+  `active` block while the proxy is running.
+
+**Changing models mid-session:**
+
+| What you pick in `/model` | What changes |
+|---------------------------|--------------|
+| A plain model (e.g. `claude-minimax-m3`) | **Both** orchestrator and worker → that model runs everything (once a worker was set explicitly, only the orchestrator changes) |
+| `Worker → <model>` | **Worker only** — orchestrator stays as-is |
+| Stock ids (`claude-opus-4-8`, sonnet, haiku) | **Neither tier** — they're remapped to your picks for background traffic |
+
+**Worker hit a rate limit mid-task?** Open `/model`, pick `Worker → <other model>`.
+The orchestrator tier is unchanged; only parallel workers/sub-agents switch.
+
+**`/model orchestrator` / `/model worker`?** Not available — `/model` is Claude
+Code's built-in picker; the proxy only sees the resulting model id on the next
+request. Use the plain vs `Worker →` entries above.
+
+### OpenAI-compat backend errors on long sessions (context length / 400)
+
+The proxy forwards the **entire** Anthropic transcript to `openai_compat` backends
+with no automatic trimming. On long multi-tool workflows a backend may return
+`context length exceeded`, `maximum context`, or similar 400s.
+
+- **First:** compact the session (`/compact` in Claude Code) or start a fresh
+  session and carry over only what you need.
+- **Switch worker only:** if the orchestrator is fine but workers are failing,
+  `/model` → `Worker → <model with a larger window>`.
+- **Proxy hint:** when the upstream error looks context-related, the proxy's error
+  message ends with a short note explaining that the full history was sent (the log keeps the raw upstream detail).
+
+Strict backends also require `content: null` (not `""`) on tool-only assistant
+turns; the proxy handles that automatically.
+
 ### The pre-launch selector doesn't open / says it cannot reach `/uc/select`
 
 - **Proxy not healthy yet or wrong port.** The launcher starts the proxy before

diff --git a/proxy.py b/proxy.py
@@ -64,6 +64,10 @@
   UC_MODEL_MAP       optional JSON, e.g. {"claude-opus-4-8":"my-model"}
   UC_LOG             optional log file path (default stderr)
   UC_VERBOSE         default 0
+  UC_BROWSER_UA      User-Agent for openai_compat upstreams (default: modern
+                     Chrome UA). Fixes CF 403 "browser_signature_banned" on
+                     providers like crof.ai. Override with env or per-route
+                     "headers".
 
 ROUTE SHAPE (config.json "routes" object)
 -----------------------------------------
@@ -146,6 +150,16 @@
 DIRECTIVES = {"planner": None, "strip": True}   # filled from config in main()
 _ROUTE_ALIASES = {}                              # normalized token -> concrete route id
 
+# BROWSER_UA: browser UA for openai_compat/classifier calls, and the fallback
+# User-Agent on forwarded requests. CF-protected providers (e.g. crof.ai) ban
+# Python-urllib (error 1010 "browser_signature_banned"). Matches droid/factory
+# clients. Override: UC_BROWSER_UA=... or route "headers".
+BROWSER_UA = os.environ.get(
+    "UC_BROWSER_UA",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
+    "(KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
+)
+
 # 1M context window: Claude Code sizes its context meter (and auto-compaction) to
 # 1M only when the model id it holds carries a "[1m]" suffix. For a real-Claude
 # passthrough route whose upstream model is 1M-capable, we ADVERTISE the picker id
@@ -186,6 +200,44 @@ def _advertise_id(model_entry):
         return mid + _ONEM_SUFFIX
     return mid
 
+
+def _display_name_for_id(mid):
+    """Map a model id to its display name; None for an empty id, the id itself if unknown."""
+    if not mid:
+        return None
+    # Custom models take precedence; the stock list is only consulted on a miss.
+    entry = next((m for m in UC_MODELS if m.get("id") == mid), None)
+    if entry is None:
+        entry = next((m for m in _stock_models() if m.get("id") == mid), None)
+    # Unknown ids, and entries without a display_name key, fall back to the raw id.
+    return mid if entry is None else entry.get("display_name", mid)
+
+
+def _orchestrator_worker_status():
+    """Snapshot the active orchestrator/worker selection (reported under /healthz)."""
+    with _SEL_LOCK:
+        snap = dict(_ACTIVE)  # copy under the lock; everything below reads the copy
+    orch_id, worker_id = snap.get("orch"), snap.get("worker")
+    tiers = {role: {"id": mid, "display_name": _display_name_for_id(mid)}
+             for role, mid in (("orchestrator", orch_id), ("worker", worker_id))}
+    return {
+        "enabled": ORCH_WORKER, **tiers,
+        "worker_explicit": snap.get("worker_explicit", False),
+        "same_model": bool(orch_id and worker_id and orch_id == worker_id),
+    }
+
+
+def _context_length_hint(detail):
+    """Return a context-overflow hint for an upstream error body, or "" if unrelated."""
+    low = (detail or "").lower()  # phrases only: bare "context"/"token" also match auth errors
+    if any(x in low for x in ("context length", "context_length", "maximum context",
+                              "too long", "too many tokens", "length exceeded")):
+        return (" (This backend rejected the full conversation history — the proxy forwards "
+                "the entire transcript with no trimming. Try compacting the session, switching "
+                "to a backend with a larger context window, or starting a fresh session.)")
+    return ""
+
+
 try:
     UC_MODEL_MAP = json.loads(os.environ.get("UC_MODEL_MAP", "") or "{}")
     if not isinstance(UC_MODEL_MAP, dict):
@@ -1288,10 +1340,14 @@ def _flush_tool_replies(by_id):
                             "arguments": json.dumps(block.get("input") or {}, ensure_ascii=False),
                         },
                     })
-            entry = {"role": "assistant", "content": "\n".join(p for p in text_parts if p)}
+            text = "\n".join(p for p in text_parts if p)
+            entry = {"role": "assistant"}
             if tool_calls:
                 entry["tool_calls"] = tool_calls
+                entry["content"] = text if text else None
                 pending_tool_ids[:] = [tc["id"] for tc in tool_calls]
+            else:
+                entry["content"] = text
             messages.append(entry)
             continue
 
@@ -1736,7 +1792,8 @@ def _classifier_complete(slot, system_prompt, user_content, timeout):
                 payload[bk] = _expand_env(bv) if isinstance(bv, str) else bv
         data = json.dumps(payload).encode("utf-8")
         headers = {"Content-Type": "application/json", "Accept": "application/json",
-                   "Content-Length": str(len(data))}
+                   "Content-Length": str(len(data)), "User-Agent": BROWSER_UA,
+                   "Accept-Language": "en-US,en;q=0.9"}
         auth = slot.get("auth")
         if auth and auth != "passthrough":
             Handler._apply_auth_header(headers, auth)
@@ -1757,7 +1814,7 @@ def _classifier_complete(slot, system_prompt, user_content, timeout):
                "messages": [{"role": "user", "content": user_content}]}
     data = json.dumps(payload).encode("utf-8")
     headers = {"Content-Type": "application/json", "Content-Length": str(len(data)),
-               "anthropic-version": "2023-06-01"}
+               "anthropic-version": "2023-06-01", "User-Agent": BROWSER_UA}
     auth = slot.get("auth")
     if auth and auth != "passthrough":
         Handler._apply_auth_header(headers, auth)
@@ -1960,6 +2017,7 @@ def _maybe_health(self) -> bool:
                     "candidates": [{"id": c["id"], "cost": c.get("cost")}
                                    for c in _router_available_candidates()],
                 },
+                "orchestrator_worker": _orchestrator_worker_status(),
                 "custom_models": [{"id": _advertise_id(m), "display_name": m["display_name"]}
                                   for m in UC_MODELS],
                 "stock_models": [{"id": m["id"], "display_name": m["display_name"]}
@@ -2041,6 +2099,7 @@ def _handle_models(self) -> bool:
         fwd_headers = {k: v for k, v in self.headers.items()
                        if k.lower() not in _HOP_BY_HOP}
         fwd_headers["Accept-Encoding"] = "identity"
+        fwd_headers.setdefault("User-Agent", BROWSER_UA)
         url = UPSTREAM + self.path
         base = {"data": [], "has_more": False, "first_id": None, "last_id": None}
         try:
@@ -2126,6 +2185,7 @@ def _proxy(self, method: str):
         for hk, hv in (route.get("headers") or {}).items():
             fwd_headers[hk] = hv
         fwd_headers["Accept-Encoding"] = "identity"
+        fwd_headers.setdefault("User-Agent", BROWSER_UA)
         if body:
             fwd_headers["Content-Length"] = str(len(body))
         req = urllib.request.Request(url, data=body or None,
@@ -2183,6 +2243,8 @@ def _handle_openai_compat(self, body: bytes, route: dict):
             "Content-Type": "application/json",
             "Accept": "text/event-stream" if want_stream else "application/json",
             "Content-Length": str(len(payload)),
+            "User-Agent": BROWSER_UA,
+            "Accept-Language": "en-US,en;q=0.9",
         }
         auth_override = route.get("auth")
         if auth_override and auth_override != "passthrough":
@@ -2204,9 +2266,11 @@ def _mk_events():
                     detail = e.read().decode("utf-8", "replace")[:800]
                 except Exception:
                     pass
+                hint = _context_length_hint(detail)
                 log("openai_compat upstream HTTP %s for %s: %s" % (e.code, url, detail))
                 yield {"type": "error", "status": e.code,
-                       "message": "openai_compat upstream %s: %s" % (e.code, detail)}
+                       "message": "openai_compat upstream %s: %s%s"
+                       % (e.code, detail, hint)}
                 return
             except Exception as e:
                 log("openai_compat upstream error %s for %s" % (e, url))

diff --git a/test_proxy.py b/test_proxy.py
@@ -492,6 +492,20 @@ def _pin(text):
             _saved[0], _saved[1], _saved[2], _saved[3])
         print("[ok] routing directives: opt-in default-off / NL opt-in / surgical strip / planner-gated / gpt-collision / dispatch / [1m] strip + advertise")
 
+        # issue #14: tool-only assistant turns must use content=null (not "") for
+        # strict OpenAI-compat backends on long multi-tool transcripts.
+        oai_tool_only = up.anthropic_to_openai({"model": "x", "messages": [
+            {"role": "assistant", "content": [{"type": "tool_use", "id": "call_1",
+                                                "name": "Bash", "input": {}}]},
+            {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "call_1",
+                                           "content": "ok"}]},
+        ]})
+        assert oai_tool_only["messages"][0]["content"] is None
+        assert oai_tool_only["messages"][0]["tool_calls"]
+        assert up._context_length_hint("context length exceeded") != ""
+        assert up._context_length_hint("unrelated error") == ""
+        print("[ok] openai_compat long-context hygiene: tool-only content=null + context hint")
+
         # issue #3: a rejected tool call (with or without a comment) must not leave
         # an assistant tool_calls message unanswered, and tool replies must come
         # BEFORE the user's text — otherwise strict backends (DeepSeek) 400 with

diff --git a/windows/Start-UltraCode.ps1 b/windows/Start-UltraCode.ps1
@@ -26,6 +26,7 @@
 #>
 param(
     [switch]$ProxyOnly,
+    [switch]$Status,
     [int]$Port = 0,
     [string]$Upstream = ""
 )
@@ -216,6 +217,30 @@ function Stop-ProxyIfLast {
     }
 }
 
+if ($Status) {  # -Status: print the live orchestrator/worker tiers from /healthz, then exit
+    if (-not (Test-ProxyHealthy)) {
+        Write-Error "UltraCode proxy is not running on $BaseUrl. Start it with: .\windows\Start-UltraCode.ps1"
+        exit 1
+    }
+    $health = Invoke-RestMethod -Uri "$BaseUrl/healthz" -TimeoutSec 3
+    $ow = $health.orchestrator_worker
+    Write-Host "UltraCode proxy: $BaseUrl"
+    Write-Host ("Orchestrator/worker routing: " + $(if ($ow.enabled) { "on" } else { "off" }))
+    if ($ow.enabled) {
+        $o = $ow.orchestrator; $w = $ow.worker  # an unset tier prints "(not set)", like `ultracode status`
+        Write-Host ("  Orchestrator: " + $(if ($o.id) { "{0} ({1})" -f $o.display_name, $o.id } else { "(not set)" }))
+        Write-Host ("  Worker:       " + $(if ($w.id) { "{0} ({1})" -f $w.display_name, $w.id } else { "(not set)" }))
+        if ($ow.worker_explicit) {
+            Write-Host "  (worker set explicitly — plain /model picks change orchestrator only)"
+        } elseif ($ow.same_model) {
+            Write-Host "  (same model runs orchestrator and all workers)"
+        }
+    }
+    # Windows PowerShell aliases `curl` to Invoke-WebRequest, so the hint names curl.exe explicitly.
+    Write-Host "Live detail: curl.exe -s $BaseUrl/healthz | python -m json.tool"
+    exit 0
+}
+
 New-Item -ItemType Directory -Force -Path $RefDir | Out-Null
 Save-GlobalModel
 New-Item -ItemType File -Force -Path $OwnerRef | Out-Null