From c133a509878f1e3b3787c45190ac21fb4f2f8659 Mon Sep 17 00:00:00 2001
From: Martin Marinov <martinbmarinov@gmail.com>
Date: Fri, 12 Jun 2026 10:51:02 +0300
Subject: [PATCH] fix(site): raise the gateway-proxy rate limit so encrypted
 chat works

Regression from the rate-limiting in #111. One encrypted inference through
/api/gw/* is not one request: it is ~5 setup calls (auth challenge + verify,
session select, prepare, blob upload) PLUS a relay-token poll that fires up to
30 times (once a second until the worker is ready), and the playground retries
up to 3 workers. The 30/min per-IP cap was used up by the ~5 setup calls plus
~25 polls, so the token poll got HTTP 429 whenever the worker took more than
~25s (the common case - a full run is ~50s). The relay token then never
resolved and the answer never streamed: the 'starts the session, returns
nothing' chat bug. A direct-gateway SDK call (no proxy, no cap) returns a worker
verdict in ~50s, which isolates the cap as the cause.

Raise /api/gw/ to 600/min per IP (the upstream gateway has its own limits; this
only stops gross abuse of the open proxy). DAO/operator-preview keep 30/min.
---
 lib/rate-limit.ts             | 8 +++++++-
 tests/unit/rate-limit.test.ts | 7 +++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/lib/rate-limit.ts b/lib/rate-limit.ts
index bd4e1c5..4e5cae3 100644
--- a/lib/rate-limit.ts
+++ b/lib/rate-limit.ts
@@ -15,7 +15,13 @@ export interface RateLimitRule {
 /** Expensive routes fan out to chains/subgraphs or relay upstream; everything
  *  else is a cheap cached read. Matched by path prefix, first hit wins. */
 export const API_RULES: { prefix: string; rule: RateLimitRule }[] = [
-  { prefix: "/api/gw/", rule: { limit: 30, windowMs: 60_000 } },
+  // The gateway proxy is the encrypted-inference HOT PATH: ONE inference makes
+  // ~5 setup calls plus a relay-token poll of up to 30 requests (1/sec until the
+  // worker is ready), and the playground retries up to 3 workers. A low cap here
+  // 429s the token poll on a slow worker and the answer never streams back (the
+  // "nothing returned" bug). Give it real headroom; the upstream gateway has its
+  // own limits, this only stops gross abuse of the open proxy.
+  { prefix: "/api/gw/", rule: { limit: 600, windowMs: 60_000 } },
   { prefix: "/api/dao-", rule: { limit: 30, windowMs: 60_000 } },
   { prefix: "/api/operator-preview", rule: { limit: 30, windowMs: 60_000 } },
   { prefix: "/api/sdk-demo", rule: { limit: 20, windowMs: 60_000 } },
diff --git a/tests/unit/rate-limit.test.ts b/tests/unit/rate-limit.test.ts
index 901787f..b7248dc 100644
--- a/tests/unit/rate-limit.test.ts
+++ b/tests/unit/rate-limit.test.ts
@@ -4,11 +4,14 @@ import { consume, ruleFor, resetRateLimiter, API_RULES } from "@/lib/rate-limit"
 const RULE = { limit: 3, windowMs: 60_000 };
 
 describe("ruleFor", () => {
-  it("gives the gateway proxy, DAO scans, and operator preview the tight budget", () => {
-    for (const p of ["/api/gw/mainnet/api/models", "/api/dao-proposals", "/api/operator-preview"]) {
+  it("gives DAO scans and operator preview the tight budget", () => {
+    for (const p of ["/api/dao-proposals", "/api/operator-preview"]) {
       expect(ruleFor(p)?.limit).toBe(30);
     }
   });
+  it("gives the gateway proxy real headroom (one inference is ~35 calls incl. the token poll)", () => {
+    expect(ruleFor("/api/gw/mainnet/api/models")?.limit).toBe(600);
+  });
   it("covers every other api route with the default budget and skips pages", () => {
     expect(ruleFor("/api/network")?.limit).toBe(120);
     expect(ruleFor("/build/dao")).toBeNull();