From c133a509878f1e3b3787c45190ac21fb4f2f8659 Mon Sep 17 00:00:00 2001 From: Martin Marinov Date: Fri, 12 Jun 2026 10:51:02 +0300 Subject: [PATCH] fix(site): raise the gateway-proxy rate limit so encrypted chat works Regression from the rate-limiting in #111. One encrypted inference through /api/gw/* is not one request: it is ~5 setup calls (auth challenge + verify, session select, prepare, blob upload) PLUS a relay-token poll that fires up to 30 times (once a second until the worker is ready), and the playground retries up to 3 workers. The 30/min per-IP cap 429'd the token poll whenever the worker took more than ~25s (the common case - a full run is ~50s), so the relay token never resolved and the answer never streamed: the 'starts the session, returns nothing' chat bug. A direct-gateway SDK call (no proxy, no cap) returns a worker verdict in ~50s, which isolates the cap as the cause. Raise /api/gw/ to 600/min per IP (the upstream gateway has its own limits; this only stops gross abuse of the open proxy). DAO/operator-preview keep 30/min. --- lib/rate-limit.ts | 8 +++++++- tests/unit/rate-limit.test.ts | 7 +++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/lib/rate-limit.ts b/lib/rate-limit.ts index bd4e1c5..4e5cae3 100644 --- a/lib/rate-limit.ts +++ b/lib/rate-limit.ts @@ -15,7 +15,13 @@ export interface RateLimitRule { /** Expensive routes fan out to chains/subgraphs or relay upstream; everything * else is a cheap cached read. Matched by path prefix, first hit wins. */ export const API_RULES: { prefix: string; rule: RateLimitRule }[] = [ - { prefix: "/api/gw/", rule: { limit: 30, windowMs: 60_000 } }, + // The gateway proxy is the encrypted-inference HOT PATH: ONE inference makes + // ~5 setup calls plus a relay-token poll of up to 30 requests (1/sec until the + // worker is ready), and the playground retries up to 3 workers. A low cap here + // 429s the token poll on a slow worker and the answer never streams back (the + // "nothing returned" bug). Give it real headroom; the upstream gateway has its + // own limits, this only stops gross abuse of the open proxy. + { prefix: "/api/gw/", rule: { limit: 600, windowMs: 60_000 } }, { prefix: "/api/dao-", rule: { limit: 30, windowMs: 60_000 } }, { prefix: "/api/operator-preview", rule: { limit: 30, windowMs: 60_000 } }, { prefix: "/api/sdk-demo", rule: { limit: 20, windowMs: 60_000 } }, diff --git a/tests/unit/rate-limit.test.ts b/tests/unit/rate-limit.test.ts index 901787f..b7248dc 100644 --- a/tests/unit/rate-limit.test.ts +++ b/tests/unit/rate-limit.test.ts @@ -4,11 +4,14 @@ import { consume, ruleFor, resetRateLimiter, API_RULES } from "@/lib/rate-limit" const RULE = { limit: 3, windowMs: 60_000 }; describe("ruleFor", () => { - it("gives the gateway proxy, DAO scans, and operator preview the tight budget", () => { - for (const p of ["/api/gw/mainnet/api/models", "/api/dao-proposals", "/api/operator-preview"]) { + it("gives DAO scans and operator preview the tight budget", () => { + for (const p of ["/api/dao-proposals", "/api/operator-preview"]) { expect(ruleFor(p)?.limit).toBe(30); } }); + it("gives the gateway proxy real headroom (one inference is ~35 calls incl. the token poll)", () => { + expect(ruleFor("/api/gw/mainnet/api/models")?.limit).toBe(600); + }); it("covers every other api route with the default budget and skips pages", () => { expect(ruleFor("/api/network")?.limit).toBe(120); expect(ruleFor("/build/dao")).toBeNull();