Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions components/debug/limit-tester.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ export function LimitTester() {
const [naturalDimensions, setNaturalDimensions] = useState<{ w: number; h: number } | null>(null)

const startGeneration = useMutation(api.singleGeneration.startGeneration)
const dispatchGeneration = useAction(api.singleGeneration.dispatchGeneration)
const processGeneration = useAction(api.singleGenerationProcessor.processGeneration)

// BYOP context for API key
const apiKey = usePollenApiKey()
Expand Down Expand Up @@ -81,7 +81,7 @@ export function LimitTester() {
apiKey,
})
setCurrentGenId(id)
void Promise.resolve(dispatchGeneration({ generationId: id, apiKey })).catch((dispatchError) => {
void Promise.resolve(processGeneration({ generationId: id, apiKey })).catch((dispatchError) => {
console.error("Immediate dispatch failed; recovery path will retry:", dispatchError)
})
} catch (error) {
Expand Down
22 changes: 21 additions & 1 deletion components/studio/batch/batch-action-button.test.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import "@testing-library/jest-dom/vitest"
import { render, screen, fireEvent } from "@testing-library/react"
import { describe, it, expect, vi } from "vitest"
import { beforeEach, describe, expect, it, vi } from "vitest"
import { BatchActionButton } from "./batch-action-button"

describe("BatchActionButton", () => {
Expand Down Expand Up @@ -53,6 +54,25 @@ describe("BatchActionButton", () => {
expect(onResume).toHaveBeenCalledTimes(1)
})

it("disables resume while paused items are still in flight", () => {
const onResume = vi.fn()
render(
<BatchActionButton
{...defaultProps}
isPaused={true}
inFlightCount={2}
onResume={onResume}
/>
)

const resumeButton = screen.getByRole("button", { name: /resume/i })
expect(resumeButton).toBeDisabled()

fireEvent.click(resumeButton)

expect(onResume).not.toHaveBeenCalled()
})

it("calls onCancel when cancel button is clicked", () => {
const onCancel = vi.fn()
render(<BatchActionButton {...defaultProps} onCancel={onCancel} />)
Expand Down
4 changes: 4 additions & 0 deletions components/studio/batch/batch-action-button.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export const BatchActionButton = React.memo(function BatchActionButton({
className,
}: BatchActionButtonProps) {
const progress = totalCount > 0 ? (completedCount / totalCount) * 100 : 0
const isResumeDisabled = isPaused && inFlightCount > 0

// Show in-flight indicator when paused and items are still processing
const showInFlightIndicator = isPaused && inFlightCount > 0
Expand Down Expand Up @@ -94,6 +95,7 @@ export const BatchActionButton = React.memo(function BatchActionButton({
{/* The actual button - positioned above progress */}
<Button
onClick={isPaused ? onResume : onPause}
disabled={isResumeDisabled}
variant="ghost"
size="lg"
className={cn(
Expand All @@ -102,11 +104,13 @@ export const BatchActionButton = React.memo(function BatchActionButton({
"border border-border/40 hover:border-border/60",
"rounded-lg overflow-hidden",
"transition-all duration-300",
isResumeDisabled && "cursor-not-allowed opacity-70",
// Text color based on state
isPaused
? "text-amber-600 dark:text-amber-400"
: "text-emerald-700 dark:text-emerald-300"
)}
title={isResumeDisabled ? "Wait for in-flight images to finish before resuming" : undefined}
>
{/* Icon with subtle animation */}
<span className={cn(
Expand Down
2 changes: 1 addition & 1 deletion components/studio/upgrade-modal.test.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ describe("UpgradeModal", () => {
expect(
screen.getByText("generations / day"),
).toBeInTheDocument();
expect(screen.getByText("9")).toBeInTheDocument();
expect(screen.getByText("10")).toBeInTheDocument();
expect(screen.getByText("AI models included")).toBeInTheDocument();
expect(screen.getByText("Daily")).toBeInTheDocument();
});
Expand Down
215 changes: 215 additions & 0 deletions convex-optimization-branch-review.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
# Convex Optimization Branch Review

## Scope

I reviewed the branch against `origin/main` with focus on the changes tied to `convex-optimiziations.md`, especially:

- `convex/batchGeneration.ts`
- `convex/batchProcessor.ts`
- `convex/singleGeneration.ts`
- `convex/singleGenerationProcessor.ts`
- `hooks/queries/use-generate-image.ts`
- `components/debug/limit-tester.tsx`
- related tests and UI surfaces

I also cross-checked Convex-specific judgments against current official guidance:

- `https://docs.convex.dev/functions/actions`
- `https://docs.convex.dev/understanding/best-practices/`
- `https://docs.convex.dev/scheduling/scheduled-functions`
- `@convex-dev/workpool` README

I ran the relevant test files as well:

```bash
bun run test hooks/queries/use-generate-image.test.tsx hooks/queries/use-batch-generation.test.ts hooks/use-batch-mode.test.ts
```

Those tests all passed, which is important context for the testing-gap finding below.

## High-level take

The overall direction of the branch is good and largely aligned with current Convex guidance:

- moving retry waiting out of long-running actions is directionally correct
- adding hard fetch timeouts is good
- removing the extra single-generation action wrapper is directionally correct
- adaptive throttling is a reasonable next step

The main problem is that the new batch state machine has a correctness bug around `pause` / `resume` under pipelined scheduling.

## Findings

## P1 - Resuming a paused batch can duplicate item processing

- **Severity**: P1
- **Type**: clear bug, business logic regression, UX regression
- **Where**:
- `convex/batchGeneration.ts:276-335`
- `convex/batchGeneration.ts:700-738`
- `convex/batchProcessor.ts:192-225`
- `components/studio/batch/batch-action-button.tsx:49-50`
- `components/studio/batch/batch-action-button.tsx:95-125`

### Why this is a bug

`currentIndex` is advanced when the next item is **scheduled**, not when it is **completed**.

That means a paused batch can legitimately be in this state:

- item `N` already scheduled / still in flight
- `currentIndex === N`
- UI shows `"N images finishing..."`

`resumeBatchJob` then immediately schedules `currentIndex` again:

- `convex/batchGeneration.ts:731-735`

So if the user resumes before the already-scheduled item drains, the same item index can be processed twice.

Because each first-attempt batch item also pipelines the next item at the start of processing:

- `convex/batchProcessor.ts:192-204`

this can cascade into:

- duplicate generated images
- inflated `completedCount` / `failedCount`
- premature `totalProcessed >= totalCount`
- early deletion of the batch job record

### Why the UX contributes to it

The UI explicitly surfaces that items are still finishing while paused:

- `showInFlightIndicator = isPaused && inFlightCount > 0`

but the same button still exposes `Resume` immediately. So the UI currently invites the exact unsafe transition that corrupts the batch state.

### Recommendation

Resume should not schedule a new item while any prior scheduled work is still outstanding.

At minimum, resume needs one of these invariants:

- only resume when `inFlightCount === 0`
- track a separate `nextUnprocessedIndex` instead of reusing `currentIndex`
- persist scheduled item identities so resume can tell whether the next index is already queued

## P2 - `inFlightCount` becomes wrong after resume

- **Severity**: P2
- **Type**: clear bug, UI/UX regression
- **Where**:
- `convex/batchGeneration.ts:151-175`
- `convex/batchGeneration.ts:303-318`
- `convex/batchGeneration.ts:438-450`
- `convex/batchGeneration.ts:724-735`

### Why this is a bug

The system treats `inFlightCount` as including already-scheduled outstanding work:

- initial batch starts with `inFlightCount: 1`
- scheduling the next item increments it
- recording a result decrements it

But `resumeBatchJob` schedules work without incrementing `inFlightCount`.

That means after a clean paused state with `inFlightCount = 0`, resuming undercounts by one immediately. From there the counter can hit zero even though work is still running or scheduled.

### Impact

- the paused UI can claim everything has drained when one item is still outstanding
- users can think it is safe to resume / pause again when it is not
- any future logic that relies on `inFlightCount` as a drain signal will be unreliable

This also makes the P1 duplication bug easier to trigger repeatedly.

### Recommendation

Either:

- increment `inFlightCount` when resume schedules the restart item

or better:

- redesign the batch state model so `inFlightCount` is derived from durable queued/running state instead of manual increments/decrements

## P3 - The new batch lifecycle is not protected by regression tests

- **Severity**: P3
- **Type**: high-value testing gap
- **Where**:
- `hooks/use-batch-mode.test.ts:306-442`
- `hooks/queries/use-batch-generation.test.ts:46-107`
- `hooks/queries/use-batch-generation.test.ts:141-171`

### Why this matters

I ran the relevant tests and they all passed, but the current coverage only checks:

- basic hook return shapes
- simple paused/processing flag mapping
- simple progress mapping

It does **not** cover the dangerous transitions introduced by the optimization work:

- pause while scheduled items are still outstanding
- resume before in-flight work drains
- `currentIndex` / `inFlightCount` invariants under pipelining
- duplicate scheduling protection

This is why the P1/P2 bug can exist while the branch still looks green.

### Recommendation

Add targeted tests around the batch state machine, not just hook shape tests. The minimum valuable cases are:

- paused batch with `inFlightCount > 0` cannot safely schedule the same `currentIndex` twice
- resume preserves correct `inFlightCount`
- total processed count cannot be incremented twice for the same item index

## P4 - Old public single-generation wrapper is now stale API surface

- **Severity**: P4
- **Type**: maintainability / API hygiene
- **Where**:
- `convex/singleGeneration.ts:106-134`
- current consumers now use `api.singleGenerationProcessor.processGeneration`

### Why this matters

This branch correctly moved the hot path to the public `singleGenerationProcessor.processGeneration` action, which removes the extra wrapper layer and is more in line with current Convex guidance about avoiding unnecessary `runAction` overhead.

However, `singleGeneration.dispatchGeneration` is still exported as a public action even though current consumers no longer use it.

That leaves two public entrypoints for the same behavior:

- the new direct path
- the old wrapper path

Low severity, but it increases confusion and makes it easy for a future caller to accidentally reintroduce the extra wrapper overhead.

### Recommendation

Remove the stale public wrapper or make the public API converge on one entrypoint only.

## Non-findings worth calling out

These branch decisions looked good after doc validation and I would keep them:

- **Retry rescheduling instead of sleeping in actions**
- aligned with the cost problem described in the spec
- **Hard provider fetch timeout**
- good protection against long-tail provider stalls
- **Removing the extra single-generation action wrapper from the hot path**
- aligned with current Convex guidance that `runAction` should generally only be used when crossing runtimes
- **Adaptive throttling as a scheduler-level control**
- reasonable, and Workpool is not obviously required yet if the batch lifecycle bugs are fixed first

## Recommended fix order

- **First**: fix P1 and P2 together in the batch state machine
- **Second**: add the regression tests from P3 before making further scheduler tweaks
- **Third**: clean up the stale single-generation wrapper from P4
45 changes: 45 additions & 0 deletions convex-optimiziations.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
Yes — there’s a clear path to reduce this **a lot** while keeping the same UX.

## What’s likely driving cost

Based on your code + latest Convex docs:

- `singleGenerationProcessor.processGeneration` and `batchProcessor.processBatchItem` are both Node actions (`"use node"`), and Node actions are billed at **512MB** of runtime memory ([limits](https://docs.convex.dev/production/state/limits.md)).
- Both actions spend significant wall-time waiting on external work (Pollinations + retries), and in `fetchWithRetry` you currently **sleep inside the action** between retries (`convex/lib/retry.ts`).
- `singleGeneration.dispatchGeneration` wraps `processGeneration` via `ctx.runAction(...)` (`convex/singleGeneration.ts`), which Convex docs call out as overhead-heavy unless crossing runtimes ([actions best practices](https://docs.convex.dev/functions/actions)).
- Batch scheduling is aggressively pipelined (`100ms + jitter`) in `convex/batchGeneration.ts`, which can increase 429/5xx retry pressure and burn more GBh.

## Highest-impact fixes (without UX loss)

1. **Move retry backoff out of running actions**
- Instead of `sleep(...)` in `fetchWithRetry`, schedule the next retry attempt via `ctx.scheduler.runAfter(...)` and exit.
- Keeps persistence, keeps reactive UX, cuts billed “idle waiting” time.

2. **Add hard fetch timeout + retry by reschedule**
- Abort slow provider calls (e.g. 45-90s depending on image/video), mark attempt, reschedule.
- Prevents long-tail stuck calls from eating GBh.

3. **Adaptive batch throttling**
- Dynamically increase inter-item delay when 429/5xx spike, decrease when healthy.
- Usually lowers retries and total GBh while keeping throughput stable.

4. **Remove extra action wrapper overhead in single flow**
- Current: client -> `dispatchGeneration` action -> `runAction(processGeneration)`.
- Better: reduce one layer (or switch back to a mutation-scheduled processor if acceptable).
- This is a smaller win than #1/#2 but still worthwhile.

5. **Stop burning real generation compute in dev by default**
- Your `dev` usage is huge too. Add a dev-only mock/placeholder path unless explicitly enabled.

## Convex-idiomatic guidance (2026 docs)

- Mutation that records intent + schedules background work is still the canonical pattern ([scheduled functions](https://docs.convex.dev/scheduling/scheduled-functions), [actions](https://docs.convex.dev/functions/actions)).
- Free tier has tight scheduled concurrency; if you need controlled parallelism/retries, `@convex-dev/workpool` is now a strong fit ([workpool README](https://raw.githubusercontent.com/get-convex/workpool/main/README.md)).

## Recommended order for you

- **Phase 1 (fast, biggest ROI):** #1 + #2 + #5
- **Phase 2:** #3
- **Phase 3:** #4 / workpool migration if needed

If you want, I can implement **Phase 1 now** directly in your codebase (retry rescheduling + timeout + dev guard) and keep your current UX contract intact.
2 changes: 2 additions & 0 deletions convex/_generated/api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import type * as favorites from "../favorites.js";
import type * as follows from "../follows.js";
import type * as generatedImages from "../generatedImages.js";
import type * as http from "../http.js";
import type * as lib_batchGenerationState from "../lib/batchGenerationState.js";
import type * as lib_crypto from "../lib/crypto.js";
import type * as lib_dirtberryCrop from "../lib/dirtberryCrop.js";
import type * as lib_groq from "../lib/groq.js";
Expand Down Expand Up @@ -73,6 +74,7 @@ declare const fullApi: ApiFromModules<{
follows: typeof follows;
generatedImages: typeof generatedImages;
http: typeof http;
"lib/batchGenerationState": typeof lib_batchGenerationState;
"lib/crypto": typeof lib_crypto;
"lib/dirtberryCrop": typeof lib_dirtberryCrop;
"lib/groq": typeof lib_groq;
Expand Down
Loading
Loading