diff --git a/apps/web/src/routers/admin-kiloclaw-instances-router.test.ts b/apps/web/src/routers/admin-kiloclaw-instances-router.test.ts index 433ab423e..a428cc851 100644 --- a/apps/web/src/routers/admin-kiloclaw-instances-router.test.ts +++ b/apps/web/src/routers/admin-kiloclaw-instances-router.test.ts @@ -3233,6 +3233,42 @@ describe('admin.kiloclawInstances.destroyOrphanVolume', () => { expect(mockDestroyOrphanVolume).not.toHaveBeenCalled(); }); + it('clears the grace gate for a long-destroyed instance stored as Postgres timestamp text', async () => { + // Regression: the grace check is evaluated in SQL, never by parsing the + // stored timestamp with the JS `Date` constructor. A row destroyed 60 + // days ago — written in Postgres native timestamp text, not ISO 8601 — + // must clear the 7-day grace gate and reach the destroy handoff. + const destroyedAt = new Date(Date.now() - 60 * 86_400_000) + .toISOString() + .replace('T', ' ') + .replace('Z', '+00'); + const [instance] = await db + .insert(kiloclaw_instances) + .values({ + id: crypto.randomUUID(), + user_id: regularUser.id, + sandbox_id: `ki_${crypto.randomUUID().replace(/-/g, '')}`, + destroyed_at: destroyedAt, + }) + .returning({ id: kiloclaw_instances.id }); + mockDestroyOrphanVolume.mockResolvedValue({ + ok: true, + flyApp: 'inst-grace', + volumeId: VOLUME_ID, + volumeName: 'kiloclaw_grace', + alreadyGone: false, + }); + const caller = await createCallerForUser(adminUser.id); + + const result = await caller.admin.kiloclawInstances.destroyOrphanVolume({ + instanceId: instance.id, + volumeId: VOLUME_ID, + }); + + expect(result).toMatchObject({ success: true }); + expect(mockDestroyOrphanVolume).toHaveBeenCalledTimes(1); + }); + it('rejects when the user has an access-granting subscription', async () => { const instanceId = await insertDestroyedInstance({ destroyedAt: daysAgo(30), diff --git a/apps/web/src/routers/admin-kiloclaw-instances-router.ts b/apps/web/src/routers/admin-kiloclaw-instances-router.ts index b25698bf7..223e07fa0 100644 --- a/apps/web/src/routers/admin-kiloclaw-instances-router.ts +++ b/apps/web/src/routers/admin-kiloclaw-instances-router.ts @@ -4121,17 +4121,22 @@ export const adminKiloclawInstancesRouter = createTRPCRouter({ sandbox_id: kiloclaw_instances.sandbox_id, organization_id: kiloclaw_instances.organization_id, destroyed_at: kiloclaw_instances.destroyed_at, - // The latest `destroyed_at` across every destroyed row of this - // (user, sandbox). A reprovisioned sandbox has several destroyed - // rows sharing one Fly volume; the grace period runs from the most - // recent destruction, not whichever row the admin selected. - latest_sandbox_destroyed_at: sql`( - select max(latest.destroyed_at) - from ${kiloclaw_instances} as latest - where latest.user_id = ${kiloclaw_instances.user_id} - and latest.sandbox_id = ${kiloclaw_instances.sandbox_id} - and latest.destroyed_at is not null - )`, + // Whether the orphan-volume grace period has elapsed, evaluated + // entirely in Postgres. Grace runs from the LATEST destruction of + // this (user, sandbox): a reprovisioned sandbox has several + // destroyed rows sharing one Fly volume, so the clock follows the + // most recent destruction, not whichever row the admin selected. + // Computing this in SQL avoids parsing a database timestamp with + // the JS `Date` constructor, whose handling of Postgres timestamp + // text differs across the Vercel and Cloudflare runtimes. + grace_period_elapsed: sql` + extract(epoch from (now() - ( + select max(latest.destroyed_at) + from ${kiloclaw_instances} as latest + where latest.user_id = ${kiloclaw_instances.user_id} + and latest.sandbox_id = ${kiloclaw_instances.sandbox_id} + and latest.destroyed_at is not null + ))) * 1000 > ${ORPHAN_VOLUME_GRACE_PERIOD_MS}`, }) .from(kiloclaw_instances) .where(eq(kiloclaw_instances.id, input.instanceId)) @@ -4152,10 +4157,10 @@ export const adminKiloclawInstancesRouter = createTRPCRouter({ // 3. Grace period, measured from the latest destruction of this // sandbox — give Fly + the DO sweep time to self-heal first. - const now = new Date(); - const latestDestroyedAt = row.latest_sandbox_destroyed_at ?? row.destroyed_at; - const destroyedMsAgo = now.getTime() - new Date(latestDestroyedAt).getTime(); - if (destroyedMsAgo <= ORPHAN_VOLUME_GRACE_PERIOD_MS) { + // `grace_period_elapsed` is computed by Postgres in the query above; + // `false` or `null` (no destroyed row, already ruled out by gate 2) + // both fail closed. + if (row.grace_period_elapsed !== true) { throw new TRPCError({ code: 'PRECONDITION_FAILED', message: 'Instance was destroyed too recently — wait out the 7-day grace period', @@ -4173,7 +4178,7 @@ export const adminKiloclawInstancesRouter = createTRPCRouter({ organization_id: row.organization_id, }; const { accessGrantingContextKeys, pendingDestructionContextKeys } = - await getOrphanVolumeContextProtections(db, [context], now); + await getOrphanVolumeContextProtections(db, [context], new Date()); const contextKey = orphanVolumeSubscriptionContextKey(context); if (accessGrantingContextKeys.has(contextKey)) { throw new TRPCError({ diff --git a/services/kiloclaw/src/routes/platform-orphan-volume.test.ts b/services/kiloclaw/src/routes/platform-orphan-volume.test.ts index a8f305519..a05615ef3 100644 --- a/services/kiloclaw/src/routes/platform-orphan-volume.test.ts +++ b/services/kiloclaw/src/routes/platform-orphan-volume.test.ts @@ -51,8 +51,9 @@ const VOLUME_NAME = volumeNameFromSandboxId(SANDBOX_ID); /** * Instance row for INSTANCE_ID that passes the identity / destroyed / - * grace gates: identity matches, destroyed long ago, and the sandbox's - * latest destruction (`latestSandboxDestroyedAt`) is also long ago. + * grace gates: identity matches, destroyed long ago, and the grace period + * (`gracePeriodElapsed`, computed in SQL by the endpoint's instance query) + * has elapsed. */ const DEFAULT_DESTROY_ROW = { id: INSTANCE_ID, @@ -60,7 +61,7 @@ const DEFAULT_DESTROY_ROW = { sandboxId: SANDBOX_ID, organizationId: null, destroyedAt: new Date(Date.now() - 30 * 86_400_000).toISOString(), - latestSandboxDestroyedAt: new Date(Date.now() - 30 * 86_400_000).toISOString(), + gracePeriodElapsed: true, }; /** @@ -395,7 +396,7 @@ describe('POST /admin/orphan-volume-destroy', () => { sandboxId: legacySandbox, organizationId: null, destroyedAt: new Date(Date.now() - 30 * 86_400_000).toISOString(), - latestSandboxDestroyedAt: new Date(Date.now() - 30 * 86_400_000).toISOString(), + gracePeriodElapsed: true, }); vi.mocked(fly.listVolumes).mockResolvedValue([ flyVolume({ id: legacyVolumeId, name: volumeNameFromSandboxId(legacySandbox) }), @@ -534,33 +535,16 @@ describe('POST /admin/orphan-volume-destroy', () => { }); it('refuses (409) while the instance is within the grace period', async () => { + // `gracePeriodElapsed` is computed by the endpoint's instance query in + // SQL — `max(destroyed_at)` of the (user, sandbox) versus the grace + // window — so an older submitted row of a sandbox reprovisioned and + // destroyed again recently is still blocked. That SQL is exercised + // end-to-end against Postgres by the web router's `destroyOrphanVolume` + // test; here the worker just honors the precomputed flag. const { env } = makeEnv(); mockDestroyLookup({ ...DEFAULT_DESTROY_ROW, - destroyedAt: new Date(Date.now() - 2 * 86_400_000).toISOString(), - latestSandboxDestroyedAt: new Date(Date.now() - 2 * 86_400_000).toISOString(), - }); - - const response = await platform.request( - '/admin/orphan-volume-destroy', - destroyInit(validDestroyBody), - env - ); - expect(response.status).toBe(409); - expect(fly.listVolumes).not.toHaveBeenCalled(); - expect(fly.deleteVolume).not.toHaveBeenCalled(); - }); - - it('refuses (409) when a newer destruction of the same sandbox is within grace', async () => { - // The submitted instance was destroyed long ago, but the sandbox was - // reprovisioned and destroyed again recently. The grace period must run - // from that latest destruction, so an older submitted row cannot reap - // the still-shared volume early. - const { env } = makeEnv(); - mockDestroyLookup({ - ...DEFAULT_DESTROY_ROW, - destroyedAt: new Date(Date.now() - 30 * 86_400_000).toISOString(), - latestSandboxDestroyedAt: new Date(Date.now() - 2 * 86_400_000).toISOString(), + gracePeriodElapsed: false, }); const response = await platform.request( diff --git a/services/kiloclaw/src/routes/platform.ts b/services/kiloclaw/src/routes/platform.ts index a82f94e0e..87b78be71 100644 --- a/services/kiloclaw/src/routes/platform.ts +++ b/services/kiloclaw/src/routes/platform.ts @@ -4043,17 +4043,22 @@ platform.post('/admin/orphan-volume-destroy', async c => { sandboxId: kiloclaw_instances.sandbox_id, organizationId: kiloclaw_instances.organization_id, destroyedAt: kiloclaw_instances.destroyed_at, - // The latest `destroyed_at` across every destroyed row of this - // (user, sandbox). A reprovisioned sandbox has several destroyed rows - // sharing one Fly volume; the grace period must run from the most - // recent destruction, not whichever row the caller happened to submit. - latestSandboxDestroyedAt: sql`( - select max(latest.destroyed_at) - from ${kiloclaw_instances} as latest - where latest.user_id = ${kiloclaw_instances.user_id} - and latest.sandbox_id = ${kiloclaw_instances.sandbox_id} - and latest.destroyed_at is not null - )`, + // Whether the orphan-volume grace period has elapsed, evaluated entirely + // in Postgres. Grace runs from the LATEST destruction of this + // (user, sandbox): a reprovisioned sandbox has several destroyed rows + // sharing one Fly volume, so the clock follows the most recent + // destruction, not whichever row the caller happened to submit. + // Computing this in SQL avoids parsing a database timestamp with the JS + // `Date` constructor, whose handling of Postgres timestamp text differs + // across the Vercel and Cloudflare runtimes. + gracePeriodElapsed: sql` + extract(epoch from (now() - ( + select max(latest.destroyed_at) + from ${kiloclaw_instances} as latest + where latest.user_id = ${kiloclaw_instances.user_id} + and latest.sandbox_id = ${kiloclaw_instances.sandbox_id} + and latest.destroyed_at is not null + ))) * 1000 > ${ORPHAN_VOLUME_GRACE_PERIOD_MS}`, }) .from(kiloclaw_instances) .where(eq(kiloclaw_instances.id, instanceId)) @@ -4079,8 +4084,9 @@ platform.post('/admin/orphan-volume-destroy', async c => { // sandbox. A reprovisioned sandbox has several destroyed rows sharing one // Fly volume; the volume's cleanup clock runs from the most recent // destruction, so an older submitted row must not shorten the grace. - const latestDestroyedAt = instance.latestSandboxDestroyedAt ?? instance.destroyedAt; - if (Date.now() - new Date(latestDestroyedAt).getTime() <= ORPHAN_VOLUME_GRACE_PERIOD_MS) { + // `gracePeriodElapsed` is computed by Postgres in the query above; `false` + // or `null` (no destroyed row, already ruled out by gate A) both fail closed. + if (instance.gracePeriodElapsed !== true) { return c.json({ error: 'Instance is still within the orphan-volume grace period' }, 409); } // Gate C — never destroy data while this ownership context still has