Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions apps/web/src/routers/admin-kiloclaw-instances-router.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3233,6 +3233,42 @@ describe('admin.kiloclawInstances.destroyOrphanVolume', () => {
expect(mockDestroyOrphanVolume).not.toHaveBeenCalled();
});

it('clears the grace gate for a long-destroyed instance stored as Postgres timestamp text', async () => {
  // Regression guard: the router decides the grace period inside SQL and
  // never feeds the stored timestamp to the JS `Date` constructor. The row
  // below was destroyed 60 days ago and its `destroyed_at` is written in
  // Postgres native timestamp text (space separator, `+00` offset) rather
  // than ISO 8601. It must still pass the 7-day grace gate and be handed
  // off to the destroy call.
  const sixtyDaysAgoIso = new Date(Date.now() - 60 * 86_400_000).toISOString();
  const destroyedAtPgText = sixtyDaysAgoIso.replace('T', ' ').replace('Z', '+00');

  const insertedRows = await db
    .insert(kiloclaw_instances)
    .values({
      id: crypto.randomUUID(),
      user_id: regularUser.id,
      sandbox_id: `ki_${crypto.randomUUID().replace(/-/g, '')}`,
      destroyed_at: destroyedAtPgText,
    })
    .returning({ id: kiloclaw_instances.id });
  const [insertedInstance] = insertedRows;

  // Stub the downstream destroy so only the router's gates are under test.
  const destroyOutcome = {
    ok: true,
    flyApp: 'inst-grace',
    volumeId: VOLUME_ID,
    volumeName: 'kiloclaw_grace',
    alreadyGone: false,
  };
  mockDestroyOrphanVolume.mockResolvedValue(destroyOutcome);

  const adminCaller = await createCallerForUser(adminUser.id);
  const outcome = await adminCaller.admin.kiloclawInstances.destroyOrphanVolume({
    instanceId: insertedInstance.id,
    volumeId: VOLUME_ID,
  });

  expect(outcome).toMatchObject({ success: true });
  expect(mockDestroyOrphanVolume).toHaveBeenCalledTimes(1);
});

it('rejects when the user has an access-granting subscription', async () => {
const instanceId = await insertDestroyedInstance({
destroyedAt: daysAgo(30),
Expand Down
37 changes: 21 additions & 16 deletions apps/web/src/routers/admin-kiloclaw-instances-router.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4121,17 +4121,22 @@ export const adminKiloclawInstancesRouter = createTRPCRouter({
sandbox_id: kiloclaw_instances.sandbox_id,
organization_id: kiloclaw_instances.organization_id,
destroyed_at: kiloclaw_instances.destroyed_at,
// The latest `destroyed_at` across every destroyed row of this
// (user, sandbox). A reprovisioned sandbox has several destroyed
// rows sharing one Fly volume; the grace period runs from the most
// recent destruction, not whichever row the admin selected.
latest_sandbox_destroyed_at: sql<string | null>`(
select max(latest.destroyed_at)
from ${kiloclaw_instances} as latest
where latest.user_id = ${kiloclaw_instances.user_id}
and latest.sandbox_id = ${kiloclaw_instances.sandbox_id}
and latest.destroyed_at is not null
)`,
// Whether the orphan-volume grace period has elapsed, evaluated
// entirely in Postgres. Grace runs from the LATEST destruction of
// this (user, sandbox): a reprovisioned sandbox has several
// destroyed rows sharing one Fly volume, so the clock follows the
// most recent destruction, not whichever row the admin selected.
// Computing this in SQL avoids parsing a database timestamp with
// the JS `Date` constructor, whose handling of Postgres timestamp
// text differs across the Vercel and Cloudflare runtimes.
grace_period_elapsed: sql<boolean>`
extract(epoch from (now() - (
select max(latest.destroyed_at)
from ${kiloclaw_instances} as latest
where latest.user_id = ${kiloclaw_instances.user_id}
and latest.sandbox_id = ${kiloclaw_instances.sandbox_id}
and latest.destroyed_at is not null
))) * 1000 > ${ORPHAN_VOLUME_GRACE_PERIOD_MS}`,
})
.from(kiloclaw_instances)
.where(eq(kiloclaw_instances.id, input.instanceId))
Expand All @@ -4152,10 +4157,10 @@ export const adminKiloclawInstancesRouter = createTRPCRouter({

// 3. Grace period, measured from the latest destruction of this
// sandbox — give Fly + the DO sweep time to self-heal first.
const now = new Date();
const latestDestroyedAt = row.latest_sandbox_destroyed_at ?? row.destroyed_at;
const destroyedMsAgo = now.getTime() - new Date(latestDestroyedAt).getTime();
if (destroyedMsAgo <= ORPHAN_VOLUME_GRACE_PERIOD_MS) {
// `grace_period_elapsed` is computed by Postgres in the query above;
// `false` or `null` (no destroyed row, already ruled out by gate 2)
// both fail closed.
if (row.grace_period_elapsed !== true) {
throw new TRPCError({
code: 'PRECONDITION_FAILED',
message: 'Instance was destroyed too recently — wait out the 7-day grace period',
Expand All @@ -4173,7 +4178,7 @@ export const adminKiloclawInstancesRouter = createTRPCRouter({
organization_id: row.organization_id,
};
const { accessGrantingContextKeys, pendingDestructionContextKeys } =
await getOrphanVolumeContextProtections(db, [context], now);
await getOrphanVolumeContextProtections(db, [context], new Date());
const contextKey = orphanVolumeSubscriptionContextKey(context);
if (accessGrantingContextKeys.has(contextKey)) {
throw new TRPCError({
Expand Down
40 changes: 12 additions & 28 deletions services/kiloclaw/src/routes/platform-orphan-volume.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,17 @@ const VOLUME_NAME = volumeNameFromSandboxId(SANDBOX_ID);

/**
* Instance row for INSTANCE_ID that passes the identity / destroyed /
* grace gates: identity matches, destroyed long ago, and the sandbox's
* latest destruction (`latestSandboxDestroyedAt`) is also long ago.
* grace gates: identity matches, destroyed long ago, and the grace period
* (`gracePeriodElapsed`, computed in SQL by the endpoint's instance query)
* has elapsed.
*/
const DEFAULT_DESTROY_ROW = {
id: INSTANCE_ID,
userId: USER_ID,
sandboxId: SANDBOX_ID,
organizationId: null,
// 30 days in the past (86_400_000 ms per day), serialized as an ISO 8601 string.
destroyedAt: new Date(Date.now() - 30 * 86_400_000).toISOString(),
latestSandboxDestroyedAt: new Date(Date.now() - 30 * 86_400_000).toISOString(),
// Precomputed grace flag. Tests spread this row and override the flag with
// `false` to simulate a sandbox that is still inside the grace window.
gracePeriodElapsed: true,
};

/**
Expand Down Expand Up @@ -395,7 +396,7 @@ describe('POST /admin/orphan-volume-destroy', () => {
sandboxId: legacySandbox,
organizationId: null,
destroyedAt: new Date(Date.now() - 30 * 86_400_000).toISOString(),
latestSandboxDestroyedAt: new Date(Date.now() - 30 * 86_400_000).toISOString(),
gracePeriodElapsed: true,
});
vi.mocked(fly.listVolumes).mockResolvedValue([
flyVolume({ id: legacyVolumeId, name: volumeNameFromSandboxId(legacySandbox) }),
Expand Down Expand Up @@ -534,33 +535,16 @@ describe('POST /admin/orphan-volume-destroy', () => {
});

it('refuses (409) while the instance is within the grace period', async () => {
// `gracePeriodElapsed` is computed by the endpoint's instance query in
// SQL — `max(destroyed_at)` of the (user, sandbox) versus the grace
// window — so an older submitted row of a sandbox reprovisioned and
// destroyed again recently is still blocked. That SQL is exercised
// end-to-end against Postgres by the web router's `destroyOrphanVolume`
// test; here the worker just honors the precomputed flag.
const { env } = makeEnv();
mockDestroyLookup({
...DEFAULT_DESTROY_ROW,
destroyedAt: new Date(Date.now() - 2 * 86_400_000).toISOString(),
latestSandboxDestroyedAt: new Date(Date.now() - 2 * 86_400_000).toISOString(),
});

const response = await platform.request(
'/admin/orphan-volume-destroy',
destroyInit(validDestroyBody),
env
);
expect(response.status).toBe(409);
expect(fly.listVolumes).not.toHaveBeenCalled();
expect(fly.deleteVolume).not.toHaveBeenCalled();
});

it('refuses (409) when a newer destruction of the same sandbox is within grace', async () => {
// The submitted instance was destroyed long ago, but the sandbox was
// reprovisioned and destroyed again recently. The grace period must run
// from that latest destruction, so an older submitted row cannot reap
// the still-shared volume early.
const { env } = makeEnv();
mockDestroyLookup({
...DEFAULT_DESTROY_ROW,
destroyedAt: new Date(Date.now() - 30 * 86_400_000).toISOString(),
latestSandboxDestroyedAt: new Date(Date.now() - 2 * 86_400_000).toISOString(),
gracePeriodElapsed: false,
});

const response = await platform.request(
Expand Down
32 changes: 19 additions & 13 deletions services/kiloclaw/src/routes/platform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4043,17 +4043,22 @@ platform.post('/admin/orphan-volume-destroy', async c => {
sandboxId: kiloclaw_instances.sandbox_id,
organizationId: kiloclaw_instances.organization_id,
destroyedAt: kiloclaw_instances.destroyed_at,
// The latest `destroyed_at` across every destroyed row of this
// (user, sandbox). A reprovisioned sandbox has several destroyed rows
// sharing one Fly volume; the grace period must run from the most
// recent destruction, not whichever row the caller happened to submit.
latestSandboxDestroyedAt: sql<string | null>`(
select max(latest.destroyed_at)
from ${kiloclaw_instances} as latest
where latest.user_id = ${kiloclaw_instances.user_id}
and latest.sandbox_id = ${kiloclaw_instances.sandbox_id}
and latest.destroyed_at is not null
)`,
// Whether the orphan-volume grace period has elapsed, evaluated entirely
// in Postgres. Grace runs from the LATEST destruction of this
// (user, sandbox): a reprovisioned sandbox has several destroyed rows
// sharing one Fly volume, so the clock follows the most recent
// destruction, not whichever row the caller happened to submit.
// Computing this in SQL avoids parsing a database timestamp with the JS
// `Date` constructor, whose handling of Postgres timestamp text differs
// across the Vercel and Cloudflare runtimes.
gracePeriodElapsed: sql<boolean>`
extract(epoch from (now() - (
select max(latest.destroyed_at)
from ${kiloclaw_instances} as latest
where latest.user_id = ${kiloclaw_instances.user_id}
and latest.sandbox_id = ${kiloclaw_instances.sandbox_id}
and latest.destroyed_at is not null
))) * 1000 > ${ORPHAN_VOLUME_GRACE_PERIOD_MS}`,
})
.from(kiloclaw_instances)
.where(eq(kiloclaw_instances.id, instanceId))
Expand All @@ -4079,8 +4084,9 @@ platform.post('/admin/orphan-volume-destroy', async c => {
// sandbox. A reprovisioned sandbox has several destroyed rows sharing one
// Fly volume; the volume's cleanup clock runs from the most recent
// destruction, so an older submitted row must not shorten the grace.
const latestDestroyedAt = instance.latestSandboxDestroyedAt ?? instance.destroyedAt;
if (Date.now() - new Date(latestDestroyedAt).getTime() <= ORPHAN_VOLUME_GRACE_PERIOD_MS) {
// `gracePeriodElapsed` is computed by Postgres in the query above; `false`
// or `null` (no destroyed row, already ruled out by gate A) both fail closed.
if (instance.gracePeriodElapsed !== true) {
return c.json({ error: 'Instance is still within the orphan-volume grace period' }, 409);
}
// Gate C — never destroy data while this ownership context still has
Expand Down