Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ DIRECTORY_ENCRYPTION_KEY=change-me-to-a-long-random-32-char-secret
# re-encrypted (npm run reencrypt:directory). Remove it once rotation is done.
# DIRECTORY_ENCRYPTION_KEY_PREVIOUS=

# Optional. Secret for the scheduler tick endpoint (POST /api/cron). Set it,
# then have an external cron call the endpoint on an interval with header
# `authorization: Bearer <value>`. Without it, /api/cron returns 503.
# Generate with: openssl rand -base64 32
CRON_SECRET=

# Optional. Enables Have I Been Pwned breach lookups.
HIBP_API_KEY=

Expand Down
52 changes: 52 additions & 0 deletions docs/scheduler.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Scheduling (auto scan + sync)

DataShield does periodic work, exposure scans and directory syncs, without a
dedicated worker process. The model is "internal cron + table": schedule state
lives in the database, and a single tick endpoint advances it. An external
scheduler calls the endpoint on a fixed interval.

## Configuration

- **Directory sync** (per connection): `DirectoryConnection.autoSyncIntervalMinutes`
(null = disabled). Set it via `PATCH /api/directory/:id`
`{ "autoSyncIntervalMinutes": 60 }`. SCIM connections are push-based and
cannot be put on a pull schedule.
- **Exposure scan** (per company): `Company.scanIntervalMinutes` (null =
disabled). Set it via `PATCH /api/company` `{ "scanIntervalMinutes": 1440 }`.

Both intervals must be `null` or an integer >= 5 minutes.

## The tick endpoint

`POST /api/cron`, authenticated with `CRON_SECRET`:

```
curl -X POST -H "authorization: Bearer $CRON_SECRET" https://your-host/api/cron
```

Without `CRON_SECRET` set, the endpoint returns 503. Each tick:

1. Enqueues a `SyncJob` for every connection whose `autoSyncIntervalMinutes`
has elapsed since `lastSyncAt`.
2. Starts an exposure scan for every company whose `scanIntervalMinutes` has
elapsed since `lastScanAt` (stamped up front so a slow scan is not
re-triggered).
3. Drains the sync queue (`processSyncJobs`), which runs syncs with retry and
backoff (see #54).

## Driving the tick

Run the call on whatever cron you have, e.g. every 5 minutes:

- **System cron**: `*/5 * * * * curl -fsS -X POST -H "authorization: Bearer $CRON_SECRET" https://your-host/api/cron`
- **Vercel Cron / similar**: schedule a POST to `/api/cron` and inject the
header.

The tick is idempotent: nothing runs before its interval elapses, and sync
concurrency is guarded by the job claim, so overlapping ticks are safe.

## Notes

- Directory sync is durable (queued, retried). Auto-scan currently runs
detached (fire-and-forget) and is not yet retried; a failed scheduled scan is
retried on the next due tick.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- AlterTable
ALTER TABLE "Company" ADD COLUMN "scanIntervalMinutes" INTEGER,
ADD COLUMN "lastScanAt" TIMESTAMP(3);

-- AlterTable
ALTER TABLE "DirectoryConnection" ADD COLUMN "autoSyncIntervalMinutes" INTEGER;
7 changes: 7 additions & 0 deletions prisma/schema.prisma
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ model Company {
name String
domain String @unique
logoUrl String?
// Auto-scan cadence in minutes (null = disabled); lastScanAt tracks the last
// scheduler-triggered scan so the next one is only due after the interval.
scanIntervalMinutes Int?
lastScanAt DateTime?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt

Expand All @@ -33,6 +37,9 @@ model DirectoryConnection {
lastSyncAt DateTime?
lastSyncCount Int?
errorMessage String?
// Auto-sync cadence in minutes (null = disabled). The scheduler enqueues a
// SyncJob once lastSyncAt is older than this interval.
autoSyncIntervalMinutes Int?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt

Expand Down
49 changes: 49 additions & 0 deletions src/app/api/company/route.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { describe, it, expect, vi, beforeEach } from "vitest"

const requireAdmin = vi.fn()
const update = vi.fn()

vi.mock("@/lib/apiAuth", () => ({ requireAdmin: () => requireAdmin() }))
vi.mock("@/lib/prisma", () => ({
prisma: { company: { update: (a: unknown) => update(a) } },
}))

import { PATCH } from "./route"

function patch(body: unknown): Request {
return new Request("http://localhost/api/company", {
method: "PATCH",
body: JSON.stringify(body),
})
}

beforeEach(() => {
vi.clearAllMocks()
requireAdmin.mockResolvedValue({ session: { user: { companyId: "co1" } }, error: null })
})

describe("PATCH /api/company", () => {
it("rejects a non-integer interval", async () => {
const res = await PATCH(patch({ scanIntervalMinutes: 2.5 }))
expect(res.status).toBe(400)
expect(update).not.toHaveBeenCalled()
})

it("sets the scan interval scoped to the caller's company", async () => {
const res = await PATCH(patch({ scanIntervalMinutes: 1440 }))
expect(res.status).toBe(200)
expect(update).toHaveBeenCalledWith({
where: { id: "co1" },
data: { scanIntervalMinutes: 1440 },
})
})

it("disables scheduled scans with null", async () => {
const res = await PATCH(patch({ scanIntervalMinutes: null }))
expect(res.status).toBe(200)
expect(update).toHaveBeenCalledWith({
where: { id: "co1" },
data: { scanIntervalMinutes: null },
})
})
})
32 changes: 32 additions & 0 deletions src/app/api/company/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { NextResponse } from "next/server"
import { requireAdmin } from "@/lib/apiAuth"
import { prisma } from "@/lib/prisma"

const MIN_INTERVAL_MINUTES = 5

function parseInterval(value: unknown): number | null | undefined {
if (value === null) return null
if (typeof value === "number" && Number.isInteger(value) && value >= MIN_INTERVAL_MINUTES)
return value
return undefined
}

// Set the company-wide auto-scan cadence. null disables scheduled scans.
export async function PATCH(req: Request) {
const { session, error } = await requireAdmin()
if (error) return error

const body = (await req.json()) as { scanIntervalMinutes?: unknown }
const interval = parseInterval(body.scanIntervalMinutes)
if (interval === undefined)
return NextResponse.json(
{ error: `scanIntervalMinutes must be null or an integer >= ${MIN_INTERVAL_MINUTES}` },
{ status: 400 }
)

await prisma.company.update({
where: { id: session.user.companyId },
data: { scanIntervalMinutes: interval },
})
return NextResponse.json({ scanIntervalMinutes: interval })
}
43 changes: 43 additions & 0 deletions src/app/api/cron/route.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import { describe, it, expect, vi, beforeEach } from "vitest"

const runDueSchedules = vi.fn()
vi.mock("@/lib/scheduler", () => ({ runDueSchedules: () => runDueSchedules() }))

import { POST } from "./route"

function req(auth?: string): Request {
const headers = new Headers()
if (auth !== undefined) headers.set("authorization", auth)
return new Request("http://localhost/api/cron", { method: "POST", headers })
}

beforeEach(() => {
vi.clearAllMocks()
runDueSchedules.mockResolvedValue({ syncsEnqueued: 1, scansStarted: 0, jobsProcessed: 1 })
})

describe("POST /api/cron", () => {
it("returns 503 when CRON_SECRET is unset", async () => {
delete process.env.CRON_SECRET
const res = await POST(req("Bearer x"))
expect(res.status).toBe(503)
expect(runDueSchedules).not.toHaveBeenCalled()
})

it("returns 401 on a missing or wrong secret", async () => {
process.env.CRON_SECRET = "right"
expect((await POST(req())).status).toBe(401)
expect((await POST(req("Bearer wrong"))).status).toBe(401)
expect(runDueSchedules).not.toHaveBeenCalled()
delete process.env.CRON_SECRET
})

it("runs the scheduler on a valid secret", async () => {
process.env.CRON_SECRET = "right"
const res = await POST(req("Bearer right"))
expect(res.status).toBe(200)
expect(await res.json()).toEqual({ syncsEnqueued: 1, scansStarted: 0, jobsProcessed: 1 })
expect(runDueSchedules).toHaveBeenCalled()
delete process.env.CRON_SECRET
})
})
26 changes: 26 additions & 0 deletions src/app/api/cron/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { timingSafeEqual } from "crypto"
import { NextResponse } from "next/server"
import { runDueSchedules } from "@/lib/scheduler"

function safeEqual(a: string, b: string): boolean {
const bufA = Buffer.from(a)
const bufB = Buffer.from(b)
if (bufA.length !== bufB.length) return false
return timingSafeEqual(bufA, bufB)
}

// Scheduler tick. Meant to be called by an external cron (system cron, Vercel
// Cron, uptime pinger) on a fixed interval, authenticated with CRON_SECRET:
// curl -X POST -H "authorization: Bearer $CRON_SECRET" https://host/api/cron
export async function POST(req: Request) {
const secret = process.env.CRON_SECRET
if (!secret)
return NextResponse.json({ error: "Scheduler not configured" }, { status: 503 })

const provided = (req.headers.get("authorization") ?? "").match(/^Bearer\s+(\S.*)$/i)?.[1]
if (!provided || !safeEqual(provided, secret))
return NextResponse.json({ error: "Unauthorized" }, { status: 401 })

const result = await runDueSchedules()
return NextResponse.json(result)
}
69 changes: 69 additions & 0 deletions src/app/api/directory/[id]/route.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { describe, it, expect, vi, beforeEach } from "vitest"

const requireAdmin = vi.fn()
const findFirst = vi.fn()
const update = vi.fn()

vi.mock("@/lib/apiAuth", () => ({ requireAdmin: () => requireAdmin() }))
vi.mock("@/lib/prisma", () => ({
prisma: {
directoryConnection: {
findFirst: (a: unknown) => findFirst(a),
update: (a: unknown) => update(a),
},
},
}))

import { PATCH } from "./route"

const params = Promise.resolve({ id: "c1" })
function patch(body: unknown): Request {
return new Request("http://localhost/api/directory/c1", {
method: "PATCH",
body: JSON.stringify(body),
})
}

beforeEach(() => {
vi.clearAllMocks()
requireAdmin.mockResolvedValue({ session: { user: { companyId: "co1" } }, error: null })
findFirst.mockResolvedValue({ type: "OKTA" })
})

describe("PATCH /api/directory/[id]", () => {
it("rejects an interval below the minimum", async () => {
const res = await PATCH(patch({ autoSyncIntervalMinutes: 1 }), { params })
expect(res.status).toBe(400)
expect(update).not.toHaveBeenCalled()
})

it("returns 404 for a connection outside the company", async () => {
findFirst.mockResolvedValue(null)
const res = await PATCH(patch({ autoSyncIntervalMinutes: 60 }), { params })
expect(res.status).toBe(404)
})

it("rejects a pull schedule on a SCIM connection", async () => {
findFirst.mockResolvedValue({ type: "SCIM" })
const res = await PATCH(patch({ autoSyncIntervalMinutes: 60 }), { params })
expect(res.status).toBe(400)
expect(update).not.toHaveBeenCalled()
})

it("sets the interval", async () => {
const res = await PATCH(patch({ autoSyncIntervalMinutes: 60 }), { params })
expect(res.status).toBe(200)
expect(update).toHaveBeenCalledWith(
expect.objectContaining({ where: { id: "c1" }, data: { autoSyncIntervalMinutes: 60 } })
)
})

it("disables auto-sync with null (allowed for SCIM too)", async () => {
findFirst.mockResolvedValue({ type: "SCIM" })
const res = await PATCH(patch({ autoSyncIntervalMinutes: null }), { params })
expect(res.status).toBe(200)
expect(update).toHaveBeenCalledWith(
expect.objectContaining({ data: { autoSyncIntervalMinutes: null } })
)
})
})
46 changes: 46 additions & 0 deletions src/app/api/directory/[id]/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,52 @@ import { NextResponse } from "next/server"
import { requireAdmin } from "@/lib/apiAuth"
import { prisma } from "@/lib/prisma"

const MIN_INTERVAL_MINUTES = 5

// Validate an interval field: null disables, otherwise an integer >= the floor.
function parseInterval(value: unknown): number | null | undefined {
if (value === null) return null
if (typeof value === "number" && Number.isInteger(value) && value >= MIN_INTERVAL_MINUTES)
return value
return undefined // invalid
}

// Set the auto-sync cadence for a connection. SCIM is push-based, so it cannot
// be put on a pull schedule.
export async function PATCH(
req: Request,
{ params }: { params: Promise<{ id: string }> }
) {
const { session, error } = await requireAdmin()
if (error) return error

const { id } = await params
const body = (await req.json()) as { autoSyncIntervalMinutes?: unknown }
const interval = parseInterval(body.autoSyncIntervalMinutes)
if (interval === undefined)
return NextResponse.json(
{ error: `autoSyncIntervalMinutes must be null or an integer >= ${MIN_INTERVAL_MINUTES}` },
{ status: 400 }
)

const connection = await prisma.directoryConnection.findFirst({
where: { id, companyId: session.user.companyId },
select: { type: true },
})
if (!connection) return NextResponse.json({ error: "Not found" }, { status: 404 })
if (connection.type === "SCIM" && interval !== null)
return NextResponse.json(
{ error: "SCIM connections are push-based and cannot be auto-synced" },
{ status: 400 }
)

await prisma.directoryConnection.update({
where: { id },
data: { autoSyncIntervalMinutes: interval },
})
return NextResponse.json({ id, autoSyncIntervalMinutes: interval })
}

export async function DELETE(
_req: Request,
{ params }: { params: Promise<{ id: string }> }
Expand Down
Loading