From 597fad9c206d9f8f1e0df781a5cd9cbe780261fa Mon Sep 17 00:00:00 2001 From: Harry Phan Date: Fri, 19 Jun 2026 23:19:06 +0700 Subject: [PATCH 1/3] feat(walrus): add Harbor + Seal encrypted storage client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Port the proven Walrus Harbor + Seal flow (reference: benhaq/walrus-harbor-mcp) into packages/walrus as plain async/await TypeScript (no Effect). - HarborClient: Bearer REST surface (spaces/buckets/files) — reserve→sign→ finalize bucket creation, multipart upload, status poll, download. - SealCrypto: Seal encrypt/decrypt + sponsored-tx signing (@mysten/seal@1.1.3, pinned to match the existing @mysten/sui@2.17.0). - HarborStorage: createPrivateBucket / putEncrypted / getDecrypted, with a mirror_missing_grant post-finalize retry (the reference's retry was dead code — the server error code was being overwritten; preserved here). - harborConfigFromEnv() for Worker/Node env wiring. - scripts/harbor-smoke.ts: manual live testnet round-trip (CLI only, not CI). Verified end-to-end on Sui testnet: encrypt→upload→download→decrypt is byte-exact; bundles for a Cloudflare Worker at ~65KB gzip with no Node built-ins, no WASM, no eval. Refs #5, #6 --- .env.example | 6 + .gitignore | 3 + bun.lock | 1 + packages/walrus/package.json | 1 + packages/walrus/scripts/harbor-smoke.ts | 147 ++++++++++ packages/walrus/src/harbor/client.ts | 354 ++++++++++++++++++++++++ packages/walrus/src/harbor/constants.ts | 41 +++ packages/walrus/src/harbor/errors.ts | 103 +++++++ packages/walrus/src/harbor/index.ts | 199 +++++++++++++ packages/walrus/src/harbor/seal.ts | 183 ++++++++++++ packages/walrus/src/harbor/types.ts | 108 ++++++++ packages/walrus/src/index.ts | 1 + 12 files changed, 1147 insertions(+) create mode 100644 packages/walrus/scripts/harbor-smoke.ts create mode 100644 packages/walrus/src/harbor/client.ts create mode 100644 packages/walrus/src/harbor/constants.ts create mode 100644 packages/walrus/src/harbor/errors.ts create mode 100644 packages/walrus/src/harbor/index.ts create mode 100644 packages/walrus/src/harbor/seal.ts create mode 100644 packages/walrus/src/harbor/types.ts diff --git a/.env.example b/.env.example index 4dce330..fa2d4a3 100644 --- a/.env.example +++ b/.env.example @@ -60,3 +60,9 @@ CLOUDFLARE_API_TOKEN= # Workers + Pages edit, D1, # ── React Bits Pro (only if using its components) ─────────────────────────── REACTBITS_LICENSE_KEY= + +# --- Walrus Harbor (Seal-encrypted storage on Sui testnet) --- +HARBOR_BASE_URL=https://api.testnet.harbor.walrus.xyz +HARBOR_API_KEY=hbr_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +HARBOR_SERVICE_PRIVATE_KEY=suiprivkey1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +HARBOR_DEFAULT_SPACE_ID= diff --git a/.gitignore b/.gitignore index 3b92893..e16800d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,6 @@ coverage/ runs/ artifacts/ *.tsbuildinfo + +# Cloudflare local secrets +.dev.vars diff --git a/bun.lock b/bun.lock index 6664526..3e9513b 100644 --- a/bun.lock +++ b/bun.lock @@ -103,6 +103,7 @@ "version": "0.1.0", "dependencies": { "@contextmem/core": "workspace:*", + "@mysten/seal": "1.1.3", "fast-xml-parser": "^5.3.2", "p-limit": "^7.2.0", }, diff --git a/packages/walrus/package.json b/packages/walrus/package.json index 7706eb7..b68805d 100644 --- a/packages/walrus/package.json +++ b/packages/walrus/package.json @@ -12,6 +12,7 @@ }, "dependencies": { "@contextmem/core": "workspace:*", + "@mysten/seal": "1.1.3", "fast-xml-parser": "^5.3.2", "p-limit": "^7.2.0" } diff --git a/packages/walrus/scripts/harbor-smoke.ts b/packages/walrus/scripts/harbor-smoke.ts new file mode 100644 index 0000000..b322e29 --- /dev/null +++ b/packages/walrus/scripts/harbor-smoke.ts @@ -0,0 +1,147 @@ +/** + * Live round-trip smoke test for the Harbor + Seal client. + * + * MANUAL ONLY — this is NOT a vitest test. It needs real Harbor credentials and + * hits testnet, so CI (which has no creds) must never run it. + * + * Run: + * bun packages/walrus/scripts/harbor-smoke.ts + * + * Bun auto-loads the repo-root .env.local; if the env vars are not present we + * parse .env.local manually as a fallback. Required keys: + * HARBOR_BASE_URL, HARBOR_API_KEY, HARBOR_SERVICE_PRIVATE_KEY, HARBOR_DEFAULT_SPACE_ID + * + * Flow: listSpaces → createPrivateBucket → putEncrypted → getDecrypted + * (assert byte-exact) → cleanup (deleteBucketFile + deleteBucket). Prints + * "SMOKE PASS" / "SMOKE FAIL: " and the on-chain finalize digest. + */ +import { readFileSync } from "node:fs"; +import path from "node:path"; +import { Buffer } from "node:buffer"; + +import { HarborStorage, harborConfigFromEnv } from "../src/harbor/index.js"; + +const REPO_ROOT = path.resolve(import.meta.dirname, "..", "..", ".."); +const ENV_FILE = path.join(REPO_ROOT, ".env.local"); + +const REQUIRED_KEYS = [ + "HARBOR_BASE_URL", + "HARBOR_API_KEY", + "HARBOR_SERVICE_PRIVATE_KEY", + "HARBOR_DEFAULT_SPACE_ID", +] as const; + +/** Minimal .env parser — only populates keys not already in process.env. */ +function loadEnvFallback(): void { + const missing = REQUIRED_KEYS.filter((k) => !process.env[k]); + if (missing.length === 0) return; + let raw: string; + try { + raw = readFileSync(ENV_FILE, "utf8"); + } catch { + return; // nothing to backfill from; harborConfigFromEnv will report what's missing + } + for (const line of raw.split("\n")) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) continue; + const eq = trimmed.indexOf("="); + if (eq === -1) continue; + const key = trimmed.slice(0, eq).trim(); + if (process.env[key] !== undefined) continue; + let value = trimmed.slice(eq + 1).trim(); + if ( + (value.startsWith('"') && value.endsWith('"')) || + (value.startsWith("'") && value.endsWith("'")) + ) { + value = value.slice(1, -1); + } + process.env[key] = value; + } +} + +function bytesEqual(a: Uint8Array, b: Uint8Array): boolean { + if (a.length !== b.length) return false; + for (let i = 0; i < a.length; i++) { + if (a[i] !== b[i]) return false; + } + return true; +} + +async function main(): Promise { + loadEnvFallback(); + + const config = harborConfigFromEnv(); + const storage = new HarborStorage(config); + + // 1. listSpaces — sanity check auth + assert a default space is configured. + const spaces = await storage.client.listSpaces(); + console.log(`listSpaces: ${spaces.length} space(s) visible`); + const spaceId = config.defaultSpaceId; + if (!spaceId) { + throw new Error("HARBOR_DEFAULT_SPACE_ID is not set"); + } + console.log(`using spaceId: ${spaceId}`); + + // 2. createPrivateBucket — reserve → sign → finalize. + const bucketName = `ctxm-smoke-${Date.now()}`; + const created = await storage.createPrivateBucket(spaceId, bucketName); + console.log(`createPrivateBucket: bucketId=${created.bucketId}`); + console.log(` sealPolicyId=${created.sealPolicyId}`); + console.log(` state=${created.state}`); + console.log(` finalize digest=${created.digest}`); + if (!created.sealPolicyId) { + throw new Error("finalize returned no sealPolicyId (bucket is not private?)"); + } + + let fileId: string | undefined; + try { + // 3. putEncrypted — encrypt + upload + poll to completed. + const plaintext = new Uint8Array(Buffer.from(`contextMEM harbor smoke ${Date.now()}`)); + fileId = await storage.putEncrypted( + created.bucketId, + created.sealPolicyId, + plaintext, + "smoke.txt", + ); + console.log(`putEncrypted: fileId=${fileId} (${plaintext.length} bytes plaintext)`); + + // 4. getDecrypted — download + decrypt, assert byte-exact round trip. + const decrypted = await storage.getDecrypted( + created.bucketId, + created.sealPolicyId, + fileId, + ); + if (!bytesEqual(decrypted, plaintext)) { + throw new Error( + `decrypted bytes do not match original (got ${decrypted.length} bytes, expected ${plaintext.length})`, + ); + } + console.log("getDecrypted: byte-exact round trip OK"); + } finally { + // 5. cleanup — this is throwaway test data; best-effort delete. + try { + if (fileId) { + await storage.client.deleteBucketFile(created.bucketId, fileId); + console.log(`cleanup: deleted file ${fileId}`); + } + } catch (err) { + console.warn(`cleanup: deleteBucketFile failed: ${String(err)}`); + } + try { + await storage.client.deleteBucket(created.bucketId); + console.log(`cleanup: deleted bucket ${created.bucketId}`); + } catch (err) { + console.warn(`cleanup: deleteBucket failed: ${String(err)}`); + } + } + + console.log(`finalize digest: ${created.digest}`); + console.log("SMOKE PASS"); +} + +main().catch((err) => { + const reason = err instanceof Error ? err.message : String(err); + console.error(`SMOKE FAIL: ${reason}`); + if (err instanceof Error && err.stack) console.error(err.stack); + process.exitCode = 1; +}); diff --git a/packages/walrus/src/harbor/client.ts b/packages/walrus/src/harbor/client.ts new file mode 100644 index 0000000..1a9c05a --- /dev/null +++ b/packages/walrus/src/harbor/client.ts @@ -0,0 +1,354 @@ +import { HarborAuthError, HarborError } from "./errors.js"; +import type { + Bucket, + BucketId, + BucketListResponse, + CreateBucketReserveResponse, + FileId, + FileListResponse, + FileStatusResponse, + FileUploadResponse, + FinalizeBucketResponse, + HarborConfig, + SpaceId, + SpaceListItem, +} from "./types.js"; + +// Harbor stores a file's mime_type from the multipart part's content-type (it does NOT +// sniff the ciphertext or read the extension server-side). The UI keys preview/rendering +// off that stored mime, so an octet-stream type makes images/PDFs un-previewable even +// though they decrypt fine. Derive the real type from the file name. +const EXT_MIME: Record = { + apng: "image/apng", + avif: "image/avif", + bmp: "image/bmp", + gif: "image/gif", + jpg: "image/jpeg", + jpeg: "image/jpeg", + png: "image/png", + svg: "image/svg+xml", + webp: "image/webp", + pdf: "application/pdf", + csv: "text/csv", + htm: "text/html", + html: "text/html", + md: "text/markdown", + mdx: "text/markdown", + txt: "text/plain", + json: "application/json", + mp4: "video/mp4", + webm: "video/webm", + mov: "video/quicktime", + mp3: "audio/mpeg", + wav: "audio/wav", + ogg: "audio/ogg", +}; + +function contentTypeFromName(fileName: string): string { + const ext = fileName.slice(fileName.lastIndexOf(".") + 1).toLowerCase(); + return EXT_MIME[ext] ?? "application/octet-stream"; +} + +/** Harbor wraps single-resource GETs as `{ data: }`. */ +interface DataEnvelope { + readonly data: T; +} + +/** Harbor error bodies arrive as `{ error: "msg" }` or `{ error: { code, message } }`. */ +interface HarborErrorBody { + readonly code?: string; + readonly message?: string; + readonly error?: string | { readonly code?: string; readonly message?: string }; +} + +/** + * Typed Harbor REST API client (plain async/await port of the Effect + * `HarborApiClient` service). Uses native `fetch` with a Bearer header and the + * `contentTypeFromName` helper for multipart uploads. + * + * Only the curated external surface (Bearer-only) is implemented. + */ +export class HarborClient { + private readonly baseUrl: string; + private readonly apiKey: string; + + constructor(config: Pick) { + this.baseUrl = config.baseUrl.replace(/\/+$/, ""); + this.apiKey = config.apiKey; + } + + // --- internals --- + + private authHeaders(extra?: Record): Record { + return { + Authorization: `Bearer ${this.apiKey}`, + Accept: "application/json", + ...extra, + }; + } + + // A 401/403 is only an *auth* failure when its server code is auth-related (or + // absent). Other 403s — notably the post-finalize `mirror_missing_grant` — must + // keep their original server code so callers (HarborStorage.putEncrypted) can + // detect & retry them instead of treating them as a dead API key. + private isAuthCode(code: string | undefined): boolean { + return ( + code === undefined || + code === "missing_api_key" || + code === "invalid_api_key" || + code === "read_only_api_key" + ); + } + + private async parseErrorBody(res: Response): Promise { + try { + return (await res.json()) as HarborErrorBody; + } catch { + return {}; + } + } + + /** Build a typed error from a non-OK JSON response and throw it. */ + private async fail(res: Response, endpoint: string): Promise { + const body = await this.parseErrorBody(res); + // Harbor error bodies come as either `{ error: "msg" }` or `{ error: { code, message } }`, + // so pull the string out of both shapes — otherwise String(message) yields "[object Object]". + const errBody = body.error; + const code = + body.code ?? (errBody && typeof errBody === "object" ? errBody.code : undefined); + const message = + (typeof errBody === "string" ? errBody : errBody?.message) ?? + body.message ?? + `HTTP ${res.status}`; + + if ((res.status === 401 || res.status === 403) && this.isAuthCode(code)) { + throw new HarborAuthError({ + message: String(message), + code: code === "read_only_api_key" ? "read_only_api_key" : "invalid_api_key", + status: res.status, + }); + } + throw new HarborError({ + message: String(message), + ...(code !== undefined ? { code } : {}), + status: res.status, + endpoint, + }); + } + + /** + * Throw a typed error from a raw (non-JSON) failure, preserving any server + * `code` so callers can detect e.g. the post-finalize `mirror_missing_grant`. + */ + private async failRaw(res: Response, endpoint: string): Promise { + const text = await res.text().catch(() => ""); + let code: string | undefined; + let parsedMessage: string | undefined; + if (text) { + try { + const body = JSON.parse(text) as HarborErrorBody; + const errBody = body.error; + code = + body.code ?? (errBody && typeof errBody === "object" ? errBody.code : undefined); + parsedMessage = + (typeof errBody === "string" ? errBody : errBody?.message) ?? body.message; + } catch { + // non-JSON body; keep the raw text in the message below + } + } + if ((res.status === 401 || res.status === 403) && this.isAuthCode(code)) { + throw new HarborAuthError({ + message: parsedMessage ?? text ?? `HTTP ${res.status}`, + code: code === "read_only_api_key" ? "read_only_api_key" : "invalid_api_key", + status: res.status, + }); + } + throw new HarborError({ + message: parsedMessage ?? `HTTP ${res.status}: ${text}`, + ...(code !== undefined ? { code } : {}), + status: res.status, + endpoint, + }); + } + + private url(path: string): string { + return `${this.baseUrl}${path}`; + } + + // === Read flows === + + async listSpaces(filter?: { type?: "personal" | "team" }): Promise { + const path = filter?.type ? `/api/v1/spaces?type=${filter.type}` : "/api/v1/spaces"; + const res = await fetch(this.url(path), { headers: this.authHeaders() }); + if (res.status !== 200) return this.fail(res, path); + const json = (await res.json()) as DataEnvelope; + return json.data; + } + + async listBuckets(args: { + spaceId: SpaceId; + limit?: number; + cursor?: string; + q?: string; + visibility?: "public" | "private"; + }): Promise { + const params = new URLSearchParams(); + if (args.limit !== undefined) params.set("limit", String(args.limit)); + if (args.cursor) params.set("cursor", args.cursor); + if (args.q) params.set("q", args.q); + if (args.visibility) params.set("visibility", args.visibility); + + const path = `/api/v1/spaces/${args.spaceId}/buckets?${params.toString()}`; + const res = await fetch(this.url(path), { headers: this.authHeaders() }); + if (res.status !== 200) return this.fail(res, path); + const json = (await res.json()) as BucketListResponse; + return { buckets: json.buckets, next_cursor: json.next_cursor }; + } + + async getBucketById(bucketId: BucketId): Promise { + const path = `/api/v1/buckets/${bucketId}`; + const res = await fetch(this.url(path), { headers: this.authHeaders() }); + if (res.status !== 200) return this.fail(res, path); + const json = (await res.json()) as DataEnvelope; + return json.data; + } + + async listBucketFiles( + bucketId: BucketId, + limit?: number, + cursor?: string, + q?: string, + ): Promise { + const params = new URLSearchParams(); + if (limit !== undefined) params.set("limit", String(limit)); + if (cursor) params.set("cursor", cursor); + if (q) params.set("q", q); + + const path = `/api/v1/buckets/${bucketId}/files?${params.toString()}`; + const res = await fetch(this.url(path), { headers: this.authHeaders() }); + if (res.status !== 200) return this.fail(res, path); + return (await res.json()) as FileListResponse; + } + + // === Write flows === + + async createBucket(spaceId: SpaceId, name: string): Promise { + const path = `/api/v1/spaces/${spaceId}/buckets`; + const res = await fetch(this.url(path), { + method: "POST", + headers: this.authHeaders({ "Content-Type": "application/json" }), + body: JSON.stringify({ name, scope: "private" }), + }); + if (res.status !== 201) return this.fail(res, path); + return (await res.json()) as CreateBucketReserveResponse; + } + + async finalizeBucket(bucketId: BucketId, signature: string): Promise { + const path = `/api/v1/buckets/${bucketId}/finalize`; + const res = await fetch(this.url(path), { + method: "POST", + headers: this.authHeaders({ "Content-Type": "application/json" }), + body: JSON.stringify({ signature }), + }); + if (res.status !== 200) return this.fail(res, path); + return (await res.json()) as FinalizeBucketResponse; + } + + async updateBucket( + bucketId: BucketId, + body: { name: string; visibility?: "public" | "private"; sealPolicyId?: string | null }, + ): Promise { + const path = `/api/v1/buckets/${bucketId}`; + const res = await fetch(this.url(path), { + method: "PUT", + headers: this.authHeaders({ "Content-Type": "application/json" }), + body: JSON.stringify(body), + }); + if (res.status !== 200) return this.fail(res, path); + const json = (await res.json()) as Bucket & { data?: Bucket }; + return json.data ?? json; + } + + // PUT /buckets/{id} is a partial update; visibility/sealPolicyId are immutable + // server-side (sending visibility returns 403), so a rename sends only the name. + async renameBucket(bucketId: BucketId, newName: string): Promise { + return this.updateBucket(bucketId, { name: newName }); + } + + async deleteBucket(bucketId: BucketId): Promise<{ id: BucketId; deleted: true }> { + // Harbor guards bucket deletion behind ?confirm=true (it deletes all contained files). + const path = `/api/v1/buckets/${bucketId}?confirm=true`; + const res = await fetch(this.url(path), { method: "DELETE", headers: this.authHeaders() }); + // Harbor returns 204 No Content on success; tolerate 200 with a body too. + if (res.status !== 200 && res.status !== 204) return this.fail(res, path); + return { id: bucketId, deleted: true }; + } + + async uploadBucketFile( + bucketId: BucketId, + fileBytes: Uint8Array, + fileName: string, + metadata?: Record, + ): Promise { + const form = new FormData(); + const blob = new Blob([fileBytes as unknown as BlobPart], { + type: contentTypeFromName(fileName), + }); + form.append("file", blob, fileName); + if (metadata) form.append("metadata", JSON.stringify(metadata)); + + const path = `/api/v1/buckets/${bucketId}/files`; + let res: Response; + try { + res = await fetch(this.url(path), { + method: "POST", + headers: { Authorization: `Bearer ${this.apiKey}` }, + body: form, + }); + } catch (cause) { + throw new HarborError({ + message: `Multipart upload failed: ${String(cause)}`, + endpoint: path, + }); + } + + if (res.status !== 202) return this.failRaw(res, path); + return (await res.json()) as FileUploadResponse; + } + + async getFileUploadStatus(bucketId: BucketId, fileId: FileId): Promise { + const path = `/api/v1/buckets/${bucketId}/files/${fileId}/status`; + const res = await fetch(this.url(path), { headers: this.authHeaders() }); + if (res.status !== 200) return this.fail(res, path); + return (await res.json()) as FileStatusResponse; + } + + async downloadBucketFile(bucketId: BucketId, fileId: FileId): Promise { + const path = `/api/v1/buckets/${bucketId}/files/${fileId}/download`; + let res: Response; + try { + res = await fetch(this.url(path), { + method: "GET", + headers: { Authorization: `Bearer ${this.apiKey}` }, + }); + } catch (cause) { + throw new HarborError({ message: `Download failed: ${String(cause)}`, endpoint: path }); + } + if (res.status !== 200) return this.failRaw(res, path); + const arrayBuffer = await res.arrayBuffer(); + return new Uint8Array(arrayBuffer); + } + + async deleteBucketFile( + bucketId: BucketId, + fileId: FileId, + ): Promise<{ id: FileId; deleted: true }> { + const path = `/api/v1/buckets/${bucketId}/files/${fileId}`; + const res = await fetch(this.url(path), { method: "DELETE", headers: this.authHeaders() }); + // Harbor returns 204 No Content on success; tolerate 200 with a body too. + if (res.status !== 200 && res.status !== 204) return this.fail(res, path); + return { id: fileId, deleted: true }; + } +} + +export { contentTypeFromName }; diff --git a/packages/walrus/src/harbor/constants.ts b/packages/walrus/src/harbor/constants.ts new file mode 100644 index 0000000..d94b56c --- /dev/null +++ b/packages/walrus/src/harbor/constants.ts @@ -0,0 +1,41 @@ +/** + * Testnet constants for Walrus Harbor + Seal. + * Sourced directly from: + * - https://api.testnet.harbor.walrus.xyz/docs/quickstart.md + * - harbor/alpha-docs/Harbor API Quickstart.md + * + * These are pinned for the alpha testnet deployment. + * DO NOT change without updating the corresponding on-chain objects. + */ + +// BCS schema for Seal identity (must exactly match the on-chain `seal_approve` check). +import { bcs } from "@mysten/sui/bcs"; + +// Original (immutable) package id of the Harbor bucket-policy package. +// Seal derives identity using the ORIGINAL package id, even after upgrades. +export const HARBOR_ORIGINAL_PACKAGE_ID = + "0x8b2429358e9b0f005b69fe8ad3cbd1268ad87f35047a21612e082c64824faf8d"; + +// Latest (upgradable) package id that hosts the `seal_approve` entry function. +export const HARBOR_LATEST_PACKAGE_ID = + "0xc11d875481544e9b6c616f7d6704266e1633b4034eab7ed76626dc25ebfcd506"; + +// Seal key servers on testnet (threshold = 2 out of 3). +export const SEAL_KEY_SERVER_OBJECT_IDS = [ + "0x6068c0acb197dddbacd4746a9de7f025b2ed5a5b6c1b1ab44dade4426d141da2", + "0x164ac3d2b3b8694b8181c13f671950004765c23f270321a45fdd04d40cccf0f2", + "0x9c949e53c36ab7a9c484ed9e8b43267a77d4b8d70e79aa6b39042e3d4c434105", +] as const; + +// Fullnode used for Seal + Sui operations. +export const SUI_TESTNET_FULLNODE = "https://fullnode.testnet.sui.io:443"; + +export const SealIdentity = bcs.struct("SealIdentity", { + policyObjectId: bcs.Address, + nonce: bcs.fixedArray(32, bcs.u8()), +}); + +export type SealIdentityInput = { + policyObjectId: string; + nonce: number[]; +}; diff --git a/packages/walrus/src/harbor/errors.ts b/packages/walrus/src/harbor/errors.ts new file mode 100644 index 0000000..d85d3a4 --- /dev/null +++ b/packages/walrus/src/harbor/errors.ts @@ -0,0 +1,103 @@ +/** + * Harbor API + Seal crypto domain errors. + * + * Ported from the Effect reference (errors.ts) which used `Data.TaggedError`. + * Here they are plain `Error` subclasses carrying the same structured fields, + * so callers can branch on `instanceof` / `.code` without pulling in effect. + */ + +/** Thrown for any non-OK Harbor REST response. Carries HTTP status + server code. */ +export class HarborError extends Error { + readonly code?: string; + readonly status?: number; + readonly endpoint?: string; + + constructor(args: { + message: string; + code?: string; + status?: number; + endpoint?: string; + }) { + super(args.message); + this.name = "HarborError"; + this.code = args.code; + this.status = args.status; + this.endpoint = args.endpoint; + } +} + +/** 401/403 auth failures. Mirrors the reference HarborAuthError code union. */ +export class HarborAuthError extends HarborError { + declare readonly code: "missing_api_key" | "invalid_api_key" | "read_only_api_key"; + + constructor(args: { + message: string; + code: "missing_api_key" | "invalid_api_key" | "read_only_api_key"; + status?: number; + }) { + super(args); + this.name = "HarborAuthError"; + } +} + +export type SealCryptoStep = + | "load_keypair" + | "encrypt" + | "decrypt" + | "build_ptb" + | "session_key" + | "sign"; + +/** Any failure inside the client-side Seal encrypt/decrypt/sign pipeline. */ +export class SealCryptoError extends Error { + readonly step: SealCryptoStep; + + constructor(args: { message: string; step: SealCryptoStep; cause?: unknown }) { + super(args.message, args.cause !== undefined ? { cause: args.cause } : undefined); + this.name = "SealCryptoError"; + this.step = args.step; + } +} + +/** + * Raised when uploads keep returning the post-finalize `mirror_missing_grant` + * 403 after the retry budget is exhausted. + */ +export class MirrorGrantMissingError extends Error { + readonly bucketId: string; + readonly fileId?: string; + readonly attempt: number; + + constructor(args: { bucketId: string; fileId?: string; attempt: number }) { + super( + `Bucket ${args.bucketId} still missing mirror grant after ${args.attempt} attempts`, + ); + this.name = "MirrorGrantMissingError"; + this.bucketId = args.bucketId; + this.fileId = args.fileId; + this.attempt = args.attempt; + } +} + +/** Upload finished in a non-completed state (failed / timed out while polling). */ +export class FileStatusError extends Error { + readonly fileId: string; + readonly state: string; + readonly error?: { code: string; message: string }; + + constructor(args: { + fileId: string; + state: string; + error?: { code: string; message: string }; + }) { + super( + `File ${args.fileId} ended in state "${args.state}"${ + args.error ? `: ${args.error.code} ${args.error.message}` : "" + }`, + ); + this.name = "FileStatusError"; + this.fileId = args.fileId; + this.state = args.state; + this.error = args.error; + } +} diff --git a/packages/walrus/src/harbor/index.ts b/packages/walrus/src/harbor/index.ts new file mode 100644 index 0000000..0956a80 --- /dev/null +++ b/packages/walrus/src/harbor/index.ts @@ -0,0 +1,199 @@ +import { HarborClient } from "./client.js"; +import { FileStatusError, HarborError, MirrorGrantMissingError } from "./errors.js"; +import { SealCrypto } from "./seal.js"; +import type { + BucketId, + FileId, + FileUploadResponse, + FinalizeBucketResponse, + HarborConfig, + SpaceId, +} from "./types.js"; + +// Re-exports — the harbor public surface. +export { HarborClient, contentTypeFromName } from "./client.js"; +export { SealCrypto } from "./seal.js"; +export * from "./constants.js"; +export * from "./errors.js"; +export * from "./types.js"; + +const sleep = (ms: number): Promise => new Promise((resolve) => setTimeout(resolve, ms)); + +/** + * After a bucket is finalized there is a ~3s window where the storage mirror has + * not yet been granted, so uploads 403 with `mirror_missing_grant`. Detect it on + * either the structured server code or the message text (the upload path may not + * always surface a parseable code). + */ +function isMirrorMissingGrant(err: unknown): boolean { + if (!(err instanceof HarborError)) return false; + if (err.status !== 403) return false; + return err.code === "mirror_missing_grant" || /mirror_missing_grant/i.test(err.message); +} + +export interface CreatePrivateBucketResult { + readonly bucketId: BucketId; + readonly sealPolicyId: string | null; + readonly state: string; + /** On-chain digest of the sponsored finalize transaction (from the reserve step). */ + readonly digest: string; +} + +export interface PutEncryptedOptions { + /** Max upload attempts while tolerating the post-finalize mirror_missing_grant 403. */ + readonly uploadAttempts?: number; + /** Backoff between upload retries (ms). */ + readonly uploadBackoffMs?: number; + /** Max status polls before timing out. */ + readonly pollAttempts?: number; + /** Delay between status polls (ms). */ + readonly pollIntervalMs?: number; + /** Optional metadata stored alongside the file. */ + readonly metadata?: Record; +} + +/** + * High-level Harbor + Seal operations: compose {@link HarborClient} and + * {@link SealCrypto} into "ggdrive"-style private-bucket flows. All heavy + * crypto + signing + retry logic lives here. + * + * Plain async/await port of the Effect `HarborStorageService`. + */ +export class HarborStorage { + readonly client: HarborClient; + readonly seal: SealCrypto; + readonly defaultSpaceId?: string; + + constructor(config: HarborConfig) { + this.client = new HarborClient(config); + this.seal = new SealCrypto({ servicePrivateKey: config.servicePrivateKey }); + if (config.defaultSpaceId) this.defaultSpaceId = config.defaultSpaceId; + } + + /** + * Full create-bucket flow (private + Seal): reserve → sign bytes → finalize. + * Done back-to-back because the sponsor signature expires fast. + */ + async createPrivateBucket(spaceId: SpaceId, name: string): Promise { + // 1. Reserve (sponsored tx bytes + digest) + const reserve = await this.client.createBucket(spaceId, name); + // 2. Sign locally + const signature = await this.seal.signTransactionBytes(reserve.bytes); + // 3. Finalize (immediately — the sponsor signature is short-lived) + const finalized: FinalizeBucketResponse = await this.client.finalizeBucket( + reserve.bucket_id, + signature, + ); + return { + bucketId: finalized.bucket_id, + sealPolicyId: finalized.seal_policy_id, + state: finalized.state, + digest: reserve.digest, + }; + } + + /** + * Encrypt bytes with Seal, upload (tolerating the post-finalize + * `mirror_missing_grant` 403 via short backoff), then poll until the upload is + * `completed`. Returns the new file id. + */ + async putEncrypted( + bucketId: BucketId, + sealPolicyId: string, + bytes: Uint8Array, + fileName: string, + options: PutEncryptedOptions = {}, + ): Promise { + const uploadAttempts = options.uploadAttempts ?? 12; + const uploadBackoffMs = options.uploadBackoffMs ?? 3000; + const pollAttempts = options.pollAttempts ?? 40; + const pollIntervalMs = options.pollIntervalMs ?? 2000; + + const encrypted = await this.seal.encrypt(bytes, sealPolicyId); + + // Upload with a retry loop on the post-finalize mirror_missing_grant 403. + let uploadResult: FileUploadResponse | undefined; + for (let attempt = 0; attempt < uploadAttempts; attempt++) { + try { + uploadResult = await this.client.uploadBucketFile( + bucketId, + encrypted, + fileName, + options.metadata, + ); + break; + } catch (err) { + if (isMirrorMissingGrant(err) && attempt < uploadAttempts - 1) { + await sleep(uploadBackoffMs); + continue; + } + throw err; + } + } + if (!uploadResult) { + throw new MirrorGrantMissingError({ bucketId, attempt: uploadAttempts }); + } + + const fileId = uploadResult.data.id; + + // Poll until completed or failed. + let lastState = "queued"; + for (let i = 0; i < pollAttempts; i++) { + const status = await this.client.getFileUploadStatus(bucketId, fileId); + lastState = status.data.state; + if (status.data.state === "completed") return fileId; + if (status.data.state === "failed") { + throw new FileStatusError({ + fileId, + state: status.data.state, + error: status.data.error ?? { code: "unknown", message: "Upload failed" }, + }); + } + await sleep(pollIntervalMs); + } + + throw new FileStatusError({ + fileId, + state: lastState, + error: { code: "timeout", message: "Upload did not complete in time" }, + }); + } + + /** Download a file and decrypt it with the bucket's sealPolicyId. */ + async getDecrypted( + bucketId: BucketId, + sealPolicyId: string, + fileId: FileId, + ): Promise { + const ciphertext = await this.client.downloadBucketFile(bucketId, fileId); + return this.seal.decrypt(ciphertext, sealPolicyId); + } +} + +/** + * Build a {@link HarborConfig} from environment variables. + * + * Reads HARBOR_BASE_URL, HARBOR_API_KEY, HARBOR_SERVICE_PRIVATE_KEY, and + * HARBOR_DEFAULT_SPACE_ID. Secrets live only in the gitignored env files and are + * never hardcoded. Throws if the API key is missing. + */ +export function harborConfigFromEnv( + env: Record = process.env, +): HarborConfig { + const apiKey = env.HARBOR_API_KEY; + if (!apiKey) { + throw new HarborError({ + message: + "HARBOR_API_KEY is not set. Add it to .env.local / .dev.vars before using Harbor.", + }); + } + const baseUrl = env.HARBOR_BASE_URL ?? "https://api.testnet.harbor.walrus.xyz"; + const servicePrivateKey = env.HARBOR_SERVICE_PRIVATE_KEY ?? ""; + const defaultSpaceId = env.HARBOR_DEFAULT_SPACE_ID; + return { + baseUrl, + apiKey, + servicePrivateKey, + ...(defaultSpaceId ? { defaultSpaceId } : {}), + }; +} diff --git a/packages/walrus/src/harbor/seal.ts b/packages/walrus/src/harbor/seal.ts new file mode 100644 index 0000000..f7d0e42 --- /dev/null +++ b/packages/walrus/src/harbor/seal.ts @@ -0,0 +1,183 @@ +import { EncryptedObject, SealClient, SessionKey } from "@mysten/seal"; +import { decodeSuiPrivateKey } from "@mysten/sui/cryptography"; +import { SuiJsonRpcClient } from "@mysten/sui/jsonRpc"; +import { Ed25519Keypair } from "@mysten/sui/keypairs/ed25519"; +import { Transaction } from "@mysten/sui/transactions"; +import { fromBase64, fromHex } from "@mysten/sui/utils"; + +import { + HARBOR_LATEST_PACKAGE_ID, + HARBOR_ORIGINAL_PACKAGE_ID, + SEAL_KEY_SERVER_OBJECT_IDS, + SealIdentity, + type SealIdentityInput, + SUI_TESTNET_FULLNODE, +} from "./constants.js"; +import { SealCryptoError } from "./errors.js"; + +// The seal SDK types its `suiClient` as a structural `SealCompatibleClient`. +// `SuiJsonRpcClient` satisfies it at runtime (this is exactly the quickstart +// pairing) but the nominal generics don't always line up across @mysten releases, +// so we keep one narrow cast at the construction boundary. +type SealCompatibleClient = ConstructorParameters[0]["suiClient"]; + +/** + * SealCrypto — the heart of private (encrypted) Harbor operations. + * + * All client-side Seal encryption, decryption, and Sui signing happens here. + * This **must** run locally (never on a remote server) because it holds the + * user's service private key. + * + * Plain async/await port of the Effect `SealCryptoService`. + */ +export class SealCrypto { + private readonly servicePrivateKey: string; + private keypair: Ed25519Keypair | undefined; + + // SuiJsonRpcClient + SealClient are stateless config holders (no network I/O + // until a call is made), so build them once. The keypair stays lazy so a + // missing service key never fails construction. + private readonly suiClient: SuiJsonRpcClient; + private readonly sealClient: SealClient; + + constructor(config: { servicePrivateKey: string }) { + this.servicePrivateKey = config.servicePrivateKey; + this.suiClient = new SuiJsonRpcClient({ + url: SUI_TESTNET_FULLNODE, + network: "testnet", + }); + this.sealClient = new SealClient({ + suiClient: this.suiClient as unknown as SealCompatibleClient, + serverConfigs: SEAL_KEY_SERVER_OBJECT_IDS.map((objectId) => ({ + objectId, + weight: 1, + })), + verifyKeyServers: false, // testnet convenience (matches quickstart) + }); + } + + /** Lazily decode the service private key into an Ed25519 keypair. */ + getKeypair(): Ed25519Keypair { + if (this.keypair) return this.keypair; + const raw = this.servicePrivateKey; + if (!raw || raw.length < 20) { + throw new SealCryptoError({ + message: "HARBOR_SERVICE_PRIVATE_KEY is missing or invalid", + step: "load_keypair", + }); + } + try { + const { secretKey } = decodeSuiPrivateKey(raw); + this.keypair = Ed25519Keypair.fromSecretKey(secretKey); + return this.keypair; + } catch (cause) { + throw new SealCryptoError({ + message: "Failed to decode service private key", + cause, + step: "load_keypair", + }); + } + } + + /** + * Encrypt plaintext for a private bucket. + * Returns the full encrypted object bytes ready for multipart upload. + */ + async encrypt(plaintext: Uint8Array, sealPolicyId: string): Promise { + // Each file gets a fresh 32-byte nonce. + const nonce = Array.from(crypto.getRandomValues(new Uint8Array(32))); + + const idInput: SealIdentityInput = { policyObjectId: sealPolicyId, nonce }; + const id = SealIdentity.serialize(idInput).toHex(); + + try { + const { encryptedObject } = await this.sealClient.encrypt({ + threshold: 2, + packageId: HARBOR_ORIGINAL_PACKAGE_ID, + id, + data: plaintext, + }); + return encryptedObject; + } catch (cause) { + throw new SealCryptoError({ message: "Seal encryption failed", cause, step: "encrypt" }); + } + } + + /** Decrypt a downloaded ciphertext using the bucket's sealPolicyId. */ + async decrypt(ciphertext: Uint8Array, sealPolicyId: string): Promise { + const keypair = this.getKeypair(); + + let txBytes: Uint8Array; + let sessionKey: SessionKey; + try { + const parsed = EncryptedObject.parse(ciphertext); + const idHex = parsed.id.startsWith("0x") ? parsed.id : `0x${parsed.id}`; + const idBytes = fromHex(idHex); + + // Build the access-check transaction kind (never broadcast). + const tx = new Transaction(); + tx.moveCall({ + target: `${HARBOR_LATEST_PACKAGE_ID}::bucket_policy::seal_approve`, + arguments: [tx.pure.vector("u8", Array.from(idBytes)), tx.object(sealPolicyId)], + }); + try { + txBytes = await tx.build({ client: this.suiClient, onlyTransactionKind: true }); + } catch (cause) { + throw new SealCryptoError({ + message: "Failed to build seal_approve PTB", + cause, + step: "build_ptb", + }); + } + + // SessionKey lets Seal key servers verify the caller. + try { + sessionKey = await SessionKey.create({ + address: keypair.toSuiAddress(), + packageId: HARBOR_ORIGINAL_PACKAGE_ID, + ttlMin: 10, + suiClient: this.suiClient as unknown as SealCompatibleClient, + signer: keypair, + }); + } catch (cause) { + throw new SealCryptoError({ + message: "Failed to create Seal SessionKey", + cause, + step: "session_key", + }); + } + } catch (cause) { + if (cause instanceof SealCryptoError) throw cause; + throw new SealCryptoError({ + message: "Decryption pipeline failed", + cause, + step: "decrypt", + }); + } + + try { + return await this.sealClient.decrypt({ data: ciphertext, sessionKey, txBytes }); + } catch (cause) { + throw new SealCryptoError({ message: "Seal decryption failed", cause, step: "decrypt" }); + } + } + + /** + * Sign the base64-encoded sponsored transaction bytes returned by + * POST /api/v1/spaces/{id}/buckets (reserve step). + * Returns the signature in the format Harbor expects for /finalize. + */ + async signTransactionBytes(bytesBase64: string): Promise { + const keypair = this.getKeypair(); + try { + const { signature } = await keypair.signTransaction(fromBase64(bytesBase64)); + return signature; // base64 string ready for /finalize + } catch (cause) { + throw new SealCryptoError({ + message: "Failed to sign sponsored transaction bytes", + cause, + step: "sign", + }); + } + } +} diff --git a/packages/walrus/src/harbor/types.ts b/packages/walrus/src/harbor/types.ts new file mode 100644 index 0000000..f97f6ca --- /dev/null +++ b/packages/walrus/src/harbor/types.ts @@ -0,0 +1,108 @@ +/** + * Plain-TS types for the Harbor external REST API + Seal client. + * + * Ported from the Effect reference (harbor/types.ts + HarborApiClient.ts) but + * stripped of `effect` Schema/branding — these are the same DTO shapes the + * Harbor OpenAPI returns, expressed as plain interfaces. + */ + +// Harbor IDs are opaque strings. The reference brands them via effect Schema; +// here we keep readable aliases so the DTOs document intent without a runtime cost. +export type SpaceId = string; +export type BucketId = string; +export type FileId = string; + +/** Config for {@link HarborClient} / {@link SealCrypto}. Secrets come from env at runtime. */ +export interface HarborConfig { + /** Harbor API base URL, e.g. https://api.testnet.harbor.walrus.xyz */ + readonly baseUrl: string; + /** Bearer API key (HARBOR_API_KEY). */ + readonly apiKey: string; + /** Sui service private key (suiprivkey1... / HARBOR_SERVICE_PRIVATE_KEY). Empty for metadata-only use. */ + readonly servicePrivateKey: string; + /** Optional default space (HARBOR_DEFAULT_SPACE_ID). */ + readonly defaultSpaceId?: string; +} + +// === Resource DTOs (ported from reference types.ts) === + +export interface SpaceListItem { + readonly id: SpaceId; + readonly type: "personal" | "team"; + readonly name: string; + readonly plan: "free" | "starter" | "pro" | "business"; + readonly storage_used: number; + readonly storage_cap: number; + readonly bucket_count: number; + readonly role: "owner" | "admin" | "editor" | "viewer"; + readonly created_at: string; +} + +export interface Bucket { + readonly id: BucketId; + readonly space_id: SpaceId; + readonly name: string; + readonly visibility: "public" | "private"; + readonly seal_policy_id: string | null; + readonly storage_used: number; + readonly created_at: string; + readonly updated_at: string; +} + +export interface FileSummary { + readonly id: FileId; + readonly bucket_id: BucketId; + readonly name: string; + readonly size: number; + readonly status: string; + readonly is_private: boolean; + readonly mime_type: string | null; + readonly metadata: Record | null; + readonly created_at: string; + readonly updated_at: string; +} + +// === Response envelopes (ported from reference HarborApiClient.ts) === + +export interface CreateBucketReserveResponse { + readonly bucket_id: BucketId; + readonly bytes: string; // base64 sponsored tx + readonly digest: string; + readonly state: "pending_policy"; +} + +export interface FinalizeBucketResponse { + readonly bucket_id: BucketId; + readonly seal_policy_id: string | null; + readonly state: string; +} + +export interface FileUploadResponse { + readonly data: { + readonly id: FileId; + }; +} + +export type FileUploadState = "queued" | "active" | "completed" | "failed"; + +export interface FileStatusResponse { + readonly data: { + readonly state: FileUploadState; + readonly progress?: number; + readonly error?: { code: string; message: string }; + }; +} + +export interface FileListResponse { + readonly data: readonly FileSummary[]; + readonly pagination: { + readonly limit: number; + readonly has_more: boolean; + readonly next_cursor: string | null; + }; +} + +export interface BucketListResponse { + readonly buckets: readonly Bucket[]; + readonly next_cursor: string | null; +} diff --git a/packages/walrus/src/index.ts b/packages/walrus/src/index.ts index adb72fc..38a9ad3 100644 --- a/packages/walrus/src/index.ts +++ b/packages/walrus/src/index.ts @@ -5,3 +5,4 @@ export * from "./resources.js"; export * from "./proof.js"; export * from "./storage.js"; export * from "./history.js"; +export * as harbor from "./harbor/index.js"; From c41d603ff528d75a81bf1101cee405d7c4f58a9c Mon Sep 17 00:00:00 2001 From: Harry Phan Date: Sat, 20 Jun 2026 19:15:57 +0700 Subject: [PATCH 2/3] feat(api): wire Harbor/Seal private storage into the Worker (#7) Route private-namespace artifacts through Seal encryption + Harbor storage in the hosted Worker, mirroring how MEMWAL secrets are handled: - storeNamespaceImport / readArtifact / updateNamespaceArtifact encrypt-on-write and decrypt-on-read via HarborStorage when a namespace is Harbor-backed; public namespaces keep the plaintext R2 path unchanged. - Version-scope the artifact write/classify path to current_version_id so a private edit never misclassifies a stale R2 row and leaks plaintext. - Persist-before-delete on rotation: commit the new harbor_file_id before dropping old ciphertext (no dangling pointer / silent loss). - putEncrypted: bounded settlement-retry for transient upload_funding_timeout; storeNamespaceImport: orphan-cleanup rolls back a freshly-created bucket on upload failure so a failed run leaks no bucket (live-validated). - migration 0007: add storage_provider + seal_identity_salt columns and the per-namespace Harbor bucket / Seal policy / per-artifact file pointer columns. - wrangler.example.jsonc: document HARBOR_* vars and required secrets. - scripts: live + FakeD1 round-trip smoke harness (reads creds from env only). --- apps/api/cloudflare/wrangler.example.jsonc | 7 +- .../0007_harbor_namespace_storage.sql | 24 ++ apps/api/scripts/cf-virtual-stubs.preload.ts | 33 ++ apps/api/scripts/harbor-wiring-smoke.ts | 390 ++++++++++++++++++ apps/api/src/worker.ts | 312 ++++++++++++-- packages/walrus/src/harbor/index.ts | 131 ++++-- 6 files changed, 816 insertions(+), 81 deletions(-) create mode 100644 apps/api/migrations/0007_harbor_namespace_storage.sql create mode 100644 apps/api/scripts/cf-virtual-stubs.preload.ts create mode 100644 apps/api/scripts/harbor-wiring-smoke.ts diff --git a/apps/api/cloudflare/wrangler.example.jsonc b/apps/api/cloudflare/wrangler.example.jsonc index aeecbb4..44cccf2 100644 --- a/apps/api/cloudflare/wrangler.example.jsonc +++ b/apps/api/cloudflare/wrangler.example.jsonc @@ -5,7 +5,9 @@ "compatibility_flags": ["nodejs_compat"], "main": "../src/worker.ts", "vars": { - "CONTEXTMEM_WORKER_BASE_URL": "http://localhost:8787" + "CONTEXTMEM_WORKER_BASE_URL": "http://localhost:8787", + "HARBOR_BASE_URL": "https://api.testnet.harbor.walrus.xyz", + "HARBOR_DEFAULT_SPACE_ID": "replace-with-harbor-space-id" }, "d1_databases": [ { @@ -34,6 +36,7 @@ ] }, "secrets": { - "required": ["CONTEXTMEM_NAMESPACE_IMPORT_TOKEN", "FIRECRAWL_API_KEY", "MEMWAL_ACCOUNT_ID", "MEMWAL_API_URL", "MEMWAL_PRIVATE_KEY"] + "required": ["CONTEXTMEM_NAMESPACE_IMPORT_TOKEN", "FIRECRAWL_API_KEY", "MEMWAL_ACCOUNT_ID", "MEMWAL_API_URL", "MEMWAL_PRIVATE_KEY"], + "harbor": ["HARBOR_API_KEY", "HARBOR_SERVICE_PRIVATE_KEY"] } } diff --git a/apps/api/migrations/0007_harbor_namespace_storage.sql b/apps/api/migrations/0007_harbor_namespace_storage.sql new file mode 100644 index 0000000..baec179 --- /dev/null +++ b/apps/api/migrations/0007_harbor_namespace_storage.sql @@ -0,0 +1,24 @@ +-- Harbor (Walrus) private storage for namespaces. +-- PRIVATE namespaces are Seal-encrypted in the Worker and their ciphertext is +-- stored in a per-namespace Harbor bucket instead of plaintext R2. All columns +-- are nullable so existing/public artifacts (r2_key only, no harbor_file_id) +-- keep working unchanged. D1/SQLite requires one ALTER per statement. + +-- Per-namespace Harbor bucket + Seal policy (resolved/created once, reused across versions). +ALTER TABLE contextmem_namespaces ADD COLUMN harbor_space_id TEXT; +ALTER TABLE contextmem_namespaces ADD COLUMN harbor_bucket_id TEXT; +ALTER TABLE contextmem_namespaces ADD COLUMN harbor_seal_policy_id TEXT; + +-- StorageProvider seam (#19): which backend holds this namespace's artifacts +-- ('harbor' = private/encrypted, NULL or 'r2' = public/plaintext), and the +-- per-namespace Seal identity salt (32-byte nonce, hex) so the encryption identity +-- stays stable across versions. Nullable so existing namespaces keep working. +ALTER TABLE contextmem_namespaces ADD COLUMN storage_provider TEXT; +ALTER TABLE contextmem_namespaces ADD COLUMN seal_identity_salt TEXT; + +-- Per-artifact Harbor file pointer. When set, the read path decrypts from Harbor +-- and ignores r2_key (which holds a `harbor:` sentinel to satisfy NOT NULL). +ALTER TABLE contextmem_namespace_artifacts ADD COLUMN harbor_file_id TEXT; +ALTER TABLE contextmem_namespace_artifacts ADD COLUMN harbor_bucket_id TEXT; + +CREATE INDEX IF NOT EXISTS contextmem_artifacts_harbor_idx ON contextmem_namespace_artifacts(harbor_bucket_id, harbor_file_id); diff --git a/apps/api/scripts/cf-virtual-stubs.preload.ts b/apps/api/scripts/cf-virtual-stubs.preload.ts new file mode 100644 index 0000000..3daf3ae --- /dev/null +++ b/apps/api/scripts/cf-virtual-stubs.preload.ts @@ -0,0 +1,33 @@ +/** + * Bun preload: stub Cloudflare Workers virtual modules (`cloudflare:workers`, + * `cloudflare:email`) so the worker module (which transitively imports `agents`) + * can be loaded under plain Bun for the Harbor wiring smoke test. These symbols + * are never exercised by storeNamespaceImport / readArtifact — they only need to + * resolve at import-link time. Test-only; not shipped to the Worker. + */ +import { plugin } from "bun"; + +plugin({ + name: "cf-virtual-stubs", + setup(build) { + build.module("cloudflare:workers", () => ({ + loader: "object", + exports: { + DurableObject: class DurableObject {}, + WorkflowEntrypoint: class WorkflowEntrypoint {}, + RpcTarget: class RpcTarget {}, + WorkerEntrypoint: class WorkerEntrypoint {}, + exports: {}, + env: {}, + }, + })); + build.module("cloudflare:email", () => ({ + loader: "object", + exports: { EmailMessage: class EmailMessage {} }, + })); + build.module("cloudflare:sockets", () => ({ + loader: "object", + exports: { connect: () => { throw new Error("cloudflare:sockets stub"); } }, + })); + }, +}); diff --git a/apps/api/scripts/harbor-wiring-smoke.ts b/apps/api/scripts/harbor-wiring-smoke.ts new file mode 100644 index 0000000..68a153d --- /dev/null +++ b/apps/api/scripts/harbor-wiring-smoke.ts @@ -0,0 +1,390 @@ +/** + * Harbor wiring smoke test (fallback integration harness). + * + * Drives the WORKER's OWN code path: + * storeNamespaceImport() (write) -> CloudflareNamespaceStore.readArtifact() (read) + * with in-memory fakes for D1 (bun:sqlite-backed, runs the REAL migration schema + + * REAL SQL) and R2 (Map + put-counter), and the REAL Harbor creds from .env.local + * (so the worker's own resolveHarborConfig(env) runs against real Harbor + Seal). + * + * Asserts: + * 1. PRIVATE import -> ciphertext goes to Harbor; readArtifact decrypts to EXACT plaintext. + * 2. R2 received ZERO puts for the private file (no plaintext leak). + * 3. BACKWARD-COMPAT: a legacy artifact row (r2_key set, harbor_file_id NULL) still + * reads back from R2. + * + * Cleans up the real Harbor bucket it created. + * + * Run: bun apps/api/scripts/harbor-wiring-smoke.ts (from repo root) + * + * SAFETY: reads creds only from env files; never commits/pushes/deploys; never + * writes secrets anywhere. Public-namespace / unconfigured behavior is untouched. + */ +import { Database } from "bun:sqlite"; +import { readFileSync, readdirSync } from "node:fs"; +import { join } from "node:path"; + +import { harbor } from "@contextmem/walrus"; +import { storeNamespaceImport, updateNamespaceArtifact, CloudflareNamespaceStore, type WorkerEnv } from "../src/worker.ts"; + +type ImportInput = Parameters[0]; + +const logs: string[] = []; +const log = (...parts: unknown[]) => { + const line = parts.map((p) => (typeof p === "string" ? p : JSON.stringify(p))).join(" "); + logs.push(line); + console.log(line); +}; + +function fail(msg: string): never { + log("FAIL:", msg); + console.error("\n----- LOGS -----\n" + logs.join("\n")); + process.exit(1); +} + +// ----- dotenv (read real creds; never hardcode) ----- +function loadDotenv(path: string): Record { + const out: Record = {}; + let txt: string; + try { + txt = readFileSync(path, "utf8"); + } catch { + return out; + } + for (const raw of txt.split(/\r?\n/)) { + const m = raw.match(/^\s*([A-Za-z0-9_]+)\s*=\s*(.*)\s*$/); + if (!m) continue; + let v = m[2]!; + if ((v.startsWith('"') && v.endsWith('"')) || (v.startsWith("'") && v.endsWith("'"))) v = v.slice(1, -1); + out[m[1]!] = v; + } + return out; +} + +// ----- in-memory D1 backed by a real SQLite engine (bun:sqlite) ----- +function normParam(v: unknown): unknown { + if (v === undefined) return null; + if (typeof v === "boolean") return v ? 1 : 0; + return v; +} + +class FakeStmt { + params: unknown[] = []; + constructor(private readonly db: Database, public readonly sql: string) {} + bind(...values: unknown[]): FakeStmt { + this.params = values.map(normParam); + return this; + } + async first>(): Promise { + return (this.db.query(this.sql).get(...(this.params as never[])) as T) ?? null; + } + async all>(): Promise<{ results: T[] }> { + return { results: this.db.query(this.sql).all(...(this.params as never[])) as T[] }; + } + async run(): Promise { + return this.db.query(this.sql).run(...(this.params as never[])); + } +} + +class FakeD1 { + constructor(public readonly db: Database) {} + prepare(sql: string): FakeStmt { + return new FakeStmt(this.db, sql); + } + async batch(statements: FakeStmt[]): Promise { + const tx = this.db.transaction((list: FakeStmt[]) => { + for (const s of list) this.db.query(s.sql).run(...(s.params as never[])); + }); + tx(statements); + return []; + } +} + +// ----- in-memory R2 with a put-counter ----- +class FakeR2 { + store = new Map(); + putLog: string[] = []; + async get(key: string) { + const o = this.store.get(key); + if (!o) return null; + const ab = o.bytes.buffer.slice(o.bytes.byteOffset, o.bytes.byteOffset + o.bytes.byteLength); + return { + text: async () => new TextDecoder().decode(o.bytes), + arrayBuffer: async () => ab, + }; + } + async put(key: string, value: string | ArrayBuffer | Uint8Array, opts?: { httpMetadata?: { contentType?: string } }) { + this.putLog.push(key); + let bytes: Uint8Array; + if (typeof value === "string") bytes = new TextEncoder().encode(value); + else if (value instanceof Uint8Array) bytes = value; + else bytes = new Uint8Array(value); + this.store.set(key, { bytes, contentType: opts?.httpMetadata?.contentType }); + return {}; + } + seed(key: string, bytes: Uint8Array, contentType?: string) { + this.store.set(key, { bytes, contentType }); + } +} + +function bytesEqual(a: Uint8Array, b: Uint8Array): boolean { + if (a.byteLength !== b.byteLength) return false; + for (let i = 0; i < a.byteLength; i++) if (a[i] !== b[i]) return false; + return true; +} + +async function main() { + const repoRoot = join(import.meta.dir, "..", "..", ".."); + const fileEnv = { + ...loadDotenv(join(repoRoot, "apps/api/cloudflare/.dev.vars")), + ...loadDotenv(join(repoRoot, ".env.local")), + }; + // Prefer real creds from files; fall back to process.env if present. + const get = (k: string) => fileEnv[k] ?? process.env[k]; + const HARBOR_API_KEY = get("HARBOR_API_KEY"); + const HARBOR_SERVICE_PRIVATE_KEY = get("HARBOR_SERVICE_PRIVATE_KEY"); + if (!HARBOR_API_KEY || !HARBOR_SERVICE_PRIVATE_KEY) { + fail("Harbor creds missing (HARBOR_API_KEY / HARBOR_SERVICE_PRIVATE_KEY) in .env.local / .dev.vars — cannot run round-trip."); + } + + // Build a real SQLite DB and apply the real migrations (0001..0007). + const db = new Database(":memory:"); + db.exec("PRAGMA foreign_keys = ON;"); + const migDir = join(repoRoot, "apps/api/migrations"); + const migFiles = readdirSync(migDir).filter((f) => f.endsWith(".sql")).sort(); + for (const f of migFiles) { + db.exec(readFileSync(join(migDir, f), "utf8")); + } + log("migrations applied:", migFiles.join(", ")); + + const r2 = new FakeR2(); + const env: WorkerEnv = { + CONTEXTMEM_DB: new FakeD1(db) as unknown as WorkerEnv["CONTEXTMEM_DB"], + CONTEXTMEM_CONTEXT_BUCKET: r2 as unknown as WorkerEnv["CONTEXTMEM_CONTEXT_BUCKET"], + CONTEXTMEM_WORKER_BASE_URL: "https://harbor-smoke.local", + HARBOR_BASE_URL: get("HARBOR_BASE_URL"), + HARBOR_API_KEY, + HARBOR_SERVICE_PRIVATE_KEY, + HARBOR_DEFAULT_SPACE_ID: get("HARBOR_DEFAULT_SPACE_ID"), + }; + log("HARBOR_BASE_URL:", env.HARBOR_BASE_URL ?? "(default)"); + log("HARBOR_DEFAULT_SPACE_ID set:", Boolean(env.HARBOR_DEFAULT_SPACE_ID)); + + // ============ 1) PRIVATE import round-trip ============ + // Use a /site/*.md artifact so the SAME private bucket also exercises the + // inline edit path (updateNamespaceArtifact only edits /site/*.md). + const namespace = `harbor-smoke-${Date.now()}`; + const artifactPath = "/site/page.md"; + const PLAINTEXT = `harbor-smoke PRIVATE payload :: ${crypto.randomUUID()} :: unicode 🔐✓ :: ${new Date().toISOString()}`; + const plaintextBytes = new TextEncoder().encode(PLAINTEXT); + + const input: ImportInput = { + namespace, + visibility: "private", + ownerId: "anonymous", + tags: [], + sourceType: "import", + directoryEnabled: false, + target: "https://example.com/harbor-smoke", + buildKind: "single", + manifest: { schemaVersion: 1, generatedBy: "harbor-wiring-smoke" }, + files: [{ path: artifactPath, contentType: "text/plain; charset=utf-8", encoding: "utf8", content: PLAINTEXT }], + } as ImportInput; + + const request = new Request("https://harbor-smoke.local/api/namespaces/import", { method: "POST" }); + + log("\n[1] storeNamespaceImport (private)... encrypting + uploading to Harbor (may take ~10-60s for finalize+mirror-grant+poll)"); + let importResult: Awaited>; + try { + importResult = await storeNamespaceImport(input, request, env); + } catch (err) { + fail(`storeNamespaceImport threw: ${(err as Error).stack ?? String(err)}`); + } + log("import response versionId:", importResult.versionId, "visibility:", importResult.visibility); + + // Inspect persisted rows. + const nsRow = db + .query("SELECT harbor_space_id, harbor_bucket_id, harbor_seal_policy_id FROM contextmem_namespaces WHERE namespace = ?") + .get(namespace) as { harbor_space_id: string | null; harbor_bucket_id: string | null; harbor_seal_policy_id: string | null } | null; + const artRow = db + .query("SELECT path, r2_key, harbor_file_id, harbor_bucket_id FROM contextmem_namespace_artifacts WHERE namespace = ?") + .get(namespace) as { path: string; r2_key: string; harbor_file_id: string | null; harbor_bucket_id: string | null } | null; + + log("namespace row harbor:", nsRow); + log("artifact row harbor:", artRow); + + const bucketId = nsRow?.harbor_bucket_id ?? null; + const fileId = artRow?.harbor_file_id ?? null; + const sealPolicyId = nsRow?.harbor_seal_policy_id ?? null; + + if (!bucketId) fail("namespace row has no harbor_bucket_id after private import"); + if (!sealPolicyId) fail("namespace row has no harbor_seal_policy_id after private import"); + if (!fileId) fail("artifact row has no harbor_file_id after private import"); + if (artRow!.r2_key !== `harbor:${fileId}`) fail(`artifact r2_key sentinel wrong: ${artRow!.r2_key} (expected harbor:${fileId})`); + if (artRow!.harbor_bucket_id !== bucketId) fail("artifact harbor_bucket_id != namespace harbor_bucket_id"); + + // No-plaintext-leak assertion. + if (r2.putLog.length !== 0) fail(`R2 received ${r2.putLog.length} put(s) during private import (expected 0): ${r2.putLog.join(", ")}`); + log("R2 put count during private import:", r2.putLog.length, "(expected 0) -> NO PLAINTEXT LEAK"); + + // ============ 2) read back through worker code -> decrypt -> exact plaintext ============ + log("\n[2] CloudflareNamespaceStore.readArtifact (private)... downloading + Seal-decrypt"); + const store = new CloudflareNamespaceStore(env); + let readBack: Awaited>; + try { + readBack = await store.readArtifact(namespace, artifactPath); + } catch (err) { + fail(`readArtifact threw: ${(err as Error).stack ?? String(err)}`); + } + if (!readBack) fail("readArtifact returned undefined for the private Harbor artifact"); + if (readBack.encoding !== "utf8") fail(`expected utf8 encoding, got ${readBack.encoding}`); + const readBytes = new TextEncoder().encode(readBack.content); + const exact = readBack.content === PLAINTEXT; + const byteEq = bytesEqual(readBytes, plaintextBytes); + log("readback content === sent:", exact, "| byte-equal:", byteEq, "| bytes:", plaintextBytes.byteLength); + if (!exact || !byteEq) { + log("SENT :", JSON.stringify(PLAINTEXT)); + log("GOT :", JSON.stringify(readBack.content)); + fail("round-trip byte mismatch"); + } + + // Crucial: R2 was NOT touched by the private read either. + log("R2 put count after private read:", r2.putLog.length, "(still 0)"); + + // ============ 2.5) PRIVATE inline edit (updateNamespaceArtifact) ============ + // The edit MUST re-encrypt to Harbor and produce ZERO R2 puts (no plaintext + // leak), rotate harbor_file_id, and read back as the EDITED plaintext. + log("\n[2.5] updateNamespaceArtifact (private /site/*.md edit)... re-encrypt + re-upload to Harbor"); + + // Regression seed for the version-scoping fix (adversarial review must-fix #1/#2): + // a STALE older-version row for the SAME path, R2-only (harbor_file_id NULL), with + // a version_id that sorts BEFORE the current one. An UNSCOPED classification + // .first() walks the (namespace,version_id,path) PK index oldest-first and would + // return THIS row -> misclassify the private edit as plaintext-to-R2 (the leak). + const staleVersion = `0000-stale-${Date.now()}`; + const staleR2Key = `namespaces/stale/${staleVersion}${artifactPath}`; + r2.seed(staleR2Key, new TextEncoder().encode("STALE old-version plaintext"), "text/markdown; charset=utf-8"); + const staleIso = new Date().toISOString(); + db.query( + "INSERT INTO contextmem_namespace_versions (id, namespace, manifest_json, artifact_count, byte_length, created_at) VALUES (?,?,?,?,?,?)" + ).run(staleVersion, namespace, JSON.stringify({ schemaVersion: 1 }), 1, 27, staleIso); + db.query( + "INSERT INTO contextmem_namespace_artifacts (namespace, version_id, path, r2_key, content_type, kind, size, sha256, updated_at) VALUES (?,?,?,?,?,?,?,?,?)" + ).run(namespace, staleVersion, artifactPath, staleR2Key, "text/markdown; charset=utf-8", "text", 27, null, staleIso); + log("seeded stale older-version R2 row at version", staleVersion, "(unscoped .first() would pick this)"); + + const r2PutsBeforeEdit = r2.putLog.length; // expected 0 so far (seed uses .seed, not .put) + const EDITED = `harbor-smoke EDITED payload :: ${crypto.randomUUID()} :: 🔁🔐 :: ${new Date().toISOString()}`; + const editReq = new Request( + "https://harbor-smoke.local/api/namespaces/" + encodeURIComponent(namespace) + "/artifact-edit", + { + method: "POST", + headers: { "content-type": "application/json", "x-memwal-account-id": "anonymous" }, + body: JSON.stringify({ path: artifactPath, content: EDITED }), + } + ); + let editResp: Response; + try { + editResp = await updateNamespaceArtifact(editReq, env, namespace); + } catch (err) { + fail(`updateNamespaceArtifact threw: ${(err as Error).stack ?? String(err)}`); + } + const editJson = (await editResp.json().catch(() => ({}))) as { ok?: boolean; error?: string }; + log("edit response status:", editResp.status, "body:", editJson); + if (editResp.status !== 200 || editJson.ok !== true) { + fail(`edit did not succeed: status=${editResp.status} body=${JSON.stringify(editJson)}`); + } + + // No-plaintext-leak assertion for the EDIT path (the bug this fix closes). + const r2PutsFromEdit = r2.putLog.length - r2PutsBeforeEdit; + if (r2PutsFromEdit !== 0) fail(`R2 received ${r2PutsFromEdit} put(s) during private edit (expected 0): ${r2.putLog.join(", ")}`); + log("R2 put count from edit:", r2PutsFromEdit, "(expected 0) -> NO PLAINTEXT LEAK ON EDIT"); + + // harbor_file_id must rotate (new ciphertext) and the r2_key sentinel must follow. + const artRowAfter = db + .query("SELECT r2_key, harbor_file_id, harbor_bucket_id, size FROM contextmem_namespace_artifacts WHERE namespace = ? AND path = ?") + .get(namespace, artifactPath) as { r2_key: string; harbor_file_id: string | null; harbor_bucket_id: string | null; size: number } | null; + log("artifact row after edit:", artRowAfter); + if (!artRowAfter?.harbor_file_id) fail("artifact has no harbor_file_id after edit"); + if (artRowAfter.harbor_file_id === fileId) fail("harbor_file_id did NOT rotate on edit (same ciphertext file id)"); + if (artRowAfter.r2_key !== `harbor:${artRowAfter.harbor_file_id}`) fail(`edit r2_key sentinel wrong: ${artRowAfter.r2_key}`); + if (artRowAfter.harbor_bucket_id !== bucketId) fail("edit changed harbor_bucket_id unexpectedly"); + + // The stale older-version row must be UNTOUCHED (UPDATE is version-scoped, must-fix #2). + const staleAfter = db + .query("SELECT r2_key, harbor_file_id FROM contextmem_namespace_artifacts WHERE namespace = ? AND version_id = ? AND path = ?") + .get(namespace, staleVersion, artifactPath) as { r2_key: string; harbor_file_id: string | null } | null; + if (!staleAfter || staleAfter.r2_key !== staleR2Key || staleAfter.harbor_file_id !== null) { + fail(`edit corrupted the stale older-version row (UPDATE not version-scoped): ${JSON.stringify(staleAfter)}`); + } + log("stale older-version row untouched by edit (UPDATE version-scoped):", staleAfter.r2_key); + + // Read back the EDITED plaintext through the worker (downloads + Seal-decrypt). + const editedRead = await store.readArtifact(namespace, artifactPath); + if (!editedRead) fail("readArtifact returned undefined after edit"); + const editExact = editedRead.content === EDITED; + log("edited readback content === EDITED:", editExact, "| new fileId:", artRowAfter.harbor_file_id); + if (!editExact) { + log("EXPECTED:", JSON.stringify(EDITED)); + log("GOT :", JSON.stringify(editedRead.content)); + fail("edited round-trip mismatch"); + } + + // ============ 3) BACKWARD-COMPAT: legacy R2-only artifact (no harbor_file_id) ============ + log("\n[3] backward-compat: legacy R2 artifact (r2_key set, harbor_file_id NULL)"); + const legacyNs = `legacy-r2-${Date.now()}`; + const legacyVersion = `v-legacy-${Date.now()}`; + const legacyPath = "/legacy.txt"; + const legacyR2Key = `namespaces/legacy/${legacyVersion}${legacyPath}`; + const legacyContent = `LEGACY r2 plaintext :: ${crypto.randomUUID()}`; + r2.seed(legacyR2Key, new TextEncoder().encode(legacyContent), "text/plain; charset=utf-8"); + const nowIso = new Date().toISOString(); + db.query( + "INSERT INTO contextmem_namespaces (namespace, target, visibility, current_version_id, manifest_json, artifact_count, byte_length, created_at, updated_at) VALUES (?,?,?,?,?,?,?,?,?)" + ).run(legacyNs, "https://example.com/legacy", "public", legacyVersion, JSON.stringify({ schemaVersion: 1 }), 1, legacyContent.length, nowIso, nowIso); + db.query( + "INSERT INTO contextmem_namespace_versions (id, namespace, manifest_json, artifact_count, byte_length, created_at) VALUES (?,?,?,?,?,?)" + ).run(legacyVersion, legacyNs, JSON.stringify({ schemaVersion: 1 }), 1, legacyContent.length, nowIso); + // harbor_file_id / harbor_bucket_id intentionally omitted -> NULL. + db.query( + "INSERT INTO contextmem_namespace_artifacts (namespace, version_id, path, r2_key, content_type, kind, size, sha256, updated_at) VALUES (?,?,?,?,?,?,?,?,?)" + ).run(legacyNs, legacyVersion, legacyPath, legacyR2Key, "text/plain; charset=utf-8", "text", legacyContent.length, null, nowIso); + + const legacyRead = await store.readArtifact(legacyNs, legacyPath); + if (!legacyRead) fail("legacy readArtifact returned undefined"); + const legacyOk = legacyRead.encoding === "utf8" && legacyRead.content === legacyContent; + log("legacy read from R2 ok:", legacyOk, "| encoding:", legacyRead.encoding); + if (!legacyOk) { + log("legacy SENT:", JSON.stringify(legacyContent), "GOT:", JSON.stringify(legacyRead.content)); + fail("backward-compat (R2 fallback) failed"); + } + + // ============ cleanup: delete the real Harbor bucket we created ============ + log("\n[cleanup] deleting Harbor bucket", bucketId); + let cleanup = "skipped"; + try { + const cfg = harbor.harborConfigFromEnv({ + HARBOR_BASE_URL: env.HARBOR_BASE_URL, + HARBOR_API_KEY: env.HARBOR_API_KEY, + HARBOR_SERVICE_PRIVATE_KEY: env.HARBOR_SERVICE_PRIVATE_KEY, + HARBOR_DEFAULT_SPACE_ID: env.HARBOR_DEFAULT_SPACE_ID, + }); + const res = await new harbor.HarborStorage(cfg).client.deleteBucket(bucketId); + cleanup = `deleted ${JSON.stringify(res)}`; + } catch (err) { + cleanup = `cleanup-failed (non-fatal): ${(err as Error).message}`; + } + log("[cleanup]", cleanup); + + log("\n===== RESULT: PASS ====="); + log("harbor bucketId:", bucketId); + log("harbor fileId:", fileId); + log("harbor sealPolicyId:", sealPolicyId); + log("harbor spaceId:", nsRow?.harbor_space_id ?? "(null)"); + log("round-trip byte-equal:", byteEq); + log("R2 plaintext puts for private file:", 0); + log("inline-edit re-encrypted to Harbor (0 R2 puts), fileId rotated:", artRowAfter?.harbor_file_id ?? "(null)"); + log("backward-compat R2 fallback:", legacyOk); +} + +main().catch((err) => fail((err as Error).stack ?? String(err))); diff --git a/apps/api/src/worker.ts b/apps/api/src/worker.ts index cf352e9..b4405be 100644 --- a/apps/api/src/worker.ts +++ b/apps/api/src/worker.ts @@ -12,6 +12,11 @@ import { type HostedNamespaceVisibility } from "@contextmem/mcp/hosted"; import { MemWalMcpClient } from "@contextmem/memwal"; +// Harbor + Seal client (on-chain-verified). Used to encrypt private-namespace +// artifacts in the Worker and store the ciphertext in a per-namespace Harbor +// bucket instead of writing plaintext to R2. See resolveHarborConfig + the +// private branch of storeNamespaceImport / readArtifact below. +import { harbor } from "@contextmem/walrus"; // Runtime fns imported from leaf SUBPATHS (not the "@contextmem/core" barrel) so // esbuild does NOT bundle web.ts/html.ts -> cheerio + @mozilla/readability, whose // top-level `__dirname` reference is undefined in the Workers runtime. @@ -52,6 +57,14 @@ export type WorkerEnv = { OPENAI_API_KEY?: string; OPENAI_BASE_URL?: string; OPENAI_MODEL?: string; + // Harbor (Walrus) private storage. When HARBOR_API_KEY + HARBOR_SERVICE_PRIVATE_KEY + // are set, PRIVATE namespaces are Seal-encrypted in the Worker and stored to a + // Harbor bucket instead of R2. All optional → graceful degradation to R2 when unset. + // Mirrors the MEMWAL_* secret pattern (HARBOR_SERVICE_PRIVATE_KEY is a Worker secret). + HARBOR_BASE_URL?: string; + HARBOR_API_KEY?: string; + HARBOR_SERVICE_PRIVATE_KEY?: string; + HARBOR_DEFAULT_SPACE_ID?: string; }; type WorkersAiBinding = { @@ -144,6 +157,9 @@ type ArtifactRow = { size: number; sha256?: string | null; updated_at: string; + // Harbor (private, Seal-encrypted) storage. Null for legacy/public R2 artifacts. + harbor_file_id?: string | null; + harbor_bucket_id?: string | null; }; type ExtractionJobRow = { @@ -456,14 +472,14 @@ export class CloudflareNamespaceStore implements HostedNamespaceStore { const versionId = this.versionFor(namespace); const statement = versionId ? this.env.CONTEXTMEM_DB.prepare( - `SELECT a.namespace, a.version_id, a.path, a.r2_key, a.content_type, a.kind, a.size, a.sha256, a.updated_at + `SELECT a.namespace, a.version_id, a.path, a.r2_key, a.content_type, a.kind, a.size, a.sha256, a.updated_at, a.harbor_file_id, a.harbor_bucket_id FROM contextmem_namespace_artifacts a WHERE a.namespace = ? AND a.version_id = ? AND a.path = ?` ).bind(namespace, versionId, normalizedPath) : this.env.CONTEXTMEM_DB.prepare( - `SELECT a.namespace, a.version_id, a.path, a.r2_key, a.content_type, a.kind, a.size, a.sha256, a.updated_at + `SELECT a.namespace, a.version_id, a.path, a.r2_key, a.content_type, a.kind, a.size, a.sha256, a.updated_at, a.harbor_file_id, a.harbor_bucket_id FROM contextmem_namespace_artifacts a JOIN contextmem_namespaces n ON n.namespace = a.namespace @@ -474,6 +490,36 @@ export class CloudflareNamespaceStore implements HostedNamespaceStore { const row = await statement.first(); if (!row) return undefined; + // Harbor-backed (private, Seal-encrypted) artifact: decrypt in the Worker + // instead of reading from R2. Keys off harbor_file_id FIRST so legacy/public + // R2 artifacts (no harbor_file_id) fall through to the unchanged R2 path below. + if (row.harbor_file_id && row.harbor_bucket_id) { + const cfg = resolveHarborConfig(this.env); + // Fail closed: artifact is Harbor-only, so we cannot serve it without creds. + if (!cfg) return undefined; + // seal_policy_id lives on the namespace row, not the artifact row. + const policyRow = await this.env.CONTEXTMEM_DB.prepare( + `SELECT harbor_seal_policy_id FROM contextmem_namespaces WHERE namespace = ?` + ) + .bind(namespace) + .first<{ harbor_seal_policy_id: string | null }>(); + const sealPolicyId = policyRow?.harbor_seal_policy_id; + if (!sealPolicyId) return undefined; + const bytes = await new harbor.HarborStorage(cfg).getDecrypted(row.harbor_bucket_id, sealPolicyId, row.harbor_file_id); + const record = { + path: row.path, + contentType: row.content_type, + kind: row.kind, + size: Number(row.size), + sha256: row.sha256 ?? undefined, + updatedAt: row.updated_at + }; + if (isHostedTextArtifact(record)) { + return { ...record, encoding: "utf8", content: new TextDecoder().decode(bytes) }; + } + return { ...record, encoding: "base64", content: arrayBufferToBase64(toArrayBuffer(bytes)) }; + } + const object = await this.env.CONTEXTMEM_CONTEXT_BUCKET.get(row.r2_key); if (!object) return undefined; @@ -1101,7 +1147,7 @@ async function importNamespace(request: Request, env: WorkerEnv): Promise { +export async function updateNamespaceArtifact(request: Request, env: WorkerEnv, namespace: string): Promise { const body = await request.json().catch(() => null); const parsed = z.object({ path: z.string().min(1), content: z.string() }).safeParse(body); if (!parsed.success) return json({ error: "Body must be { path: string, content: string }." }, 400); @@ -1111,10 +1157,10 @@ async function updateNamespaceArtifact(request: Request, env: WorkerEnv, namespa } const row = await env.CONTEXTMEM_DB.prepare( - `SELECT namespace, owner_id, current_version_id, byte_length, manifest_json FROM contextmem_namespaces WHERE namespace = ?` + `SELECT namespace, owner_id, current_version_id, byte_length, manifest_json, harbor_seal_policy_id FROM contextmem_namespaces WHERE namespace = ?` ) .bind(namespace) - .first<{ namespace: string; owner_id: string; current_version_id: string; byte_length: number; manifest_json: string | null }>(); + .first<{ namespace: string; owner_id: string; current_version_id: string; byte_length: number; manifest_json: string | null; harbor_seal_policy_id: string | null }>(); if (!row) return json({ error: "Namespace not found." }, 404); const providedOwner = request.headers.get("x-memwal-account-id") ?? new URL(request.url).searchParams.get("ownerId") ?? ""; @@ -1129,20 +1175,73 @@ async function updateNamespaceArtifact(request: Request, env: WorkerEnv, namespa } const store = new CloudflareNamespaceStore(env); - const existing = await store.readArtifact(namespace, artifactPath); + let existing: Awaited>; + try { + existing = await store.readArtifact(namespace, artifactPath); + } catch { + // Harbor-backed reads hit the network + Seal decrypt; a transient blip should + // surface as 503, not an uncaught 500. + return json({ error: "Could not read the current artifact (storage temporarily unavailable). Try again shortly." }, 503); + } if (!existing) return json({ error: "Artifact not found in this namespace." }, 404); const newBytes = new TextEncoder().encode(parsed.data.content); - const r2Key = `namespaces/${await sha256Hex(namespace)}/${row.current_version_id}${artifactPath}`; - await env.CONTEXTMEM_CONTEXT_BUCKET.put(r2Key, newBytes, { - httpMetadata: { contentType: existing.contentType ?? "text/markdown; charset=utf-8" }, - customMetadata: { - namespace, - versionId: row.current_version_id, - path: artifactPath, - sha256: await sha256Hex(newBytes) + const newSha = await sha256Hex(newBytes); + + // Harbor-aware write: if this artifact lives in a Seal-encrypted Harbor bucket, + // the edit MUST be re-encrypted and re-uploaded to Harbor — never written as + // plaintext to R2. A plaintext R2 write here would both (a) leak the private + // content and (b) be silently ignored, since readArtifact keys off + // harbor_file_id first. Public / legacy artifacts keep the R2 path. + // Classify against the SAME row readArtifact serves: scope to the current + // version. The artifacts PK is (namespace, version_id, path) and old-version + // rows are never pruned, so an unscoped .first() can return a stale R2 row and + // misclassify a private Harbor edit as plaintext-to-R2 (the original leak). + const artStorage = await env.CONTEXTMEM_DB.prepare( + `SELECT harbor_file_id, harbor_bucket_id FROM contextmem_namespace_artifacts WHERE namespace = ? AND version_id = ? AND path = ?` + ) + .bind(namespace, row.current_version_id, artifactPath) + .first<{ harbor_file_id: string | null; harbor_bucket_id: string | null }>(); + const harborBacked = Boolean(artStorage?.harbor_file_id && artStorage?.harbor_bucket_id); + const harborCfg = harborBacked ? resolveHarborConfig(env) : null; + + let harborNewFileId: string | null = null; + let harborStorage: harbor.HarborStorage | null = null; + let oldHarborFileId: string | null = null; + let oldHarborBucketId: string | null = null; + if (harborBacked) { + if (!harborCfg || !row.harbor_seal_policy_id) { + // Fail closed: encrypted artifact but Harbor unavailable — do NOT fall back + // to a plaintext R2 write. + return json( + { error: "This namespace is Seal-encrypted (Harbor) but Harbor is not configured on this server; cannot edit." }, + 503 + ); } - }); + oldHarborBucketId = artStorage!.harbor_bucket_id as string; + oldHarborFileId = artStorage!.harbor_file_id as string; + harborStorage = new harbor.HarborStorage(harborCfg); + const harborFileName = artifactPath.replace(/^\/+/, "").replace(/\/+/g, "_") || "artifact"; + harborNewFileId = await harborStorage.putEncrypted( + oldHarborBucketId, + row.harbor_seal_policy_id, + newBytes, + harborFileName + ); + // NB: the superseded ciphertext is deleted only AFTER the new harbor_file_id is + // durably persisted to D1 (below). Deleting here would risk a dangling pointer. + } else { + const r2Key = `namespaces/${await sha256Hex(namespace)}/${row.current_version_id}${artifactPath}`; + await env.CONTEXTMEM_CONTEXT_BUCKET.put(r2Key, newBytes, { + httpMetadata: { contentType: existing.contentType ?? "text/markdown; charset=utf-8" }, + customMetadata: { + namespace, + versionId: row.current_version_id, + path: artifactPath, + sha256: newSha + } + }); + } let manifestJson = row.manifest_json; if (manifestJson) { @@ -1170,12 +1269,38 @@ async function updateNamespaceArtifact(request: Request, env: WorkerEnv, namespa .run(); void updates; - await env.CONTEXTMEM_DB.prepare( - `UPDATE contextmem_namespace_artifacts SET size = ?, sha256 = ?, updated_at = ? WHERE namespace = ? AND path = ?` - ) - .bind(newBytes.byteLength, await sha256Hex(newBytes), new Date().toISOString(), namespace, artifactPath) - .run() - .catch(() => undefined); + if (harborNewFileId) { + // Persist the new ciphertext pointer BEFORE deleting the old file, and do NOT + // swallow this error: a dangling harbor_file_id would make the private artifact + // permanently unreadable. Until this commits, D1 still points at the old file + // (still present), so readArtifact keeps serving the pre-edit content. + try { + await env.CONTEXTMEM_DB.prepare( + `UPDATE contextmem_namespace_artifacts SET size = ?, sha256 = ?, harbor_file_id = ?, r2_key = ?, updated_at = ? WHERE namespace = ? AND version_id = ? AND path = ?` + ) + .bind(newBytes.byteLength, newSha, harborNewFileId, `harbor:${harborNewFileId}`, new Date().toISOString(), namespace, row.current_version_id, artifactPath) + .run(); + } catch { + // Rotation failed: the new file is an orphan, the old file + pointer are intact, + // so the artifact still reads back as the pre-edit content. Surface the failure. + return json({ error: "Failed to persist the encrypted edit; the previous version is unchanged." }, 500); + } + // Pointer durably rotated -> the superseded ciphertext is now safe to remove. + if (harborStorage && oldHarborBucketId && oldHarborFileId) { + try { + await harborStorage.client.deleteBucketFile(oldHarborBucketId, oldHarborFileId); + } catch { + // orphaned old file is harmless; non-fatal + } + } + } else { + await env.CONTEXTMEM_DB.prepare( + `UPDATE contextmem_namespace_artifacts SET size = ?, sha256 = ?, updated_at = ? WHERE namespace = ? AND version_id = ? AND path = ?` + ) + .bind(newBytes.byteLength, newSha, new Date().toISOString(), namespace, row.current_version_id, artifactPath) + .run() + .catch(() => undefined); + } return json({ ok: true, path: artifactPath, size: newBytes.byteLength }); } @@ -1752,7 +1877,7 @@ async function listAlerts(request: Request, env: WorkerEnv): Promise { return json({ alerts: allResults(result).map(publicAlert) }); } -async function storeNamespaceImport(input: NamespaceImportInput, request: Request, env: WorkerEnv) { +export async function storeNamespaceImport(input: NamespaceImportInput, request: Request, env: WorkerEnv) { const namespace = normalizeNamespace(input.namespace); const now = new Date().toISOString(); const versionId = createVersionId(); @@ -1781,24 +1906,96 @@ async function storeNamespaceImport(input: NamespaceImportInput, request: Reques ); const totalBytes = files.reduce((sum, file) => sum + file.size, 0); - await Promise.all( - files.map((file) => - env.CONTEXTMEM_CONTEXT_BUCKET.put(file.r2Key, file.bytes, { - httpMetadata: { contentType: file.contentType }, - customMetadata: { - namespace, - versionId, - path: file.path, - sha256: file.sha256 - } - }) + // --- Storage backend: Harbor (private + encrypted) vs R2 (public/plaintext) --- + // For PRIVATE namespaces, when Harbor is configured we Seal-encrypt each artifact + // in the Worker and upload the ciphertext to a per-namespace Harbor bucket — and + // we do NOT write plaintext bytes to R2. Public namespaces (and any namespace when + // Harbor is not configured) keep using R2 exactly as before. Fail-closed: a Harbor + // bucket-create/upload error propagates and aborts the import rather than silently + // leaking plaintext to R2. + const isPrivate = input.visibility === "private"; + const harborCfg = isPrivate ? resolveHarborConfig(env) : null; + let harborSpaceId: string | null = null; + let harborBucketId: string | null = null; + let harborSealPolicyId: string | null = null; + // harbor_file_id per artifact, aligned to `files` by index (null when stored in R2). + const harborFileIds: Array = files.map(() => null); + + if (harborCfg) { + const storage = new harbor.HarborStorage(harborCfg); + // Track whether THIS call created the bucket, so a later upload failure can roll + // it back (a leaked bucket counts against the space's bucket cap). Never delete a + // reused/pre-existing bucket — it may hold other versions' ciphertext. + let createdNewBucket = false; + // Reuse the namespace's existing bucket across re-imports so every version lands + // under the SAME Seal policy; only create a bucket on first import. + const existing = await env.CONTEXTMEM_DB.prepare( + `SELECT harbor_space_id, harbor_bucket_id, harbor_seal_policy_id FROM contextmem_namespaces WHERE namespace = ?` ) - ); + .bind(namespace) + .first<{ harbor_space_id: string | null; harbor_bucket_id: string | null; harbor_seal_policy_id: string | null }>(); + if (existing?.harbor_bucket_id && existing.harbor_seal_policy_id) { + harborSpaceId = existing.harbor_space_id ?? harborCfg.defaultSpaceId ?? null; + harborBucketId = existing.harbor_bucket_id; + harborSealPolicyId = existing.harbor_seal_policy_id; + } else { + // Resolve the space: configured default, else the first personal space. + let spaceId = harborCfg.defaultSpaceId; + if (!spaceId) { + const spaces = await storage.client.listSpaces({ type: "personal" }); + spaceId = spaces[0]?.id; + if (!spaceId) throw new Error("Harbor: no personal space available to create a private bucket."); + } + const created = await storage.createPrivateBucket(spaceId, `ctxm-ns-${await sha256Hex(namespace)}`); + if (!created.sealPolicyId) throw new Error(`Harbor: bucket ${created.bucketId} has no seal policy id (private bucket expected).`); + harborSpaceId = spaceId; + harborBucketId = created.bucketId; + harborSealPolicyId = created.sealPolicyId; + createdNewBucket = true; + } + // Encrypt + upload each artifact (ciphertext only; NO plaintext to R2). + // Harbor rejects file names containing slashes, but artifact paths are nested + // (e.g. "/context/facts.json"). Flatten to a slash-free name — the real path + // lives in D1 and retrieval is keyed by harbor_file_id, so this name is only a + // display/content-type hint. Keep the extension so contentTypeFromName works. + try { + for (let i = 0; i < files.length; i += 1) { + const harborFileName = files[i]!.path.replace(/^\/+/, "").replace(/\/+/g, "_") || "artifact"; + harborFileIds[i] = await storage.putEncrypted(harborBucketId, harborSealPolicyId, files[i]!.bytes, harborFileName); + } + } catch (err) { + // Roll back a freshly-created bucket so a failed first import doesn't leak a + // bucket against the space cap. Reused buckets are left intact. + if (createdNewBucket && harborBucketId) { + try { + await storage.client.deleteBucket(harborBucketId); + } catch { + // best-effort; surfacing the original upload error matters more + } + } + throw err; + } + } else { + // Public namespace, or Harbor not configured: store plaintext bytes in R2 as today. + await Promise.all( + files.map((file) => + env.CONTEXTMEM_CONTEXT_BUCKET.put(file.r2Key, file.bytes, { + httpMetadata: { contentType: file.contentType }, + customMetadata: { + namespace, + versionId, + path: file.path, + sha256: file.sha256 + } + }) + ) + ); + } const statements = [ env.CONTEXTMEM_DB.prepare( - `INSERT INTO contextmem_namespaces (namespace, target, visibility, owner_id, display_name, description, tags_json, source_type, directory_enabled, current_version_id, source_run_id, build_kind, sources_json, source_count, manifest_json, artifact_count, byte_length, created_at, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + `INSERT INTO contextmem_namespaces (namespace, target, visibility, owner_id, display_name, description, tags_json, source_type, directory_enabled, current_version_id, source_run_id, build_kind, sources_json, source_count, manifest_json, artifact_count, byte_length, harbor_space_id, harbor_bucket_id, harbor_seal_policy_id, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(namespace) DO UPDATE SET target = excluded.target, visibility = excluded.visibility, @@ -1816,6 +2013,9 @@ async function storeNamespaceImport(input: NamespaceImportInput, request: Reques manifest_json = excluded.manifest_json, artifact_count = excluded.artifact_count, byte_length = excluded.byte_length, + harbor_space_id = COALESCE(excluded.harbor_space_id, harbor_space_id), + harbor_bucket_id = COALESCE(excluded.harbor_bucket_id, harbor_bucket_id), + harbor_seal_policy_id = COALESCE(excluded.harbor_seal_policy_id, harbor_seal_policy_id), updated_at = excluded.updated_at` ).bind( namespace, @@ -1835,6 +2035,9 @@ async function storeNamespaceImport(input: NamespaceImportInput, request: Reques JSON.stringify(input.manifest), files.length, totalBytes, + harborSpaceId, + harborBucketId, + harborSealPolicyId, now, now ), @@ -1847,12 +2050,16 @@ async function storeNamespaceImport(input: NamespaceImportInput, request: Reques VALUES (?, ?, ?, ?, ?)` ).bind(tokenHash, tokenId, namespace, `import:${versionId}`, now), env.CONTEXTMEM_DB.prepare(`DELETE FROM contextmem_namespace_artifacts WHERE namespace = ? AND version_id = ?`).bind(namespace, versionId), - ...files.map((file) => - env.CONTEXTMEM_DB.prepare( - `INSERT INTO contextmem_namespace_artifacts (namespace, version_id, path, r2_key, content_type, kind, size, sha256, updated_at) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)` - ).bind(namespace, versionId, file.path, file.r2Key, file.contentType, file.kind, file.size, file.sha256, now) - ) + ...files.map((file, i) => { + const harborFileId = harborFileIds[i]; + // r2_key is NOT NULL: for Harbor artifacts store a `harbor:` sentinel + // (the read path keys off harbor_file_id first, never this value). + const r2KeyValue = harborFileId ? `harbor:${harborFileId}` : file.r2Key; + return env.CONTEXTMEM_DB.prepare( + `INSERT INTO contextmem_namespace_artifacts (namespace, version_id, path, r2_key, content_type, kind, size, sha256, harbor_file_id, harbor_bucket_id, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` + ).bind(namespace, versionId, file.path, r2KeyValue, file.contentType, file.kind, file.size, file.sha256, harborFileId, harborFileId ? harborBucketId : null, now); + }) ]; if (env.CONTEXTMEM_DB.batch) await env.CONTEXTMEM_DB.batch(statements); @@ -3351,6 +3558,27 @@ function resolveMemwalCreds(request: Request, env: WorkerEnv): { url: string; pr return { url, privateKey: headerKey ?? envKey, accountId: headerAccount ?? envAccount }; } +// Harbor (Walrus private storage) is "configured" only when BOTH the API key and +// the Seal service private key are present. baseUrl + defaultSpaceId are optional. +// Gate ALL Harbor behavior on this so an unconfigured Worker degrades to plain R2. +function isHarborConfigured(env: WorkerEnv): boolean { + return Boolean(env.HARBOR_API_KEY && env.HARBOR_SERVICE_PRIVATE_KEY); +} + +// Build a HarborConfig from the Worker `env` binding (NOT process.env, which is +// empty in the Workers runtime). Reuses harbor.harborConfigFromEnv but feeds it the +// per-request env. Returns null (instead of throwing) when Harbor is not configured +// so callers can transparently fall back to R2. +function resolveHarborConfig(env: WorkerEnv): harbor.HarborConfig | null { + if (!isHarborConfigured(env)) return null; + return harbor.harborConfigFromEnv({ + HARBOR_BASE_URL: env.HARBOR_BASE_URL, + HARBOR_API_KEY: env.HARBOR_API_KEY, + HARBOR_SERVICE_PRIVATE_KEY: env.HARBOR_SERVICE_PRIVATE_KEY, + HARBOR_DEFAULT_SPACE_ID: env.HARBOR_DEFAULT_SPACE_ID + }); +} + // GET /api/memwal/namespaces — curated namespace picker for the Memory explorer. // Override via the MEMWAL_NAMESPACES env (comma-separated "namespace=Label"). function listMemwalNamespaces(request: Request, env: WorkerEnv): Response { diff --git a/packages/walrus/src/harbor/index.ts b/packages/walrus/src/harbor/index.ts index 0956a80..1b8a3cb 100644 --- a/packages/walrus/src/harbor/index.ts +++ b/packages/walrus/src/harbor/index.ts @@ -31,6 +31,22 @@ function isMirrorMissingGrant(err: unknown): boolean { return err.code === "mirror_missing_grant" || /mirror_missing_grant/i.test(err.message); } +/** + * Walrus/Sui testnet occasionally lands an upload in `failed` state with a + * transient on-chain settlement code (e.g. `upload_funding_timeout` — "Funding + * settlement timed out. Please try again shortly."). These are not data errors: + * re-uploading a fresh file usually succeeds. Detect them so {@link + * HarborStorage.putEncrypted} can retry instead of surfacing a hard failure. + */ +function isTransientSettlement(err: unknown): boolean { + if (!(err instanceof FileStatusError)) return false; + const code = err.error?.code ?? ""; + // Only transient *timeouts* (e.g. upload_funding_timeout, settlement_timeout) — + // NOT permanent billing failures like insufficient_funding / settlement_rejected, + // which must surface immediately instead of burning the whole retry budget. + return (/funding|settlement/i.test(code) && /timeout/i.test(code)) || /try again shortly/i.test(err.message); +} + export interface CreatePrivateBucketResult { readonly bucketId: BucketId; readonly sealPolicyId: string | null; @@ -50,6 +66,10 @@ export interface PutEncryptedOptions { readonly pollIntervalMs?: number; /** Optional metadata stored alongside the file. */ readonly metadata?: Record; + /** Extra re-upload attempts when the upload ends in a transient settlement failure. */ + readonly settleAttempts?: number; + /** Backoff before a settlement re-upload (ms). */ + readonly settleBackoffMs?: number; } /** @@ -108,55 +128,92 @@ export class HarborStorage { const uploadBackoffMs = options.uploadBackoffMs ?? 3000; const pollAttempts = options.pollAttempts ?? 40; const pollIntervalMs = options.pollIntervalMs ?? 2000; + const settleAttempts = options.settleAttempts ?? 3; + const settleBackoffMs = options.settleBackoffMs ?? 4000; const encrypted = await this.seal.encrypt(bytes, sealPolicyId); - // Upload with a retry loop on the post-finalize mirror_missing_grant 403. - let uploadResult: FileUploadResponse | undefined; - for (let attempt = 0; attempt < uploadAttempts; attempt++) { - try { - uploadResult = await this.client.uploadBucketFile( - bucketId, - encrypted, - fileName, - options.metadata, - ); - break; - } catch (err) { - if (isMirrorMissingGrant(err) && attempt < uploadAttempts - 1) { - await sleep(uploadBackoffMs); - continue; + // Outer loop: tolerate transient on-chain settlement failures (e.g. + // upload_funding_timeout) by dropping the dead file and re-uploading. + let lastError: FileStatusError | undefined; + for (let settle = 0; settle <= settleAttempts; settle++) { + // Upload with a retry loop on the post-finalize mirror_missing_grant 403. + let uploadResult: FileUploadResponse | undefined; + for (let attempt = 0; attempt < uploadAttempts; attempt++) { + try { + uploadResult = await this.client.uploadBucketFile( + bucketId, + encrypted, + fileName, + options.metadata, + ); + break; + } catch (err) { + if (isMirrorMissingGrant(err) && attempt < uploadAttempts - 1) { + await sleep(uploadBackoffMs); + continue; + } + throw err; } - throw err; } - } - if (!uploadResult) { - throw new MirrorGrantMissingError({ bucketId, attempt: uploadAttempts }); - } + if (!uploadResult) { + throw new MirrorGrantMissingError({ bucketId, attempt: uploadAttempts }); + } - const fileId = uploadResult.data.id; + const fileId = uploadResult.data.id; - // Poll until completed or failed. - let lastState = "queued"; - for (let i = 0; i < pollAttempts; i++) { - const status = await this.client.getFileUploadStatus(bucketId, fileId); - lastState = status.data.state; - if (status.data.state === "completed") return fileId; - if (status.data.state === "failed") { - throw new FileStatusError({ + // Poll until completed or failed (or our own poll budget runs out). + let lastState = "queued"; + let failure: FileStatusError | undefined; + for (let i = 0; i < pollAttempts; i++) { + const status = await this.client.getFileUploadStatus(bucketId, fileId); + lastState = status.data.state; + if (status.data.state === "completed") return fileId; + if (status.data.state === "failed") { + failure = new FileStatusError({ + fileId, + state: status.data.state, + error: status.data.error ?? { code: "unknown", message: "Upload failed" }, + }); + break; + } + await sleep(pollIntervalMs); + } + if (!failure) { + failure = new FileStatusError({ fileId, - state: status.data.state, - error: status.data.error ?? { code: "unknown", message: "Upload failed" }, + state: lastState, + error: { code: "timeout", message: "Upload did not complete in time" }, }); } - await sleep(pollIntervalMs); + + // A transient settlement failure leaves a dead file either way — drop it to + // avoid orphan accumulation, then retry while the budget remains. + if (isTransientSettlement(failure)) { + try { + await this.client.deleteBucketFile(bucketId, fileId); + } catch { + // failed file is orphaned but harmless; non-fatal + } + if (settle < settleAttempts) { + lastError = failure; + await sleep(settleBackoffMs); + continue; + } + } + throw failure; } - throw new FileStatusError({ - fileId, - state: lastState, - error: { code: "timeout", message: "Upload did not complete in time" }, - }); + // Unreachable under normal config: the loop returns on success or throws on + // failure. Retained to satisfy the type checker and guard settleAttempts < 0. + throw ( + lastError ?? + new FileStatusError({ + fileId: "unknown", + state: "failed", + error: { code: "upload_funding_timeout", message: "Upload failed after settlement retries" }, + }) + ); } /** Download a file and decrypt it with the bucket's sealPolicyId. */ From 0f6fe009bc5d484d187cf1528b9e30a724f704aa Mon Sep 17 00:00:00 2001 From: Harry Phan Date: Sat, 20 Jun 2026 20:37:02 +0700 Subject: [PATCH 3/3] fix(walrus): declare @mysten/sui dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit packages/walrus imports @mysten/sui across proof/bcs/resources/resolve and harbor/{constants,seal}, but only @mysten/seal was declared in package.json — @mysten/sui resolved via workspace hoist only. Pin it explicitly (^2.9.1, matching apps/api) so the package is self-contained and survives a non-hoisted install. Surfaced by the open-issue audit (#6). --- bun.lock | 1 + packages/walrus/package.json | 1 + 2 files changed, 2 insertions(+) diff --git a/bun.lock b/bun.lock index 3e9513b..288b042 100644 --- a/bun.lock +++ b/bun.lock @@ -104,6 +104,7 @@ "dependencies": { "@contextmem/core": "workspace:*", "@mysten/seal": "1.1.3", + "@mysten/sui": "^2.9.1", "fast-xml-parser": "^5.3.2", "p-limit": "^7.2.0", }, diff --git a/packages/walrus/package.json b/packages/walrus/package.json index b68805d..68664ef 100644 --- a/packages/walrus/package.json +++ b/packages/walrus/package.json @@ -13,6 +13,7 @@ "dependencies": { "@contextmem/core": "workspace:*", "@mysten/seal": "1.1.3", + "@mysten/sui": "^2.9.1", "fast-xml-parser": "^5.3.2", "p-limit": "^7.2.0" }