From 9eb5d602206b30bfa4e82caba25e086ec8f29f27 Mon Sep 17 00:00:00 2001 From: Jiwon Kwon Date: Wed, 10 Jun 2026 00:27:29 +0900 Subject: [PATCH] Add context activity backfill Add context-auto, context-objects, and context-activities strategy handling for context collection backfill. The auto strategy classifies ordinary objects directly and extracts objects from supported Create activities while preserving shared request budgets, deduplication, abort, and interval behavior. Document the strategy behavior and add coverage for explicit, automatic, empty, overlapping, and duplicate strategy configurations. Assisted-by: Codex:gpt-5.5 --- packages/backfill/README.md | 20 ++ packages/backfill/src/backfill.test.ts | 459 ++++++++++++++++++++++++- packages/backfill/src/backfill.ts | 169 +++++++-- packages/backfill/src/types.ts | 14 +- 4 files changed, 620 insertions(+), 42 deletions(-) diff --git a/packages/backfill/README.md b/packages/backfill/README.md index 7c8a4ceab..b7f905a34 100644 --- a/packages/backfill/README.md +++ b/packages/backfill/README.md @@ -61,3 +61,23 @@ for await ( The seed object itself is not yielded. If it appears in the discovered collection, it is skipped by ID. + +By default, `backfill()` uses the `context-auto` strategy. In this mode, +collection items are treated as backfillable objects. If an item is a `Create` +activity, its object is yielded instead; other activity types are skipped. + +To read only FEP-f228 activity collections, enable the `context-activities` +strategy: + +~~~~ typescript +for await ( + const item of backfill({ documentLoader }, note, { + strategies: ["context-activities"], + }) +) { + console.log(item.object); +} +~~~~ + +The `context-activities` strategy currently supports `Create` activities and +yields the activity's object, not the activity itself. 
diff --git a/packages/backfill/src/backfill.test.ts b/packages/backfill/src/backfill.test.ts index fe957187d..12dc0928d 100644 --- a/packages/backfill/src/backfill.test.ts +++ b/packages/backfill/src/backfill.test.ts @@ -1,7 +1,7 @@ import { deepStrictEqual, ok, rejects, strictEqual } from "node:assert/strict"; import test, { describe } from "node:test"; import { backfill, type BackfillContext, MaxRequestsExceeded } from "./mod.ts"; -import { Collection, Create, Note } from "@fedify/vocab"; +import { Announce, Collection, Create, Note } from "@fedify/vocab"; async function collect( context: BackfillContext, @@ -73,10 +73,39 @@ describe("backfill", () => { strictEqual(items.length, 1); strictEqual(items[0].object, item); deepStrictEqual(items[0].id, item.id); - strictEqual(items[0].strategy, "context-posts"); + strictEqual(items[0].strategy, "context-auto"); strictEqual(items[0].origin, "collection"); }); + test("context object strategy yields embedded objects", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [item], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-objects"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-objects"); + }); + test("embedded object without id is yielded without id", async () => { const contextId = new URL("https://example.com/contexts/1"); const item = new Note({ content: "anonymous" }); @@ -101,7 +130,7 @@ describe("backfill", () => { strictEqual(items[0].id, undefined); }); - test("activity objects in collection are skipped", async () => { + test("context object strategy 
skips activity objects", async () => { const contextId = new URL("https://example.com/contexts/1"); const activity = new Create({ id: new URL("https://example.com/activities/1"), @@ -121,7 +150,429 @@ describe("backfill", () => { ), }; - deepStrictEqual(await collect(context, note), []); + deepStrictEqual( + await collect(context, note, { strategies: ["context-objects"] }), + [], + ); + }); + + test("context auto strategy yields object from embedded Create", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + object: item, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + const items = await collect(context, note); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-auto"); + }); + + test("empty strategies yield nothing without dereferencing context", async () => { + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [new URL("https://example.com/contexts/1")], + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + deepStrictEqual(await collect(context, note, { strategies: [] }), []); + }); + + test("context auto overrides overlapping strategies", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ content: "anonymous" }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + 
id: contextId, + items: [item], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-auto", "context-objects"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-auto"); + }); + + test("duplicate strategies are ignored", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ content: "anonymous" }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [item], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-objects", "context-objects"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-objects"); + }); + + test("context activity collection yields object from embedded Create", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + object: item, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].id?.href, item.id?.href); + strictEqual(items[0].strategy, "context-activities"); + strictEqual(items[0].origin, "collection"); + }); + + test("combined context strategies yield posts and activity objects", async () => { + const contextId = new 
URL("https://example.com/contexts/1"); + const post = new Note({ + id: new URL("https://example.com/notes/2"), + }); + const activityObject = new Note({ + id: new URL("https://example.com/notes/3"), + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [ + post, + new Create({ + id: new URL("https://example.com/activities/1"), + object: activityObject, + }), + ], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-objects", "context-activities"], + }); + + strictEqual(items.length, 2); + strictEqual(items[0].object, post); + strictEqual(items[0].strategy, "context-objects"); + strictEqual(items[1].object, activityObject); + strictEqual(items[1].strategy, "context-activities"); + }); + + test("context activity collection dereferences activity object URL", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const itemId = new URL("https://example.com/notes/2"); + const item = new Note({ id: itemId, content: "hello" }); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + object: itemId, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ); + } + if (iri.href === itemId.href) return Promise.resolve(item); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object.id?.href, item.id?.href); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + 
itemId.href, + ]); + }); + + test("context activity collection dereferences activity URL", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const activityId = new URL("https://example.com/activities/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const activity = new Create({ id: activityId, object: item }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [activityId], + }), + ); + } + if (iri.href === activityId.href) return Promise.resolve(activity); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object.id?.href, item.id?.href); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + activityId.href, + ]); + }); + + test("context activity collection deduplicates by extracted object ID", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const itemId = new URL("https://example.com/notes/2"); + const first = new Create({ + id: new URL("https://example.com/activities/1"), + object: new Note({ id: itemId, content: "first" }), + }); + const second = new Create({ + id: new URL("https://example.com/activities/2"), + object: new Note({ id: itemId, content: "second" }), + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [first, second], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + 
+ strictEqual(items.length, 1); + strictEqual(items[0].id?.href, itemId.href); + }); + + test("context activity collection skips missing object", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + deepStrictEqual( + await collect(context, note, { strategies: ["context-activities"] }), + [], + ); + }); + + test("context activity collection skips unsupported activity type", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ id: new URL("https://example.com/notes/2") }); + const activity = new Announce({ + id: new URL("https://example.com/activities/1"), + object: item, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + deepStrictEqual( + await collect(context, note, { strategies: ["context-activities"] }), + [], + ); + }); + + test("maxRequests limits activity object dereferencing", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const activityId = new URL("https://example.com/activities/1"); + const itemId = new URL("https://example.com/notes/2"); + const activity = new Create({ id: activityId, object: itemId }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new 
Collection({ + id: contextId, + items: [activityId], + }), + ); + } + if (iri.href === activityId.href) return Promise.resolve(activity); + if (iri.href === itemId.href) { + return Promise.resolve( + new Note({ + id: itemId, + }), + ); + } + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + maxRequests: 2, + strategies: ["context-activities"], + }); + + deepStrictEqual(items, []); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + activityId.href, + ]); + }); + + test("maxItems limits context activity items", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const first = new Note({ id: new URL("https://example.com/notes/2") }); + const second = new Note({ id: new URL("https://example.com/notes/3") }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [ + new Create({ + id: new URL("https://example.com/activities/1"), + object: first, + }), + new Create({ + id: new URL("https://example.com/activities/2"), + object: second, + }), + ], + }), + ), + }; + + const items = await collect(context, note, { + maxItems: 1, + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].id?.href, first.id?.href); }); test("context collection with URL items loads and yields objects", async () => { diff --git a/packages/backfill/src/backfill.ts b/packages/backfill/src/backfill.ts index 5e3968b82..3a0b4361d 100644 --- a/packages/backfill/src/backfill.ts +++ b/packages/backfill/src/backfill.ts @@ -2,6 +2,7 @@ import { Activity, Collection, CollectionPage, + Create, type Link, Object as APObject, OrderedCollection, @@ -12,8 +13,13 @@ import type { BackfillContext, BackfillItem, BackfillOptions, + BackfillStrategy, } from "./types.ts"; +const defaultStrategies = [ + "context-auto", 
+] as const satisfies readonly BackfillStrategy[]; + /** * Thrown when backfill traversal exceeds the configured request budget. * @@ -42,6 +48,8 @@ export async function* backfill< options: BackfillOptions = {}, ): AsyncGenerator, void, void> { if (options.maxItems != null && options.maxItems <= 0) return; + const strategies = normalizeStrategies(options.strategies); + if (strategies.length < 1) return; const contextId = note.contextIds[0]; if (contextId == null) return; @@ -61,24 +69,33 @@ export async function* backfill< for await ( const object of getCollectionItems(context, collection, options, budget) ) { - if (!isContextPostObject(object)) continue; - const id = object.id ?? undefined; - if (id != null) { - if (seenIds.has(id.href)) continue; - seenIds.add(id.href); - } + for await ( + const item of getBackfillItems( + context, + object, + strategies, + options, + budget, + ) + ) { + const id = item.object.id ?? undefined; + if (id != null) { + if (seenIds.has(id.href)) continue; + seenIds.add(id.href); + } + + options.signal?.throwIfAborted(); + yield { + object: item.object as TObject, + id, + strategy: item.strategy, + origin: "collection", + depth: 0, + }; - options.signal?.throwIfAborted(); - yield { - object: object as TObject, - id, - strategy: "context-posts", - origin: "collection", - depth: 0, - }; - - yielded++; - if (options.maxItems != null && yielded >= options.maxItems) return; + yielded++; + if (options.maxItems != null && yielded >= options.maxItems) return; + } } } catch (error) { if (error instanceof MaxRequestsExceeded) return; @@ -86,39 +103,117 @@ export async function* backfill< } } -async function* getCollectionItems( +function normalizeStrategies( + strategies: readonly BackfillStrategy[] = defaultStrategies, +): readonly BackfillStrategy[] { + if (strategies.includes("context-auto")) return ["context-auto"]; + return Array.from(new Set(strategies)); +} + +async function* getBackfillItems( context: BackfillContext, - collection: 
BackfillCollection, + object: APObject | Link, + strategies: readonly BackfillStrategy[], options: BackfillOptions, budget: RequestBudget, -): AsyncIterable { - yield* collection.getItems({ - documentLoader: async (url) => { - let object: APObject | null; - try { - object = await loadObject( +): AsyncIterable<{ + readonly object: APObject; + readonly strategy: BackfillStrategy; +}> { + for (const strategy of strategies) { + if (strategy === "context-objects" && isContextPostObject(object)) { + yield { object, strategy }; + } else if (strategy === "context-activities") { + const activityObject = await getCreateActivityObject( + context, + object, + options, + budget, + ); + if (activityObject != null && isContextPostObject(activityObject)) { + yield { object: activityObject, strategy }; + } + } else if (strategy === "context-auto") { + if (object instanceof Activity) { + const activityObject = await getCreateActivityObject( context, - new URL(url), + object, options, budget, - true, ); - } catch (error) { - if (error instanceof MaxRequestsExceeded) throw error; - budget.signal?.throwIfAborted(); - return skippedCollectionItemDocument(url); + if (activityObject != null && isContextPostObject(activityObject)) { + yield { object: activityObject, strategy }; + } + } else if (isContextPostObject(object)) { + yield { object, strategy }; } - if (object == null) return skippedCollectionItemDocument(url); - return { - contextUrl: null, - documentUrl: url, - document: await object.toJsonLd(), - }; + } + } +} + +async function* getCollectionItems( + context: BackfillContext, + collection: BackfillCollection, + options: BackfillOptions, + budget: RequestBudget, +): AsyncIterable { + yield* collection.getItems({ + documentLoader: async (url) => { + return await loadCollectionItemDocument(context, url, options, budget); }, crossOrigin: "trust", }); } +async function getCreateActivityObject( + context: BackfillContext, + object: APObject | Link, + options: BackfillOptions, + 
budget: RequestBudget, +): Promise { + if (!(object instanceof Create)) return null; + try { + return await object.getObject({ + documentLoader: async (url) => { + return await loadCollectionItemDocument(context, url, options, budget); + }, + crossOrigin: "trust", + }); + } catch (error) { + if (error instanceof MaxRequestsExceeded) throw error; + budget.signal?.throwIfAborted(); + return null; + } +} + +async function loadCollectionItemDocument( + context: BackfillContext, + url: string, + options: BackfillOptions, + budget: RequestBudget, +) { + let object: APObject | null; + try { + object = await loadObject( + context, + new URL(url), + options, + budget, + true, + ); + } catch (error) { + if (error instanceof MaxRequestsExceeded) throw error; + budget.signal?.throwIfAborted(); + return skippedCollectionItemDocument(url); + } + if (object == null) return skippedCollectionItemDocument(url); + return { + contextUrl: null, + documentUrl: url, + document: await object.toJsonLd(), + }; +} + function skippedCollectionItemDocument(url: string) { return { contextUrl: null, diff --git a/packages/backfill/src/types.ts b/packages/backfill/src/types.ts index 25555f30e..9ab43ffbf 100644 --- a/packages/backfill/src/types.ts +++ b/packages/backfill/src/types.ts @@ -5,7 +5,10 @@ import type { Object as APObject } from "@fedify/vocab"; * * @since 2.x.0 */ -export type BackfillStrategy = "context-posts"; +export type BackfillStrategy = + | "context-objects" + | "context-activities" + | "context-auto"; /** * Source relation that produced a backfilled object. @@ -56,6 +59,15 @@ export interface BackfillContext { export interface BackfillOptions< TObject extends APObject = APObject, > { + /** + * Backfill strategies to run. + * + * Defaults to `["context-auto"]`. + * + * @since 2.x.0 + */ + readonly strategies?: readonly BackfillStrategy[]; + /** * Maximum number of items to yield. Skipped duplicates do not count. */