diff --git a/packages/backfill/README.md b/packages/backfill/README.md index 7c8a4ceab..b7f905a34 100644 --- a/packages/backfill/README.md +++ b/packages/backfill/README.md @@ -61,3 +61,23 @@ for await ( The seed object itself is not yielded. If it appears in the discovered collection, it is skipped by ID. + +By default, `backfill()` uses the `context-auto` strategy. In this mode, +collection items are treated as backfillable objects. If an item is a `Create` +activity, its object is yielded instead; other activity types are skipped. + +To read only FEP-f228 activity collections, enable the `context-activities` +strategy: + +~~~~ typescript +for await ( + const item of backfill({ documentLoader }, note, { + strategies: ["context-activities"], + }) +) { + console.log(item.object); +} +~~~~ + +The `context-activities` strategy currently supports `Create` activities and +yields the activity's object, not the activity itself. diff --git a/packages/backfill/src/backfill.test.ts b/packages/backfill/src/backfill.test.ts index fe957187d..12dc0928d 100644 --- a/packages/backfill/src/backfill.test.ts +++ b/packages/backfill/src/backfill.test.ts @@ -1,7 +1,7 @@ import { deepStrictEqual, ok, rejects, strictEqual } from "node:assert/strict"; import test, { describe } from "node:test"; import { backfill, type BackfillContext, MaxRequestsExceeded } from "./mod.ts"; -import { Collection, Create, Note } from "@fedify/vocab"; +import { Announce, Collection, Create, Note } from "@fedify/vocab"; async function collect( context: BackfillContext, @@ -73,10 +73,39 @@ describe("backfill", () => { strictEqual(items.length, 1); strictEqual(items[0].object, item); deepStrictEqual(items[0].id, item.id); - strictEqual(items[0].strategy, "context-posts"); + strictEqual(items[0].strategy, "context-auto"); strictEqual(items[0].origin, "collection"); }); + test("context object strategy yields embedded objects", async () => { + const contextId = new URL("https://example.com/contexts/1"); + 
const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [item], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-objects"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-objects"); + }); + test("embedded object without id is yielded without id", async () => { const contextId = new URL("https://example.com/contexts/1"); const item = new Note({ content: "anonymous" }); @@ -101,7 +130,7 @@ describe("backfill", () => { strictEqual(items[0].id, undefined); }); - test("activity objects in collection are skipped", async () => { + test("context object strategy skips activity objects", async () => { const contextId = new URL("https://example.com/contexts/1"); const activity = new Create({ id: new URL("https://example.com/activities/1"), @@ -121,7 +150,429 @@ describe("backfill", () => { ), }; - deepStrictEqual(await collect(context, note), []); + deepStrictEqual( + await collect(context, note, { strategies: ["context-objects"] }), + [], + ); + }); + + test("context auto strategy yields object from embedded Create", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + object: item, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + const items = await collect(context, note); + + 
strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-auto"); + }); + + test("empty strategies yield nothing without dereferencing context", async () => { + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [new URL("https://example.com/contexts/1")], + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + deepStrictEqual(await collect(context, note, { strategies: [] }), []); + }); + + test("context auto overrides overlapping strategies", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ content: "anonymous" }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [item], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-auto", "context-objects"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-auto"); + }); + + test("duplicate strategies are ignored", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ content: "anonymous" }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [item], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-objects", "context-objects"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-objects"); + }); + + test("context activity collection yields object from embedded Create", async () => { + const 
contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + object: item, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].id?.href, item.id?.href); + strictEqual(items[0].strategy, "context-activities"); + strictEqual(items[0].origin, "collection"); + }); + + test("combined context strategies yield posts and activity objects", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const post = new Note({ + id: new URL("https://example.com/notes/2"), + }); + const activityObject = new Note({ + id: new URL("https://example.com/notes/3"), + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [ + post, + new Create({ + id: new URL("https://example.com/activities/1"), + object: activityObject, + }), + ], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-objects", "context-activities"], + }); + + strictEqual(items.length, 2); + strictEqual(items[0].object, post); + strictEqual(items[0].strategy, "context-objects"); + strictEqual(items[1].object, activityObject); + strictEqual(items[1].strategy, "context-activities"); + }); + + test("context activity collection dereferences activity object URL", async () => { + const contextId = new 
URL("https://example.com/contexts/1"); + const itemId = new URL("https://example.com/notes/2"); + const item = new Note({ id: itemId, content: "hello" }); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + object: itemId, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ); + } + if (iri.href === itemId.href) return Promise.resolve(item); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object.id?.href, item.id?.href); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + itemId.href, + ]); + }); + + test("context activity collection dereferences activity URL", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const activityId = new URL("https://example.com/activities/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const activity = new Create({ id: activityId, object: item }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [activityId], + }), + ); + } + if (iri.href === activityId.href) return Promise.resolve(activity); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + 
strictEqual(items[0].object.id?.href, item.id?.href); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + activityId.href, + ]); + }); + + test("context activity collection deduplicates by extracted object ID", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const itemId = new URL("https://example.com/notes/2"); + const first = new Create({ + id: new URL("https://example.com/activities/1"), + object: new Note({ id: itemId, content: "first" }), + }); + const second = new Create({ + id: new URL("https://example.com/activities/2"), + object: new Note({ id: itemId, content: "second" }), + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [first, second], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].id?.href, itemId.href); + }); + + test("context activity collection skips missing object", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + deepStrictEqual( + await collect(context, note, { strategies: ["context-activities"] }), + [], + ); + }); + + test("context activity collection skips unsupported activity type", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ id: new URL("https://example.com/notes/2") }); + const activity = new Announce({ + id: new URL("https://example.com/activities/1"), + object: 
item, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + deepStrictEqual( + await collect(context, note, { strategies: ["context-activities"] }), + [], + ); + }); + + test("maxRequests limits activity object dereferencing", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const activityId = new URL("https://example.com/activities/1"); + const itemId = new URL("https://example.com/notes/2"); + const activity = new Create({ id: activityId, object: itemId }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [activityId], + }), + ); + } + if (iri.href === activityId.href) return Promise.resolve(activity); + if (iri.href === itemId.href) { + return Promise.resolve( + new Note({ + id: itemId, + }), + ); + } + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + maxRequests: 2, + strategies: ["context-activities"], + }); + + deepStrictEqual(items, []); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + activityId.href, + ]); + }); + + test("maxItems limits context activity items", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const first = new Note({ id: new URL("https://example.com/notes/2") }); + const second = new Note({ id: new URL("https://example.com/notes/3") }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new 
Collection({ + id: contextId, + items: [ + new Create({ + id: new URL("https://example.com/activities/1"), + object: first, + }), + new Create({ + id: new URL("https://example.com/activities/2"), + object: second, + }), + ], + }), + ), + }; + + const items = await collect(context, note, { + maxItems: 1, + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].id?.href, first.id?.href); }); test("context collection with URL items loads and yields objects", async () => { diff --git a/packages/backfill/src/backfill.ts b/packages/backfill/src/backfill.ts index 5e3968b82..3a0b4361d 100644 --- a/packages/backfill/src/backfill.ts +++ b/packages/backfill/src/backfill.ts @@ -2,6 +2,7 @@ import { Activity, Collection, CollectionPage, + Create, type Link, Object as APObject, OrderedCollection, @@ -12,8 +13,13 @@ import type { BackfillContext, BackfillItem, BackfillOptions, + BackfillStrategy, } from "./types.ts"; +const defaultStrategies = [ + "context-auto", +] as const satisfies readonly BackfillStrategy[]; + /** * Thrown when backfill traversal exceeds the configured request budget. * @@ -42,6 +48,8 @@ export async function* backfill< options: BackfillOptions = {}, ): AsyncGenerator, void, void> { if (options.maxItems != null && options.maxItems <= 0) return; + const strategies = normalizeStrategies(options.strategies); + if (strategies.length < 1) return; const contextId = note.contextIds[0]; if (contextId == null) return; @@ -61,24 +69,33 @@ export async function* backfill< for await ( const object of getCollectionItems(context, collection, options, budget) ) { - if (!isContextPostObject(object)) continue; - const id = object.id ?? undefined; - if (id != null) { - if (seenIds.has(id.href)) continue; - seenIds.add(id.href); - } + for await ( + const item of getBackfillItems( + context, + object, + strategies, + options, + budget, + ) + ) { + const id = item.object.id ?? 
undefined; + if (id != null) { + if (seenIds.has(id.href)) continue; + seenIds.add(id.href); + } + + options.signal?.throwIfAborted(); + yield { + object: item.object as TObject, + id, + strategy: item.strategy, + origin: "collection", + depth: 0, + }; - options.signal?.throwIfAborted(); - yield { - object: object as TObject, - id, - strategy: "context-posts", - origin: "collection", - depth: 0, - }; - - yielded++; - if (options.maxItems != null && yielded >= options.maxItems) return; + yielded++; + if (options.maxItems != null && yielded >= options.maxItems) return; + } } } catch (error) { if (error instanceof MaxRequestsExceeded) return; @@ -86,39 +103,117 @@ export async function* backfill< } } -async function* getCollectionItems( +function normalizeStrategies( + strategies: readonly BackfillStrategy[] = defaultStrategies, +): readonly BackfillStrategy[] { + if (strategies.includes("context-auto")) return ["context-auto"]; + return Array.from(new Set(strategies)); +} + +async function* getBackfillItems( context: BackfillContext, - collection: BackfillCollection, + object: APObject | Link, + strategies: readonly BackfillStrategy[], options: BackfillOptions, budget: RequestBudget, -): AsyncIterable { - yield* collection.getItems({ - documentLoader: async (url) => { - let object: APObject | null; - try { - object = await loadObject( +): AsyncIterable<{ + readonly object: APObject; + readonly strategy: BackfillStrategy; +}> { + for (const strategy of strategies) { + if (strategy === "context-objects" && isContextPostObject(object)) { + yield { object, strategy }; + } else if (strategy === "context-activities") { + const activityObject = await getCreateActivityObject( + context, + object, + options, + budget, + ); + if (activityObject != null && isContextPostObject(activityObject)) { + yield { object: activityObject, strategy }; + } + } else if (strategy === "context-auto") { + if (object instanceof Activity) { + const activityObject = await getCreateActivityObject( 
context, - new URL(url), + object, options, budget, - true, ); - } catch (error) { - if (error instanceof MaxRequestsExceeded) throw error; - budget.signal?.throwIfAborted(); - return skippedCollectionItemDocument(url); + if (activityObject != null && isContextPostObject(activityObject)) { + yield { object: activityObject, strategy }; + } + } else if (isContextPostObject(object)) { + yield { object, strategy }; } - if (object == null) return skippedCollectionItemDocument(url); - return { - contextUrl: null, - documentUrl: url, - document: await object.toJsonLd(), - }; + } + } +} + +async function* getCollectionItems( + context: BackfillContext, + collection: BackfillCollection, + options: BackfillOptions, + budget: RequestBudget, +): AsyncIterable { + yield* collection.getItems({ + documentLoader: async (url) => { + return await loadCollectionItemDocument(context, url, options, budget); }, crossOrigin: "trust", }); } +async function getCreateActivityObject( + context: BackfillContext, + object: APObject | Link, + options: BackfillOptions, + budget: RequestBudget, +): Promise { + if (!(object instanceof Create)) return null; + try { + return await object.getObject({ + documentLoader: async (url) => { + return await loadCollectionItemDocument(context, url, options, budget); + }, + crossOrigin: "trust", + }); + } catch (error) { + if (error instanceof MaxRequestsExceeded) throw error; + budget.signal?.throwIfAborted(); + return null; + } +} + +async function loadCollectionItemDocument( + context: BackfillContext, + url: string, + options: BackfillOptions, + budget: RequestBudget, +) { + let object: APObject | null; + try { + object = await loadObject( + context, + new URL(url), + options, + budget, + true, + ); + } catch (error) { + if (error instanceof MaxRequestsExceeded) throw error; + budget.signal?.throwIfAborted(); + return skippedCollectionItemDocument(url); + } + if (object == null) return skippedCollectionItemDocument(url); + return { + contextUrl: null, + 
documentUrl: url, + document: await object.toJsonLd(), + }; +} + function skippedCollectionItemDocument(url: string) { return { contextUrl: null, diff --git a/packages/backfill/src/types.ts b/packages/backfill/src/types.ts index 25555f30e..9ab43ffbf 100644 --- a/packages/backfill/src/types.ts +++ b/packages/backfill/src/types.ts @@ -5,7 +5,10 @@ import type { Object as APObject } from "@fedify/vocab"; * * @since 2.x.0 */ -export type BackfillStrategy = "context-posts"; +export type BackfillStrategy = + | "context-objects" + | "context-activities" + | "context-auto"; /** * Source relation that produced a backfilled object. @@ -56,6 +59,15 @@ export interface BackfillContext { export interface BackfillOptions< TObject extends APObject = APObject, > { + /** + * Backfill strategies to run. + * + * Defaults to `["context-auto"]`. + * + * @since 2.x.0 + */ + readonly strategies?: readonly BackfillStrategy[]; + /** * Maximum number of items to yield. Skipped duplicates do not count. */