diff --git a/CHANGES.md b/CHANGES.md index 64ecb98ed..41d455989 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -346,6 +346,19 @@ To be released. [#804]: https://github.com/fedify-dev/fedify/pull/804 [#818]: https://github.com/fedify-dev/fedify/pull/818 +### @fedify/backfill + + - Added *@fedify/backfill* for reconstructing ActivityPub conversations. + It supports FEP-f228 context collections containing post-like objects or + `Create` activities, optional reply-tree traversal, ordered hybrid + strategies, shared safety budgets, deduplication, and traversal-local + document caching. [[#275], [#779], [#801], [#807], [#816] by Jiwon Kwon] + +[#275]: https://github.com/fedify-dev/fedify/issues/275 +[#779]: https://github.com/fedify-dev/fedify/pull/779 +[#807]: https://github.com/fedify-dev/fedify/pull/807 +[#816]: https://github.com/fedify-dev/fedify/pull/816 + ### @fedify/fixture - Added `createTestMeterProvider()` and `TestMetricRecorder` helpers for diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8ff275fe8..5384b0311 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -442,6 +442,8 @@ The repository is organized as a monorepo with the following packages: creating new Fedify projects. Wraps @fedify/init. - *packages/amqp/*: AMQP/RabbitMQ driver (@fedify/amqp) for Fedify. - *packages/astro/*: Astro integration (@fedify/astro) for Fedify. + - *packages/backfill/*: ActivityPub conversation backfill support + (@fedify/backfill) for Fedify. - *packages/cfworkers/*: Cloudflare Workers integration (@fedify/cfworkers) for Fedify. - *packages/debugger/*: Embedded ActivityPub debug dashboard diff --git a/FEDERATION.md b/FEDERATION.md index d0ac97e14..71c7a68bd 100644 --- a/FEDERATION.md +++ b/FEDERATION.md @@ -25,6 +25,7 @@ Supported FEPs -------------- - [FEP-67ff][]: FEDERATION.md + - [FEP-f228][]: Backfilling conversations - [FEP-8fcf][]: Followers collection synchronization across servers - [FEP-9091][]: Export Actor Service Endpoint - [FEP-f1d5][]: NodeInfo in Fediverse Software @@ -40,6 +41,7 @@ Supported FEPs - [FEP-ae0c][]: Fediverse Relay Protocols: Mastodon and LitePub [FEP-67ff]: https://w3id.org/fep/67ff +[FEP-f228]: https://w3id.org/fep/f228 [FEP-8fcf]: https://w3id.org/fep/8fcf [FEP-9091]: https://w3id.org/fep/9091 [FEP-f1d5]: https://w3id.org/fep/f1d5 diff --git a/deno.json b/deno.json index 03f1f929b..b77061d7b 100644 --- a/deno.json +++ b/deno.json @@ -2,6 +2,7 @@ "workspace": [ "./packages/amqp", "./packages/astro", + "./packages/backfill", "./packages/cfworkers", "./packages/cli", "./packages/debugger", diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index 7e3f512b1..945933fb3 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -142,6 +142,7 @@ const MANUAL = { { text: "Outbox listeners", link: "/manual/outbox.md" }, { text: "Sending activities", link: "/manual/send.md" }, { text: "Collections", link: "/manual/collections.md" }, + { text: "Conversation backfill", link: "/manual/backfill.md" }, { text: "Object dispatcher", link: "/manual/object.md" }, { text: "Access control", link: "/manual/access-control.md" }, { text: "WebFinger", link: "/manual/webfinger.md" }, diff --git a/docs/manual/backfill.md b/docs/manual/backfill.md new file mode 100644 index 000000000..74f52280f --- /dev/null +++ b/docs/manual/backfill.md @@ -0,0 +1,203 @@ +--- +description: >- + Reconstruct ActivityPub conversations from FEP-f228 context collections or + reply relationships using the @fedify/backfill package. +--- + +Conversation backfill +===================== + +*This API is available since Fedify 2.3.0.* + +Fedify provides the *@fedify/backfill* package for reconstructing ActivityPub +conversations that may be incomplete on the local server. It can retrieve +post-like objects from [FEP-f228] context collections and optionally crawl +`inReplyTo` ancestors and `replies` descendants. + +[FEP-f228]: https://w3id.org/fep/f228 + + +Installation +------------ + +::: code-group + +~~~~ sh [Deno] +deno add jsr:@fedify/backfill +~~~~ + +~~~~ sh [npm] +npm add @fedify/backfill +~~~~ + +~~~~ sh [pnpm] +pnpm add @fedify/backfill +~~~~ + +~~~~ sh [Yarn] +yarn add @fedify/backfill +~~~~ + +~~~~ sh [Bun] +bun add @fedify/backfill +~~~~ + +::: + + +Backfilling a conversation +-------------------------- + +The `backfill()` function accepts a backfill context, a seed object, and +traversal options. The context supplies a `documentLoader` for dereferencing +context collections, collection items, reply targets, and replies collections: + +~~~~ typescript twoslash +import { backfill, type BackfillDocumentLoader } from "@fedify/backfill"; +import { lookupObject, Note } from "@fedify/vocab"; + +declare const note: Note; +// ---cut-before--- +const documentLoader: BackfillDocumentLoader = (iri, options) => + lookupObject(iri, { signal: options?.signal }); + +for await ( + const item of backfill({ documentLoader }, note, { + maxItems: 20, + maxRequests: 50, + }) +) { + console.log(item.id?.href); +} +~~~~ + +The seed object itself is not yielded. If the same object appears in a +discovered collection, it is skipped by ID. + +By default, `backfill()` uses the `"context-auto"` strategy. It expects the +seed's `context` to dereference to a `Collection`, `OrderedCollection`, +`CollectionPage`, or `OrderedCollectionPage`. Ordinary post-like items are +yielded directly, while supported `Create` activities are unwrapped and their +objects are yielded. + +If the seed has no context, or its context resolves to a non-collection, +context strategies yield nothing. + + +Strategies +---------- + +Strategies run in the configured order. They share request and item budgets, +abort state, document caching, and object ID deduplication. If multiple +strategies discover the same object, the first one keeps its `BackfillItem` +metadata. + +`"context-auto"` +: Handles both direct post-like objects and supported `Create` activities + from a context collection. This is the default strategy. + +`"context-objects"` +: Accepts only post-like objects contained directly in a context collection: + + ~~~~ typescript twoslash + import { backfill, type BackfillContext } from "@fedify/backfill"; + import { Note } from "@fedify/vocab"; + + declare const context: BackfillContext; + declare const note: Note; + // ---cut-before--- + for await ( + const item of backfill(context, note, { + strategies: ["context-objects"], + }) + ) { + console.log(item.object); + } + ~~~~ + +`"context-activities"` +: Accepts supported activities from a context collection. It currently + supports `Create` and yields the activity's object rather than the activity + itself: + + ~~~~ typescript twoslash + import { backfill, type BackfillContext } from "@fedify/backfill"; + import { Note } from "@fedify/vocab"; + + declare const context: BackfillContext; + declare const note: Note; + // ---cut-before--- + for await ( + const item of backfill(context, note, { + strategies: ["context-activities"], + }) + ) { + console.log(item.object); + } + ~~~~ + +`"reply-tree"` +: Walks `inReplyTo` ancestors and `replies` descendants. It yields + post-like objects only and does not unwrap Activity objects. This strategy + is opt-in because it can require substantially more network requests than + a context collection. + +For hybrid coverage, run the FEP-f228 path first and use reply-tree traversal +after it: + +~~~~ typescript twoslash +import { backfill, type BackfillContext } from "@fedify/backfill"; +import { Note } from "@fedify/vocab"; + +declare const context: BackfillContext; +declare const note: Note; +// ---cut-before--- +for await ( + const item of backfill(context, note, { + strategies: ["context-auto", "reply-tree"], + maxDepth: 4, + }) +) { + console.log(item.origin, item.depth, item.object); +} +~~~~ + + +Traversal controls +------------------ + +`maxItems` +: Limits the number of yielded objects. Skipped duplicates do not count. + +`maxRequests` +: Limits calls to `documentLoader`. Embedded objects and collections do not + count as requests. + +`maxDepth` +: Limits reply-tree traversal and defaults to 10. Immediate parents and + direct replies have depth 1; their next-level parents or replies have depth + 2, and so on. Context collection items have depth 0 and are not limited by + this option. + +`interval` +: Adds a delay between `documentLoader` requests. A callback receives the + zero-based request index. String durations require the global `Temporal` + API or a polyfill; `Temporal.DurationLike` objects work without the global + API. + +`signal` +: Cancels traversal before requests and yields. The signal is also passed to + `documentLoader`. + + +Caching and failures +-------------------- + +Dereferenced documents are cached in memory for one `backfill()` traversal. +Applications that need persistent or shared caching can implement it in the +provided `documentLoader`. + +Failed external dereferences are skipped so other conversation items can still +be discovered. Failed loads are not retained in the traversal cache, allowing +the same IRI to be retried if another traversal path reaches it. Aborting the +provided signal stops traversal instead of skipping the request. diff --git a/docs/package.json b/docs/package.json index 1b47419f7..6af010382 100644 --- a/docs/package.json +++ b/docs/package.json @@ -5,6 +5,7 @@ "@deno/kv": "^0.8.4", "@fedify/amqp": "workspace:^", "@fedify/astro": "workspace:^", + "@fedify/backfill": "workspace:^", "@fedify/cfworkers": "workspace:^", "@fedify/debugger": "workspace:^", "@fedify/express": "workspace:^", diff --git a/packages/backfill/README.md b/packages/backfill/README.md new file mode 100644 index 000000000..a9e4e06c5 --- /dev/null +++ b/packages/backfill/README.md @@ -0,0 +1,152 @@ + + +@fedify/backfill: ActivityPub backfill for Fedify +================================================= + +[![JSR][JSR badge]][JSR] +[![npm][npm badge]][npm] +[![Follow @fedify@hollo.social][@fedify@hollo.social badge]][@fedify@hollo.social] + +*This package is available since Fedify 2.3.0.* + +This package provides ActivityPub conversation backfill support for the +[Fedify] ecosystem. It can retrieve post-like objects from a seed object's +context collection, following the direct [FEP-f228] path where the +context dereferences to a `Collection`, `OrderedCollection`, `CollectionPage`, +or `OrderedCollectionPage`. It can also use an opt-in reply-tree strategy to +walk `inReplyTo` ancestors and `replies` descendants when context collections +are unavailable or incomplete. + +[JSR badge]: https://jsr.io/badges/@fedify/backfill +[JSR]: https://jsr.io/@fedify/backfill +[npm badge]: https://img.shields.io/npm/v/@fedify/backfill?logo=npm +[npm]: https://www.npmjs.com/package/@fedify/backfill +[@fedify@hollo.social badge]: https://fedi-badge.deno.dev/@fedify@hollo.social/followers.svg +[@fedify@hollo.social]: https://hollo.social/@fedify +[Fedify]: https://fedify.dev/ +[FEP-f228]: https://w3id.org/fep/f228 + + +Installation +------------ + +~~~~ sh +deno add jsr:@fedify/backfill +npm add @fedify/backfill +pnpm add @fedify/backfill +yarn add @fedify/backfill +bun add @fedify/backfill +~~~~ + + +Usage +----- + +The `backfill()` function accepts a backfill context, a seed object, and +traversal options: + +~~~~ typescript +import { backfill } from "@fedify/backfill"; +import { lookupObject } from "@fedify/vocab"; + +const documentLoader = (iri: URL, options?: { signal?: AbortSignal }) => + lookupObject(iri, { signal: options?.signal }); + +for await ( + const item of backfill({ documentLoader }, note, { + maxItems: 20, + maxRequests: 50, + }) +) { + console.log(item.id?.href); +} +~~~~ + +The seed object itself is not yielded. If it appears in the discovered +collection, it is skipped by ID. + +Configured strategies run in order. They share `maxItems`, `maxRequests`, +abort state, and object ID deduplication; if two strategies discover the same +object, the first strategy keeps its `BackfillItem` metadata. + +By default, `backfill()` uses the `context-auto` strategy. In this mode, +collection items are treated as backfillable objects by default. If an item is +recognized as a supported `Create` activity, `backfill()` extracts the +activity's object instead. + +To accept only post-like objects directly contained in the context collection, +use the `context-objects` strategy: + +~~~~ typescript +for await ( + const item of backfill({ documentLoader }, note, { + strategies: ["context-objects"], + }) +) { + console.log(item.object); +} +~~~~ + +To read only FEP-f228 activity collections, enable the `context-activities` +strategy: + +~~~~ typescript +for await ( + const item of backfill({ documentLoader }, note, { + strategies: ["context-activities"], + }) +) { + console.log(item.object); +} +~~~~ + +The `context-activities` strategy currently supports `Create` activities and +yields the activity's object, not the activity itself. + +To combine the FEP-f228 context collection path with traditional reply-tree +crawling, add the `reply-tree` strategy after `context-auto`: + +~~~~ typescript +for await ( + const item of backfill({ documentLoader }, note, { + strategies: ["context-auto", "reply-tree"], + maxDepth: 4, + }) +) { + console.log(item.origin, item.depth, item.object); +} +~~~~ + +The `reply-tree` strategy walks `inReplyTo` ancestors and `replies` +descendants. It yields discovered post-like objects only; it does not extract +objects from Activity wrappers. Immediate parents and direct replies have +depth 1, their next-level parents or replies have depth 2, and so on. +Reply-tree traversal defaults to a maximum depth of 10; set `maxDepth` to use a +different limit. + + +Traversal controls +------------------ + +All configured strategies share the same traversal controls: + + - `maxItems` limits the number of yielded objects. Skipped duplicates do + not count. + - `maxRequests` limits calls to `documentLoader`. Embedded objects and + collections do not count. + - `maxDepth` limits reply-tree traversal and defaults to 10. It does not + limit context collection items. + - `interval` adds a delay between loader requests. Its callback receives + the zero-based request index. + - `signal` cancels traversal and is forwarded to `documentLoader`. + +An `interval` string requires the global `Temporal` API or a polyfill. +`Temporal.DurationLike` objects work without the global API. + +If the seed has no context, or its context resolves to a non-collection, +context strategies yield nothing. Loader failures are skipped unless +traversal is aborted. + +Dereferenced documents are cached in memory for one `backfill()` traversal. +Applications that need persistent or shared caching can provide it through +the `documentLoader`. diff --git a/packages/backfill/deno.json b/packages/backfill/deno.json new file mode 100644 index 000000000..cd151e493 --- /dev/null +++ b/packages/backfill/deno.json @@ -0,0 +1,22 @@ +{ + "name": "@fedify/backfill", + "version": "2.3.0", + "license": "MIT", + "exports": { + ".": "./src/mod.ts" + }, + "exclude": [ + "dist/", + "node_modules/" + ], + "publish": { + "exclude": [ + "**/*.test.ts", + "tsdown.config.ts" + ] + }, + "tasks": { + "check": "deno fmt --check && deno lint && deno check src/**/*.ts", + "test": "deno test" + } +} diff --git a/packages/backfill/package.json b/packages/backfill/package.json new file mode 100644 index 000000000..1e80dd9d8 --- /dev/null +++ b/packages/backfill/package.json @@ -0,0 +1,68 @@ +{ + "name": "@fedify/backfill", + "version": "2.3.0", + "description": "ActivityPub backfill support for Fedify", + "keywords": [ + "Fedify", + "ActivityPub", + "Fediverse", + "Backfill" + ], + "author": { + "name": "Jiwon Kwon", + "email": "work@kwonjiwon.org" + }, + "homepage": "https://fedify.dev/", + "repository": { + "type": "git", + "url": "git+https://github.com/fedify-dev/fedify.git", + "directory": "packages/backfill" + }, + "license": "MIT", + "bugs": { + "url": "https://github.com/fedify-dev/fedify/issues" + }, + "funding": [ + "https://opencollective.com/fedify", + "https://github.com/sponsors/dahlia" + ], + "type": "module", + "main": "./dist/mod.cjs", + "module": "./dist/mod.js", + "types": "./dist/mod.d.ts", + "exports": { + ".": { + "types": { + "import": "./dist/mod.d.ts", + "require": "./dist/mod.d.cts", + "default": "./dist/mod.d.ts" + }, + "import": "./dist/mod.js", + "require": "./dist/mod.cjs", + "default": "./dist/mod.js" + }, + "./package.json": "./package.json" + }, + "files": [ + "dist/", + "package.json", + "README.md" + ], + "dependencies": { + "@fedify/vocab": "workspace:*" + }, + "devDependencies": { + "tsdown": "catalog:", + "typescript": "catalog:" + }, + "scripts": { + "build:self": "tsdown", + "build": "pnpm --filter @fedify/backfill... run build:self", + "prepack": "pnpm build", + "prepublish": "pnpm build", + "pretest": "pnpm build", + "test": "cd dist/ && node --test", + "pretest:bun": "pnpm build", + "test:bun": "cd dist/ && bun test --timeout 60000" + } +} diff --git a/packages/backfill/src/backfill.test.ts b/packages/backfill/src/backfill.test.ts new file mode 100644 index 000000000..3875eea7e --- /dev/null +++ b/packages/backfill/src/backfill.test.ts @@ -0,0 +1,1774 @@ +import { deepStrictEqual, ok, rejects, strictEqual } from "node:assert/strict"; +import test, { describe } from "node:test"; +import { backfill, type BackfillContext, MaxRequestsExceeded } from "./mod.ts"; +import { Announce, Collection, Create, Note } from "@fedify/vocab"; + +async function collect( + context: BackfillContext, + note: Note, + options: Parameters[2] = {}, +) { + return await Array.fromAsync(backfill(context, note, options)); +} + +describe("backfill", () => { + test("package exports backfill", () => { + strictEqual(typeof backfill, "function"); + strictEqual(typeof MaxRequestsExceeded, "function"); + }); + + test("context missing yields nothing", async () => { + const note = new Note({ + id: new URL("https://example.com/notes/1"), + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + deepStrictEqual(await collect(context, note), []); + }); + + test("context resolves to non-collection yields nothing", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Note({ + id: new URL("https://example.com/notes/2"), + }), + ), + }; + + deepStrictEqual(await collect(context, note), []); + }); + + test("context collection with embedded objects yields items", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [item], + }), + ), + }; + + const items = await collect(context, note); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + deepStrictEqual(items[0].id, item.id); + strictEqual(items[0].strategy, "context-auto"); + strictEqual(items[0].origin, "collection"); + }); + + test("context object strategy yields embedded objects", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [item], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-objects"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-objects"); + }); + + test("embedded object without id is yielded without id", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ content: "anonymous" }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [item], + }), + ), + }; + + const items = await collect(context, note); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].id, undefined); + }); + + test("context object strategy skips activity objects", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + object: new Note({ id: new URL("https://example.com/notes/2") }), + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + deepStrictEqual( + await collect(context, note, { strategies: ["context-objects"] }), + [], + ); + }); + + test("context auto strategy yields object from embedded Create", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + object: item, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + const items = await collect(context, note); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-auto"); + }); + + test("empty strategies yield nothing without dereferencing context", async () => { + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [new URL("https://example.com/contexts/1")], + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + deepStrictEqual(await collect(context, note, { strategies: [] }), []); + }); + + test("reply tree strategy does not require context collection", async () => { + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [new URL("https://example.com/contexts/1")], + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + deepStrictEqual( + await collect(context, note, { strategies: ["reply-tree"] }), + [], + ); + }); + + test("reply tree yields embedded ancestor", async () => { + const parent = new Note({ + id: new URL("https://example.com/notes/1"), + content: "parent", + }); + const note = new Note({ + id: new URL("https://example.com/notes/2"), + replyTarget: parent, + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, parent); + deepStrictEqual(items[0].id, parent.id); + strictEqual(items[0].strategy, "reply-tree"); + strictEqual(items[0].origin, "in-reply-to"); + strictEqual(items[0].depth, 1); + }); + + test("reply tree dereferences ancestor URL", async () => { + const parentId = new URL("https://example.com/notes/1"); + const parent = new Note({ + id: parentId, + content: "parent", + }); + const note = new Note({ + id: new URL("https://example.com/notes/2"), + replyTarget: parentId, + }); + const context: BackfillContext = { + documentLoader: (iri) => + Promise.resolve(iri.href === parentId.href ? parent : null), + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + }); + + strictEqual(items.length, 1); + deepStrictEqual(items[0].object.id, parent.id); + strictEqual(items[0].origin, "in-reply-to"); + strictEqual(items[0].depth, 1); + }); + + test("reply tree maxDepth limits ancestors", async () => { + const rootId = new URL("https://example.com/notes/1"); + const parentId = new URL("https://example.com/notes/2"); + const root = new Note({ + id: rootId, + content: "root", + }); + const parent = new Note({ + id: parentId, + content: "parent", + replyTarget: rootId, + }); + const note = new Note({ + id: new URL("https://example.com/notes/3"), + replyTarget: parentId, + }); + const context: BackfillContext = { + documentLoader: (iri) => { + if (iri.href === parentId.href) return Promise.resolve(parent); + if (iri.href === rootId.href) return Promise.resolve(root); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + maxDepth: 1, + }); + + strictEqual(items.length, 1); + deepStrictEqual(items[0].object.id, parent.id); + strictEqual(items[0].depth, 1); + }); + + test("reply tree defaults maxDepth to 10 for ancestors", async () => { + let note = new Note({ + id: new URL("https://example.com/notes/0"), + }); + for (let i = 1; i <= 12; i++) { + note = new Note({ + id: new URL(`https://example.com/notes/${i}`), + replyTarget: note, + }); + } + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + }); + + strictEqual(items.length, 10); + strictEqual(items.at(-1)?.depth, 10); + }); + + test("maxRequests limits reply tree ancestor dereferencing", async () => { + const parentId = new URL("https://example.com/notes/1"); + const note = new Note({ + id: new URL("https://example.com/notes/2"), + replyTarget: parentId, + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + deepStrictEqual( + await collect(context, note, { + strategies: ["reply-tree"], + maxRequests: 0, + }), + [], + ); + }); + + test("reply tree avoids ancestor cycles", async () => { + const seedId = new URL("https://example.com/notes/1"); + const parentId = new URL("https://example.com/notes/2"); + const note = new Note({ + id: seedId, + replyTarget: parentId, + }); + const parent = new Note({ + id: parentId, + replyTarget: seedId, + }); + const context: BackfillContext = { + documentLoader: (iri) => { + if (iri.href === seedId.href) return Promise.resolve(note); + if (iri.href === parentId.href) return Promise.resolve(parent); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + }); + + strictEqual(items.length, 1); + deepStrictEqual(items[0].object.id, parent.id); + }); + + test("reply tree deduplicates ancestors from context collection", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const parentId = new URL("https://example.com/notes/1"); + const parent = new Note({ + id: parentId, + content: "parent", + }); + const note = new Note({ + id: new URL("https://example.com/notes/2"), + contexts: [contextId], + replyTarget: parentId, + }); + const context: BackfillContext = { + documentLoader: (iri) => { + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [parent], + }), + ); + } + if (iri.href === parentId.href) return Promise.resolve(parent); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-auto", "reply-tree"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, parent); + strictEqual(items[0].strategy, "context-auto"); + }); + + test("document cache avoids duplicate dereferences across strategies", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const parentId = new URL("https://example.com/notes/1"); + const parent = new Note({ + id: parentId, + content: "parent", + }); + const note = new Note({ + id: new URL("https://example.com/notes/2"), + contexts: [contextId], + replyTarget: parentId, + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [parentId], + }), + ); + } + if (iri.href === parentId.href) return Promise.resolve(parent); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-auto", "reply-tree"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object.id?.href, parentId.href); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + parentId.href, + ]); + }); + + test("document cache does not keep failed dereferences", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const parentId = new URL("https://example.com/notes/1"); + const parent = new Note({ + id: parentId, + content: "parent", + }); + const note = new Note({ + id: new URL("https://example.com/notes/2"), + contexts: [contextId], + replyTarget: parentId, + }); + const requests: URL[] = []; + let parentRequests = 0; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [parentId], + }), + ); + } + if (iri.href === parentId.href) { + parentRequests++; + if (parentRequests === 1) throw new Error("temporary failure"); + return Promise.resolve(parent); + } + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-auto", "reply-tree"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object.id?.href, parentId.href); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + parentId.href, + parentId.href, + ]); + }); + + test("strategy order controls deduplicated item metadata", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const parentId = new URL("https://example.com/notes/1"); + const parent = new Note({ + id: parentId, + content: "parent", + }); + const note = new Note({ + id: new URL("https://example.com/notes/2"), + contexts: [contextId], + replyTarget: parentId, + }); + const context: BackfillContext = { + documentLoader: (iri) => { + if (iri.href === parentId.href) return Promise.resolve(parent); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [parent], + }), + ); + } + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree", "context-auto"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object.id?.href, parentId.href); + strictEqual(items[0].strategy, "reply-tree"); + strictEqual(items[0].origin, "in-reply-to"); + }); + + test("context auto preserves strategy order across reply tree", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const parentId = new URL("https://example.com/notes/1"); + const parent = new Note({ + id: parentId, + content: "parent", + }); + const note = new Note({ + id: new URL("https://example.com/notes/2"), + contexts: [contextId], + replyTarget: parentId, + }); + const context: BackfillContext = { + documentLoader: (iri) => { + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [parent], + }), + ); + } + if (iri.href === parentId.href) return Promise.resolve(parent); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-objects", "reply-tree", "context-auto"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object.id?.href, parentId.href); + strictEqual(items[0].strategy, "context-objects"); + strictEqual(items[0].origin, "collection"); + }); + + test("reply tree yields embedded descendants", async () => { + const reply = new Note({ + id: new URL("https://example.com/notes/2"), + content: "reply", + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + replies: new Collection({ + id: new URL("https://example.com/notes/1/replies"), + items: [reply], + }), + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, reply); + deepStrictEqual(items[0].id, reply.id); + strictEqual(items[0].strategy, "reply-tree"); + strictEqual(items[0].origin, "replies"); + strictEqual(items[0].depth, 1); + }); + + test("reply tree walks sibling descendants from discovered ancestor", async () => { + const seedId = new URL("https://example.com/notes/2"); + const sibling = new Note({ + id: new URL("https://example.com/notes/3"), + content: "sibling", + }); + const parent = new Note({ + id: new URL("https://example.com/notes/1"), + content: "parent", + replies: new Collection({ + id: new URL("https://example.com/notes/1/replies"), + items: [seedId, sibling], + }), + }); + const note = new Note({ + id: seedId, + replyTarget: parent, + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + }); + + strictEqual(items.length, 2); + strictEqual(items[0].object, parent); + strictEqual(items[0].origin, "in-reply-to"); + strictEqual(items[0].depth, 1); + strictEqual(items[1].object, sibling); + strictEqual(items[1].origin, "replies"); + strictEqual(items[1].depth, 2); + }); + + test("reply tree maxDepth applies from seed through ancestors", async () => { + const seedId = new URL("https://example.com/notes/2"); + const sibling = new Note({ + id: new URL("https://example.com/notes/3"), + content: "sibling", + }); + const parent = new Note({ + id: new URL("https://example.com/notes/1"), + content: "parent", + replies: new Collection({ + id: new URL("https://example.com/notes/1/replies"), + items: [seedId, sibling], + }), + }); + const note = new Note({ + id: seedId, + replyTarget: parent, + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + maxDepth: 1, + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, parent); + strictEqual(items[0].depth, 1); + }); + + test("reply tree dereferences replies collection URL", async () => { + const repliesId = new URL("https://example.com/notes/1/replies"); + const reply = new Note({ + id: new URL("https://example.com/notes/2"), + content: "reply", + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + replies: repliesId, + }); + const context: BackfillContext = { + documentLoader: (iri) => + Promise.resolve( + iri.href === repliesId.href + ? new Collection({ + id: repliesId, + items: [reply], + }) + : null, + ), + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + }); + + strictEqual(items.length, 1); + deepStrictEqual(items[0].object.id, reply.id); + strictEqual(items[0].origin, "replies"); + strictEqual(items[0].depth, 1); + }); + + test("reply tree maxDepth limits descendants", async () => { + const grandchild = new Note({ + id: new URL("https://example.com/notes/3"), + content: "grandchild", + }); + const reply = new Note({ + id: new URL("https://example.com/notes/2"), + content: "reply", + replies: new Collection({ + id: new URL("https://example.com/notes/2/replies"), + items: [grandchild], + }), + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + replies: new Collection({ + id: new URL("https://example.com/notes/1/replies"), + items: [reply], + }), + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + maxDepth: 1, + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, reply); + strictEqual(items[0].depth, 1); + }); + + test("reply tree defaults maxDepth to 10 for descendants", async () => { + let note = new Note({ + id: new URL("https://example.com/notes/12"), + }); + for (let i = 11; i >= 0; i--) { + note = new Note({ + id: new URL(`https://example.com/notes/${i}`), + replies: new Collection({ + id: new URL(`https://example.com/notes/${i}/replies`), + items: [note], + }), + }); + } + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + }); + + strictEqual(items.length, 10); + strictEqual(items.at(-1)?.depth, 10); + }); + + test("maxRequests limits reply tree replies dereferencing", async () => { + const repliesId = new URL("https://example.com/notes/1/replies"); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + replies: repliesId, + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + deepStrictEqual( + await collect(context, note, { + strategies: ["reply-tree"], + maxRequests: 0, + }), + [], + ); + }); + + test("reply tree does not reload visited replies collection URL", async () => { + const repliesId = new URL("https://example.com/notes/1/replies"); + const reply = new Note({ + id: new URL("https://example.com/notes/2"), + content: "reply", + replies: repliesId, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + replies: repliesId, + }); + let requests = 0; + const context: BackfillContext = { + documentLoader: (iri) => { + requests++; + strictEqual(iri.href, repliesId.href); + return Promise.resolve( + new Collection({ + id: repliesId, + items: [reply], + }), + ); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + }); + + strictEqual(requests, 1); + strictEqual(items.length, 1); + strictEqual(items[0].object.id?.href, reply.id?.href); + }); + + test("reply tree retries a replies collection after load failure", async () => { + const repliesId = new URL("https://example.com/replies/shared"); + const grandchild = new Note({ + id: new URL("https://example.com/notes/4"), + content: "grandchild", + }); + const first = new Note({ + id: new URL("https://example.com/notes/2"), + replies: repliesId, + }); + const second = new Note({ + id: new URL("https://example.com/notes/3"), + replies: repliesId, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + replies: new Collection({ + id: new URL("https://example.com/notes/1/replies"), + items: [first, second], + }), + }); + let requests = 0; + const context: BackfillContext = { + documentLoader: (iri) => { + strictEqual(iri.href, repliesId.href); + requests++; + if (requests === 1) throw new Error("temporary failure"); + return Promise.resolve( + new Collection({ + id: repliesId, + items: [grandchild], + }), + ); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + }); + + strictEqual(requests, 2); + deepStrictEqual( + items.map((item) => item.object.id?.href), + [first.id?.href, second.id?.href, grandchild.id?.href], + ); + strictEqual(items[2].depth, 2); + }); + + test("reply tree skips visited reply IRIs before dereferencing", async () => { + const seedId = new URL("https://example.com/notes/1"); + const siblingId = new URL("https://example.com/notes/2"); + const sibling = new Note({ + id: siblingId, + content: "sibling", + }); + const note = new Note({ + id: seedId, + replies: new Collection({ + id: new URL("https://example.com/notes/1/replies"), + items: [seedId, siblingId], + }), + }); + const requests: string[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri.href); + if (iri.href === siblingId.href) return Promise.resolve(sibling); + if (iri.href === seedId.href) { + throw new Error("seed should have been skipped"); + } + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["reply-tree"], + maxRequests: 1, + }); + + deepStrictEqual(requests, [siblingId.href]); + strictEqual(items.length, 1); + strictEqual(items[0].object.id?.href, siblingId.href); + }); + + test("reply tree avoids descendant cycles", async () => { + const seedId = new URL("https://example.com/notes/1"); + const replyId = new URL("https://example.com/notes/2"); + const note = new Note({ + id: seedId, + }); + const reply = new Note({ + id: replyId, + replies: new Collection({ + id: new URL("https://example.com/notes/2/replies"), + items: [note], + }), + }); + const seed = note.clone({ + replies: new Collection({ + id: new URL("https://example.com/notes/1/replies"), + items: [reply], + }), + }); + const context: BackfillContext = { + documentLoader: () => { + throw new Error("documentLoader should not be called"); + }, + }; + + const items = await collect(context, seed, { + strategies: ["reply-tree"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, reply); + }); + + test("context auto overrides overlapping context strategies", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ content: "anonymous" }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [item], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-objects", "context-auto", "reply-tree"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-auto"); + }); + + test("duplicate strategies are ignored", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ content: "anonymous" }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [item], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-objects", "context-objects"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].strategy, "context-objects"); + }); + + test("context activity collection yields object from embedded Create", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + object: item, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, item); + strictEqual(items[0].id?.href, item.id?.href); + strictEqual(items[0].strategy, "context-activities"); + strictEqual(items[0].origin, "collection"); + }); + + test("combined context strategies yield posts and activity objects", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const post = new Note({ + id: new URL("https://example.com/notes/2"), + }); + const activityObject = new Note({ + id: new URL("https://example.com/notes/3"), + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [ + post, + new Create({ + id: new URL("https://example.com/activities/1"), + object: activityObject, + }), + ], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-objects", "context-activities"], + }); + + strictEqual(items.length, 2); + strictEqual(items[0].object, post); + strictEqual(items[0].strategy, "context-objects"); + strictEqual(items[1].object, activityObject); + strictEqual(items[1].strategy, "context-activities"); + }); + + test("combined context strategies share context collection loading", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const post = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const activityObject = new Note({ + id: new URL("https://example.com/notes/3"), + content: "activity object", + }); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + object: activityObject, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + let requests = 0; + const context: BackfillContext = { + documentLoader: (iri) => { + requests++; + strictEqual(iri.href, contextId.href); + return Promise.resolve( + new Collection({ + id: contextId, + items: [post, activity], + }), + ); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-objects", "context-activities"], + }); + + strictEqual(requests, 1); + strictEqual(items.length, 2); + strictEqual(items[0].object, post); + strictEqual(items[1].object, activityObject); + }); + + test("context activity collection dereferences activity object URL", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const itemId = new URL("https://example.com/notes/2"); + const item = new Note({ id: itemId, content: "hello" }); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + object: itemId, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ); + } + if (iri.href === itemId.href) return Promise.resolve(item); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object.id?.href, item.id?.href); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + itemId.href, + ]); + }); + + test("context activity collection dereferences activity URL", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const activityId = new URL("https://example.com/activities/1"); + const item = new Note({ + id: new URL("https://example.com/notes/2"), + content: "hello", + }); + const activity = new Create({ id: activityId, object: item }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [activityId], + }), + ); + } + if (iri.href === activityId.href) return Promise.resolve(activity); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object.id?.href, item.id?.href); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + activityId.href, + ]); + }); + + test("context activity collection deduplicates by extracted object ID", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const itemId = new URL("https://example.com/notes/2"); + const first = new Create({ + id: new URL("https://example.com/activities/1"), + object: new Note({ id: itemId, content: "first" }), + }); + const second = new Create({ + id: new URL("https://example.com/activities/2"), + object: new Note({ id: itemId, content: "second" }), + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [first, second], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].id?.href, itemId.href); + }); + + test("context activity collection skips missing object", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const activity = new Create({ + id: new URL("https://example.com/activities/1"), + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + deepStrictEqual( + await collect(context, note, { strategies: ["context-activities"] }), + [], + ); + }); + + test("context activity collection skips unsupported activity type", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const item = new Note({ id: new URL("https://example.com/notes/2") }); + const activity = new Announce({ + id: new URL("https://example.com/activities/1"), + object: item, + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [activity], + }), + ), + }; + + deepStrictEqual( + await collect(context, note, { strategies: ["context-activities"] }), + [], + ); + }); + + test("maxRequests limits activity object dereferencing", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const activityId = new URL("https://example.com/activities/1"); + const itemId = new URL("https://example.com/notes/2"); + const activity = new Create({ id: activityId, object: itemId }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [activityId], + }), + ); + } + if (iri.href === activityId.href) return Promise.resolve(activity); + if (iri.href === itemId.href) { + return Promise.resolve( + new Note({ + id: itemId, + }), + ); + } + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note, { + maxRequests: 2, + strategies: ["context-activities"], + }); + + deepStrictEqual(items, []); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + activityId.href, + ]); + }); + + test("maxItems limits context activity items", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const first = new Note({ id: new URL("https://example.com/notes/2") }); + const second = new Note({ id: new URL("https://example.com/notes/3") }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [ + new Create({ + id: new URL("https://example.com/activities/1"), + object: first, + }), + new Create({ + id: new URL("https://example.com/activities/2"), + object: second, + }), + ], + }), + ), + }; + + const items = await collect(context, note, { + maxItems: 1, + strategies: ["context-activities"], + }); + + strictEqual(items.length, 1); + strictEqual(items[0].id?.href, first.id?.href); + }); + + test("context collection with URL items loads and yields objects", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const itemId = new URL("https://example.com/notes/2"); + const item = new Note({ + id: itemId, + content: "hello", + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [itemId], + }), + ); + } + if (iri.href === itemId.href) return Promise.resolve(item); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note); + + strictEqual(items.length, 1); + ok(items[0].id instanceof URL); + strictEqual(items[0].id.href, itemId.href); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + itemId.href, + ]); + }); + + test("seen context collection URL items are not loaded", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const seedId = new URL("https://example.com/notes/1"); + const itemId = new URL("https://example.com/notes/2"); + const item = new Note({ + id: itemId, + content: "hello", + }); + const note = new Note({ + id: seedId, + contexts: [contextId], + }); + const requests: URL[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + requests.push(iri); + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [seedId, itemId], + }), + ); + } + if (iri.href === itemId.href) return Promise.resolve(item); + throw new Error("seen collection item should not be loaded"); + }, + }; + + const items = await collect(context, note); + + strictEqual(items.length, 1); + strictEqual(items[0].id?.href, itemId.href); + deepStrictEqual(requests.map((url) => url.href), [ + contextId.href, + itemId.href, + ]); + }); + + test("failed URL collection items are skipped", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const missingItemId = new URL("https://example.com/notes/missing"); + const failedItemId = new URL("https://example.com/notes/failed"); + const itemId = new URL("https://example.com/notes/2"); + const item = new Note({ + id: itemId, + content: "hello", + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: (iri) => { + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [missingItemId, failedItemId, itemId], + }), + ); + } + if (iri.href === missingItemId.href) return Promise.resolve(null); + if (iri.href === failedItemId.href) { + return Promise.reject(new Error("failed to load")); + } + if (iri.href === itemId.href) return Promise.resolve(item); + return Promise.resolve(null); + }, + }; + + const items = await collect(context, note); + + strictEqual(items.length, 1); + strictEqual(items[0].id?.href, itemId.href); + }); + + test("seed is not yielded again when present in collection", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const other = new Note({ + id: new URL("https://example.com/notes/2"), + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [note, other], + }), + ), + }; + + const items = await collect(context, note); + + strictEqual(items.length, 1); + strictEqual(items[0].object, other); + }); + + test("duplicate object IDs are skipped", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const duplicateId = new URL("https://example.com/notes/2"); + const first = new Note({ id: duplicateId, content: "first" }); + const second = new Note({ id: duplicateId, content: "second" }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [first, second], + }), + ), + }; + + const items = await collect(context, note); + + strictEqual(items.length, 1); + strictEqual(items[0].object, first); + }); + + test("maxItems limits yielded items", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [ + new Note({ id: new URL("https://example.com/notes/2") }), + new Note({ id: new URL("https://example.com/notes/3") }), + ], + }), + ), + }; + + const items = await collect(context, note, { maxItems: 1 }); + + strictEqual(items.length, 1); + strictEqual(items[0].id?.href, "https://example.com/notes/2"); + }); + + test("maxItems is shared across context and reply tree", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const reply = new Note({ + id: new URL("https://example.com/notes/3"), + content: "reply", + }); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + replies: new Collection({ + id: new URL("https://example.com/notes/1/replies"), + items: [reply], + }), + }); + const contextItem = new Note({ + id: new URL("https://example.com/notes/2"), + content: "context item", + }); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [contextItem], + }), + ), + }; + + const items = await collect(context, note, { + strategies: ["context-auto", "reply-tree"], + maxItems: 1, + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, contextItem); + strictEqual(items[0].strategy, "context-auto"); + }); + + test("maxRequests limits dereferencing", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const itemId = new URL("https://example.com/notes/2"); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const context: BackfillContext = { + documentLoader: (iri) => { + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [itemId], + }), + ); + } + return Promise.resolve(new Note({ id: iri })); + }, + }; + + deepStrictEqual(await collect(context, note, { maxRequests: 1 }), []); + }); + + test("maxRequests is shared across context and reply tree", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const parentId = new URL("https://example.com/notes/0"); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + replyTarget: parentId, + }); + const contextItem = new Note({ + id: new URL("https://example.com/notes/2"), + content: "context item", + }); + const context: BackfillContext = { + documentLoader: (iri) => { + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [contextItem], + }), + ); + } + throw new Error("reply-tree request should be budgeted out"); + }, + }; + + const items = await collect(context, note, { + strategies: ["context-auto", "reply-tree"], + maxRequests: 1, + }); + + strictEqual(items.length, 1); + strictEqual(items[0].object, contextItem); + }); + + test("AbortSignal stops traversal", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const controller = new AbortController(); + controller.abort(); + const context: BackfillContext = { + documentLoader: () => + Promise.resolve( + new Collection({ + id: contextId, + items: [new Note({ id: new URL("https://example.com/notes/2") })], + }), + ), + }; + + await rejects( + collect(context, note, { signal: controller.signal }), + { name: "AbortError" }, + ); + }); + + test("AbortSignal stops traversal across strategies", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const parentId = new URL("https://example.com/notes/0"); + const controller = new AbortController(); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + replyTarget: parentId, + }); + const contextItem = new Note({ + id: new URL("https://example.com/notes/2"), + content: "context item", + }); + let requests = 0; + const context: BackfillContext = { + documentLoader: (iri) => { + requests++; + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [contextItem], + }), + ); + } + throw new Error("reply-tree request should not be started"); + }, + }; + + const items: Awaited> = []; + await rejects( + async () => { + for await ( + const item of backfill(context, note, { + strategies: ["context-auto", "reply-tree"], + signal: controller.signal, + }) + ) { + items.push(item); + controller.abort(); + } + }, + { name: "AbortError" }, + ); + + strictEqual(requests, 1); + strictEqual(items.length, 1); + strictEqual(items[0].object, contextItem); + }); + + test("documentLoader receives AbortSignal", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const controller = new AbortController(); + let receivedSignal: AbortSignal | undefined; + const context: BackfillContext = { + documentLoader: (_iri, options) => { + receivedSignal = options?.signal; + return Promise.resolve(new Collection({ id: contextId, items: [] })); + }, + }; + + await collect(context, note, { signal: controller.signal }); + + strictEqual(receivedSignal, controller.signal); + }); + + test("interval callback receives zero-based request index", async () => { + const contextId = new URL("https://example.com/contexts/1"); + const itemId = new URL("https://example.com/notes/2"); + const note = new Note({ + id: new URL("https://example.com/notes/1"), + contexts: [contextId], + }); + const iterations: number[] = []; + const context: BackfillContext = { + documentLoader: (iri) => { + if (iri.href === contextId.href) { + return Promise.resolve( + new Collection({ + id: contextId, + items: [itemId], + }), + ); + } + return Promise.resolve(new Note({ id: iri })); + }, + }; + + await collect(context, note, { + interval: (iteration) => { + iterations.push(iteration); + return { milliseconds: 0 }; + }, + }); + + deepStrictEqual(iterations, [0, 1]); + }); +}); diff --git a/packages/backfill/src/backfill.ts b/packages/backfill/src/backfill.ts new file mode 100644 index 000000000..a836f4c4a --- /dev/null +++ b/packages/backfill/src/backfill.ts @@ -0,0 +1,704 @@ +import { + Activity, + Collection, + CollectionPage, + Create, + type Link, + Object as APObject, + OrderedCollection, + OrderedCollectionPage, +} from "@fedify/vocab"; + +import type { + BackfillContext, + BackfillItem, + BackfillOptions, + BackfillOrigin, + BackfillStrategy, +} from "./types.ts"; + +const defaultStrategies = [ + "context-auto", +] as const satisfies readonly BackfillStrategy[]; + +const DEFAULT_MAX_DEPTH = 10; + +/** + * Thrown when backfill traversal exceeds the configured request budget. + * + * @since 2.3.0 + */ +export class MaxRequestsExceeded extends Error {} + +interface RequestBudget { + readonly signal?: AbortSignal; + requestCount: number; + readonly documents: Map>; +} + +type StrategyItem = { + readonly object: APObject; + readonly strategy: BackfillStrategy; + readonly origin: BackfillOrigin; + readonly depth: number; +}; + +type ReplyTreeTraversal = { + readonly depth: number; + readonly visitedObjectIds: Set; + readonly visitedObjects: WeakSet; + readonly visitedCollectionIds: Set; + readonly visitedCollections: WeakSet; +}; + +/** + * Backfills post-like objects related to a seed object. + * + * The seed object is not yielded by default, but its ID is treated as already + * seen so it will not be yielded again if the collection contains it. + * + * @since 2.3.0 + */ +export async function* backfill< + TObject extends APObject = APObject, +>( + context: BackfillContext, + note: TObject, + options: BackfillOptions = {}, +): AsyncGenerator, void, void> { + if (options.maxItems != null && options.maxItems <= 0) return; + const strategies = normalizeStrategies(options.strategies); + if (strategies.length < 1) return; + + const budget: RequestBudget = { + signal: options.signal, + requestCount: 0, + documents: new Map(), + }; + const seenIds = new Set(); + if (note.id != null) seenIds.add(note.id.href); + + let yielded = 0; + try { + for (let i = 0; i < strategies.length; i++) { + const strategy = strategies[i]; + let items: AsyncIterable; + if (isContextStrategy(strategy)) { + const contextStrategies: Exclude[] = [ + strategy, + ]; + while (true) { + const nextStrategy = strategies[i + 1]; + if (nextStrategy == null || !isContextStrategy(nextStrategy)) break; + contextStrategies.push(nextStrategy); + i++; + } + items = getContextStrategyItems( + context, + note, + contextStrategies, + options, + budget, + seenIds, + ); + } else { + items = getStrategyItems( + context, + note, + strategy, + options, + budget, + seenIds, + ); + } + + for await (const item of items) { + const id = item.object.id ?? undefined; + if (id != null) { + if (seenIds.has(id.href)) continue; + seenIds.add(id.href); + } + + options.signal?.throwIfAborted(); + yield { + object: item.object as TObject, + id, + strategy: item.strategy, + origin: item.origin, + depth: item.depth, + }; + + yielded++; + if (options.maxItems != null && yielded >= options.maxItems) return; + } + } + } catch (error) { + if (error instanceof MaxRequestsExceeded) return; + throw error; + } +} + +function normalizeStrategies( + strategies: readonly BackfillStrategy[] = defaultStrategies, +): readonly BackfillStrategy[] { + const normalized: BackfillStrategy[] = []; + for (const strategy of strategies) { + if (strategy === "context-auto") { + for ( + let i = normalized.length - 1; + i >= 0 && isContextStrategy(normalized[i]); + i-- + ) { + normalized.splice(i, 1); + } + if (!normalized.includes(strategy)) normalized.push(strategy); + } else if (isContextStrategy(strategy)) { + if ( + !currentContextGroupHasAuto(normalized) && + !normalized.includes(strategy) + ) { + normalized.push(strategy); + } + } else if (!normalized.includes(strategy)) { + normalized.push(strategy); + } + } + return normalized; +} + +function isContextStrategy( + strategy: BackfillStrategy, +): strategy is Exclude { + return strategy === "context-objects" || + strategy === "context-activities" || + strategy === "context-auto"; +} + +function currentContextGroupHasAuto( + strategies: readonly BackfillStrategy[], +): boolean { + for (let i = strategies.length - 1; i >= 0; i--) { + const strategy = strategies[i]; + if (!isContextStrategy(strategy)) return false; + if (strategy === "context-auto") return true; + } + return false; +} + +async function* getContextStrategyItems( + context: BackfillContext, + note: APObject, + strategies: readonly Exclude[], + options: BackfillOptions, + budget: RequestBudget, + seenIds: ReadonlySet, +): AsyncIterable<{ + readonly object: APObject; + readonly strategy: Exclude; + readonly origin: "collection"; + readonly depth: 0; +}> { + const contextId = note.contextIds[0]; + if (contextId == null) return; + const collection = await loadObject(context, contextId, options, budget); + if (!isCollection(collection)) return; + for await ( + const object of getCollectionItems( + context, + collection, + options, + budget, + seenIds, + ) + ) { + for (const strategy of strategies) { + for await ( + const item of getContextBackfillItems( + context, + object, + strategy, + options, + budget, + ) + ) { + yield { + object: item.object, + strategy: item.strategy, + origin: "collection", + depth: 0, + }; + } + } + } +} + +async function* getStrategyItems( + context: BackfillContext, + note: APObject, + strategy: BackfillStrategy, + options: BackfillOptions, + budget: RequestBudget, + seenIds: ReadonlySet, +): AsyncIterable<{ + readonly object: APObject; + readonly strategy: BackfillStrategy; + readonly origin: BackfillOrigin; + readonly depth: number; +}> { + if (isContextStrategy(strategy)) { + yield* getContextStrategyItems( + context, + note, + [strategy], + options, + budget, + seenIds, + ); + } else if (strategy === "reply-tree") { + yield* getReplyTreeItems(context, note, options, budget); + } +} + +async function* getReplyTreeItems( + context: BackfillContext, + note: APObject, + options: BackfillOptions, + budget: RequestBudget, +): AsyncIterable<{ + readonly object: APObject; + readonly strategy: "reply-tree"; + readonly origin: "in-reply-to" | "replies"; + readonly depth: number; +}> { + const visitedObjectIds = new Set(); + const visitedObjects = new WeakSet(); + const visitedCollectionIds = new Set(); + const visitedCollections = new WeakSet(); + if (note.id != null) visitedObjectIds.add(note.id.href); + visitedObjects.add(note); + const ancestors: Array<{ + readonly object: APObject; + readonly depth: number; + }> = []; + for await ( + const item of getReplyAncestors(context, note, options, budget, { + depth: 1, + visitedObjectIds, + visitedObjects, + visitedCollectionIds, + visitedCollections, + }) + ) { + ancestors.push({ object: item.object, depth: item.depth }); + yield item; + } + for (const ancestor of ancestors.toReversed()) { + yield* getReplyDescendants(context, ancestor.object, options, budget, { + depth: ancestor.depth + 1, + visitedObjectIds, + visitedObjects, + visitedCollectionIds, + visitedCollections, + }); + } + yield* getReplyDescendants(context, note, options, budget, { + depth: 1, + visitedObjectIds, + visitedObjects, + visitedCollectionIds, + visitedCollections, + }); +} + +async function* getReplyAncestors( + context: BackfillContext, + object: APObject, + options: BackfillOptions, + budget: RequestBudget, + traversal: ReplyTreeTraversal, +): AsyncIterable<{ + readonly object: APObject; + readonly strategy: "reply-tree"; + readonly origin: "in-reply-to"; + readonly depth: number; +}> { + if (traversal.depth > (options.maxDepth ?? DEFAULT_MAX_DEPTH)) return; + for await ( + const target of getReplyTargets(context, object, options, budget) + ) { + if (!isContextPostObject(target)) continue; + if (!visitReplyTreeObject(target, traversal)) continue; + yield { + object: target, + strategy: "reply-tree", + origin: "in-reply-to", + depth: traversal.depth, + }; + yield* getReplyAncestors(context, target, options, budget, { + depth: traversal.depth + 1, + visitedObjectIds: traversal.visitedObjectIds, + visitedObjects: traversal.visitedObjects, + visitedCollectionIds: traversal.visitedCollectionIds, + visitedCollections: traversal.visitedCollections, + }); + } +} + +async function* getReplyDescendants( + context: BackfillContext, + object: APObject, + options: BackfillOptions, + budget: RequestBudget, + traversal: ReplyTreeTraversal, +): AsyncIterable<{ + readonly object: APObject; + readonly strategy: "reply-tree"; + readonly origin: "replies"; + readonly depth: number; +}> { + if (traversal.depth > (options.maxDepth ?? DEFAULT_MAX_DEPTH)) return; + const repliesId = object.repliesId; + if ( + repliesId != null && + traversal.visitedCollectionIds.has(repliesId.href) + ) { + return; + } + const replies = await getRepliesCollection(context, object, options, budget); + if (replies == null) return; + const unvisited = visitReplyTreeCollection(replies, traversal); + if (repliesId != null) traversal.visitedCollectionIds.add(repliesId.href); + if (!unvisited) return; + for await ( + const reply of getCollectionItems( + context, + replies, + options, + budget, + traversal.visitedObjectIds, + ) + ) { + if (!isContextPostObject(reply)) continue; + if (!visitReplyTreeObject(reply, traversal)) continue; + yield { + object: reply, + strategy: "reply-tree", + origin: "replies", + depth: traversal.depth, + }; + yield* getReplyDescendants(context, reply, options, budget, { + depth: traversal.depth + 1, + visitedObjectIds: traversal.visitedObjectIds, + visitedObjects: traversal.visitedObjects, + visitedCollectionIds: traversal.visitedCollectionIds, + visitedCollections: traversal.visitedCollections, + }); + } +} + +async function* getReplyTargets( + context: BackfillContext, + object: APObject, + options: BackfillOptions, + budget: RequestBudget, +): AsyncIterable { + try { + yield* object.getReplyTargets({ + documentLoader: async (url) => { + return await loadCollectionItemDocument(context, url, options, budget); + }, + crossOrigin: "trust", + }); + } catch (error) { + if (error instanceof MaxRequestsExceeded) throw error; + budget.signal?.throwIfAborted(); + } +} + +async function getRepliesCollection( + context: BackfillContext, + object: APObject, + options: BackfillOptions, + budget: RequestBudget, +): Promise { + try { + return await object.getReplies({ + documentLoader: async (url) => { + return await loadCollectionItemDocument(context, url, options, budget); + }, + crossOrigin: "trust", + }); + } catch (error) { + if (error instanceof MaxRequestsExceeded) throw error; + budget.signal?.throwIfAborted(); + return null; + } +} + +function visitReplyTreeObject( + object: APObject, + traversal: ReplyTreeTraversal, +): boolean { + if (object.id != null) { + if (traversal.visitedObjectIds.has(object.id.href)) return false; + traversal.visitedObjectIds.add(object.id.href); + } else { + if (traversal.visitedObjects.has(object)) return false; + } + traversal.visitedObjects.add(object); + return true; +} + +function visitReplyTreeCollection( + collection: BackfillCollection, + traversal: ReplyTreeTraversal, +): boolean { + if (collection.id != null) { + return visitReplyTreeCollectionId(collection.id, traversal); + } else { + if (traversal.visitedCollections.has(collection)) return false; + } + traversal.visitedCollections.add(collection); + return true; +} + +function visitReplyTreeCollectionId( + id: URL, + traversal: ReplyTreeTraversal, +): boolean { + if (traversal.visitedCollectionIds.has(id.href)) return false; + traversal.visitedCollectionIds.add(id.href); + return true; +} + +async function* getContextBackfillItems( + context: BackfillContext, + object: APObject | Link, + strategy: Exclude, + options: BackfillOptions, + budget: RequestBudget, +): AsyncIterable<{ + readonly object: APObject; + readonly strategy: Exclude; +}> { + if (strategy === "context-objects" && isContextPostObject(object)) { + yield { object, strategy }; + } else if (strategy === "context-activities") { + const activityObject = await getCreateActivityObject( + context, + object, + options, + budget, + ); + if (activityObject != null && isContextPostObject(activityObject)) { + yield { object: activityObject, strategy }; + } + } else if (strategy === "context-auto") { + if (object instanceof Activity) { + const activityObject = await getCreateActivityObject( + context, + object, + options, + budget, + ); + if (activityObject != null && isContextPostObject(activityObject)) { + yield { object: activityObject, strategy }; + } + } else if (isContextPostObject(object)) { + yield { object, strategy }; + } + } +} + +async function* getCollectionItems( + context: BackfillContext, + collection: BackfillCollection, + options: BackfillOptions, + budget: RequestBudget, + skipIds?: ReadonlySet, +): AsyncIterable { + yield* collection.getItems({ + documentLoader: async (url) => { + return await loadCollectionItemDocument( + context, + url, + options, + budget, + skipIds, + ); + }, + crossOrigin: "trust", + }); +} + +async function getCreateActivityObject( + context: BackfillContext, + object: APObject | Link, + options: BackfillOptions, + budget: RequestBudget, +): Promise { + if (!(object instanceof Create)) return null; + try { + return await object.getObject({ + documentLoader: async (url) => { + return await loadCollectionItemDocument(context, url, options, budget); + }, + crossOrigin: "trust", + }); + } catch (error) { + if (error instanceof MaxRequestsExceeded) throw error; + budget.signal?.throwIfAborted(); + return null; + } +} + +async function loadCollectionItemDocument( + context: BackfillContext, + url: string, + options: BackfillOptions, + budget: RequestBudget, + skipIds?: ReadonlySet, +) { + let object: APObject | null; + try { + const iri = new URL(url); + if (skipIds?.has(iri.href)) return skippedCollectionItemDocument(url); + object = await loadObject( + context, + iri, + options, + budget, + true, + ); + } catch (error) { + if (error instanceof MaxRequestsExceeded) throw error; + budget.signal?.throwIfAborted(); + return skippedCollectionItemDocument(url); + } + if (object == null) return skippedCollectionItemDocument(url); + return { + contextUrl: null, + documentUrl: url, + document: await object.toJsonLd(), + }; +} + +function skippedCollectionItemDocument(url: string) { + return { + contextUrl: null, + documentUrl: url, + document: { + "@context": "https://www.w3.org/ns/activitystreams", + type: "Activity", + }, + }; +} + +async function loadObject( + context: BackfillContext, + iri: URL, + options: BackfillOptions, + budget: RequestBudget, + throwOnBudgetExceeded = false, +): Promise { + budget.signal?.throwIfAborted(); + const cacheKey = iri.href; + const cached = budget.documents.get(cacheKey); + if (cached != null) return await cached; + + if ( + options.maxRequests != null && + budget.requestCount >= options.maxRequests + ) { + if (throwOnBudgetExceeded) throw new MaxRequestsExceeded(); + return null; + } + + await waitForInterval(options, budget); + budget.signal?.throwIfAborted(); + + budget.requestCount++; + const document = context.documentLoader(iri, { signal: budget.signal }); + budget.documents.set(cacheKey, document); + try { + return await document; + } catch (error) { + if (budget.documents.get(cacheKey) === document) { + budget.documents.delete(cacheKey); + } + throw error; + } +} + +async function waitForInterval( + options: BackfillOptions, + budget: RequestBudget, +): Promise { + if (options.interval == null) return; + const duration = typeof options.interval === "function" + ? options.interval(budget.requestCount) + : options.interval; + const milliseconds = durationToMilliseconds(duration); + if (milliseconds <= 0) return; + await new Promise((resolve, reject) => { + if (budget.signal?.aborted) { + reject(budget.signal.reason); + return; + } + const timeout = setTimeout(() => { + budget.signal?.removeEventListener("abort", onAbort); + resolve(); + }, milliseconds); + const onAbort = () => { + clearTimeout(timeout); + reject(budget.signal?.reason); + }; + budget.signal?.addEventListener("abort", onAbort, { once: true }); + }); +} + +function durationToMilliseconds( + duration: Temporal.DurationLike | string, +): number { + if (typeof duration === "string") { + if (typeof Temporal === "undefined") { + throw new TypeError( + "Temporal is not globally available; pass interval as a " + + "Temporal.DurationLike object instead of a string, or provide a " + + "Temporal polyfill.", + ); + } + return Temporal.Duration.from(duration).total({ unit: "milliseconds" }); + } + + return ( + (duration.milliseconds ?? 0) + + (duration.seconds ?? 0) * 1000 + + (duration.minutes ?? 0) * 60 * 1000 + + (duration.hours ?? 0) * 60 * 60 * 1000 + + (duration.days ?? 0) * 24 * 60 * 60 * 1000 + ); +} + +type BackfillCollection = + | Collection + | OrderedCollection + | CollectionPage + | OrderedCollectionPage; + +function isCollection( + object: APObject | null, +): object is BackfillCollection { + return object instanceof Collection || + object instanceof OrderedCollection || + object instanceof CollectionPage || + object instanceof OrderedCollectionPage; +} + +function isContextPostObject( + object: APObject | Link, +): object is APObject { + return object instanceof APObject && + !(object instanceof Activity) && + !isCollection(object); +} diff --git a/packages/backfill/src/mod.ts b/packages/backfill/src/mod.ts new file mode 100644 index 000000000..d72c3c339 --- /dev/null +++ b/packages/backfill/src/mod.ts @@ -0,0 +1,18 @@ +/** + * ActivityPub backfill support for Fedify. + * + * This package provides async generator APIs for collecting historical + * ActivityPub objects related to a seed object. + * + * @module + */ +export { backfill, MaxRequestsExceeded } from "./backfill.ts"; +export type { + BackfillContext, + BackfillDocumentLoader, + BackfillDocumentLoaderOptions, + BackfillItem, + BackfillOptions, + BackfillOrigin, + BackfillStrategy, +} from "./types.ts"; diff --git a/packages/backfill/src/types.ts b/packages/backfill/src/types.ts new file mode 100644 index 000000000..58090e698 --- /dev/null +++ b/packages/backfill/src/types.ts @@ -0,0 +1,169 @@ +import type { Object as APObject } from "@fedify/vocab"; + +/** + * Backfill traversal strategy used to discover the returned object. + * + * - `"context-objects"` yields post-like objects directly from the context + * collection. + * - `"context-activities"` yields objects extracted from supported `Create` + * activities in the context collection. + * - `"context-auto"` classifies context collection items automatically, + * handling direct post-like objects and supported `Create` activities. + * If included, it absorbs other context collection strategies. + * - `"reply-tree"` walks the reply graph through `inReplyTo` ancestors and + * `replies` descendants, yielding discovered post-like objects. + * + * @since 2.3.0 + */ +export type BackfillStrategy = + | "context-objects" + | "context-activities" + | "context-auto" + | "reply-tree"; + +/** + * Source relation that produced a backfilled object. + * + * @since 2.3.0 + */ +export type BackfillOrigin = + | "collection" + | "in-reply-to" + | "replies"; + +/** + * Options passed to {@link BackfillDocumentLoader}. + * + * @since 2.3.0 + */ +export interface BackfillDocumentLoaderOptions { + /** + * Cancellation signal for the current dereference operation. + */ + readonly signal?: AbortSignal; +} + +/** + * Dereferences an ActivityPub object or collection IRI. + * + * @since 2.3.0 + */ +export type BackfillDocumentLoader = ( + iri: URL, + options?: BackfillDocumentLoaderOptions, +) => Promise; + +/** + * Dependencies used by backfill traversal. + * + * @since 2.3.0 + */ +export interface BackfillContext { + /** + * Dereferences context collections, collection item IRIs, reply targets, + * and replies collections. + */ + readonly documentLoader: BackfillDocumentLoader; +} + +/** + * Controls backfill traversal. + * + * @since 2.3.0 + */ +export interface BackfillOptions< + TObject extends APObject = APObject, +> { + /** + * Backfill strategies to run. + * + * Strategies run in order and share request, item, abort, and deduplication + * state. If multiple strategies discover the same object ID, the first + * strategy keeps its {@link BackfillItem} metadata. + * + * Defaults to `["context-auto"]`. + * If `"context-auto"` is included, it absorbs other context collection + * strategies. + * + * @since 2.3.0 + */ + readonly strategies?: readonly BackfillStrategy[]; + + /** + * Maximum number of items to yield. Skipped duplicates do not count. + */ + readonly maxItems?: number; + + /** + * Maximum reply-tree traversal depth. + * + * Immediate `inReplyTo` targets and direct `replies` collection items have + * depth 1. Their parents or replies have depth 2, and so on. Context + * collection items are depth 0 and are not limited by this option. + * + * Defaults to 10. + */ + readonly maxDepth?: number; + + /** + * Maximum number of calls to {@link BackfillContext.documentLoader}. + * + * Dereferencing the note context, collection item IRIs, reply target IRIs, + * replies collection IRIs, and future page IRIs all count as requests across + * all strategies. Embedded objects and collections do not count. + */ + readonly maxRequests?: number; + + /** + * Delay between `documentLoader` requests. + * + * When a callback is provided, `iteration` is the zero-based request index. + */ + readonly interval?: + | Temporal.DurationLike + | string + | ((iteration: number) => Temporal.DurationLike | string); + + /** + * Cancels traversal before requests and before yields. + */ + readonly signal?: AbortSignal; +} + +/** + * A single object discovered by backfill traversal. + * + * @since 2.3.0 + */ +export interface BackfillItem< + TObject extends APObject = APObject, +> { + /** + * The discovered ActivityPub object. + */ + readonly object: TObject; + + /** + * The object's ActivityPub ID, when present. + */ + readonly id?: URL; + + /** + * The traversal strategy that produced this item. + */ + readonly strategy: BackfillStrategy; + + /** + * The source relation that produced this item. + */ + readonly origin: BackfillOrigin; + + /** + * Traversal depth. + * + * Direct context collection items are depth 0. Reply-tree items use depth + * 1 for immediate `inReplyTo` targets and direct `replies` collection items, + * depth 2 for the next level, and so on. + */ + readonly depth?: number; +} diff --git a/packages/backfill/tsdown.config.ts b/packages/backfill/tsdown.config.ts new file mode 100644 index 000000000..bd0f4b7a0 --- /dev/null +++ b/packages/backfill/tsdown.config.ts @@ -0,0 +1,32 @@ +import { glob } from "node:fs/promises"; +import { sep } from "node:path"; +import { defineConfig } from "tsdown"; + +export default [ + defineConfig({ + entry: ["src/mod.ts"], + dts: true, + format: ["esm", "cjs"], + platform: "node", + outExtensions({ format }) { + return { + js: format === "cjs" ? ".cjs" : ".js", + dts: format === "cjs" ? ".d.cts" : ".d.ts", + }; + }, + deps: { neverBundle: ["@fedify/vocab"] }, + }), + defineConfig({ + entry: (await Array.fromAsync(glob(`src/**/*.test.ts`))) + .map((f) => f.replaceAll(sep, "/")), + format: ["esm", "cjs"], + platform: "node", + outExtensions({ format }) { + return { + js: format === "cjs" ? ".cjs" : ".js", + dts: format === "cjs" ? ".d.cts" : ".d.ts", + }; + }, + deps: { neverBundle: [/^node:/, "@fedify/vocab"] }, + }), +]; diff --git a/packages/fedify/README.md b/packages/fedify/README.md index c2c5109a4..c7508c353 100644 --- a/packages/fedify/README.md +++ b/packages/fedify/README.md @@ -106,6 +106,7 @@ Here is the list of packages: | [@fedify/create](/packages/create/) | | [npm][npm:@fedify/create] | Create a new Fedify project | | [@fedify/amqp](/packages/amqp/) | [JSR][jsr:@fedify/amqp] | [npm][npm:@fedify/amqp] | AMQP/RabbitMQ driver | | [@fedify/astro](/packages/astro/) | [JSR][jsr:@fedify/astro] | [npm][npm:@fedify/astro] | Astro integration | +| [@fedify/backfill](/packages/backfill/) | [JSR][jsr:@fedify/backfill] | [npm][npm:@fedify/backfill] | ActivityPub backfill support | | [@fedify/cfworkers](/packages/cfworkers/) | [JSR][jsr:@fedify/cfworkers] | [npm][npm:@fedify/cfworkers] | Cloudflare Workers integration | | [@fedify/debugger](/packages/debugger/) | [JSR][jsr:@fedify/debugger] | [npm][npm:@fedify/debugger] | Embedded ActivityPub debug dashboard | | [@fedify/denokv](/packages/denokv/) | [JSR][jsr:@fedify/denokv] | | Deno KV integration | @@ -142,6 +143,8 @@ Here is the list of packages: [npm:@fedify/amqp]: https://www.npmjs.com/package/@fedify/amqp [jsr:@fedify/astro]: https://jsr.io/@fedify/astro [npm:@fedify/astro]: https://www.npmjs.com/package/@fedify/astro +[jsr:@fedify/backfill]: https://jsr.io/@fedify/backfill +[npm:@fedify/backfill]: https://www.npmjs.com/package/@fedify/backfill [jsr:@fedify/cfworkers]: https://jsr.io/@fedify/cfworkers [npm:@fedify/cfworkers]: https://www.npmjs.com/package/@fedify/cfworkers [jsr:@fedify/debugger]: https://jsr.io/@fedify/debugger diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 1c093afb8..b2279217a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -205,6 +205,9 @@ importers: '@fedify/astro': specifier: workspace:^ version: link:../packages/astro + '@fedify/backfill': + specifier: workspace:^ + version: link:../packages/backfill '@fedify/cfworkers': specifier: workspace:^ version: link:../packages/cfworkers @@ -898,6 +901,19 @@ importers: specifier: 'catalog:' version: 6.0.3 + packages/backfill: + dependencies: + '@fedify/vocab': + specifier: workspace:* + version: link:../vocab + devDependencies: + tsdown: + specifier: 'catalog:' + version: 0.22.0(tsx@4.21.0)(typescript@6.0.3)(unrun@0.2.34(@emnapi/core@1.10.0)(@emnapi/runtime@1.10.0)) + typescript: + specifier: 'catalog:' + version: 6.0.3 + packages/cfworkers: dependencies: '@cloudflare/workers-types': diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 433b73212..267f7841e 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -1,6 +1,7 @@ packages: - packages/amqp - packages/astro +- packages/backfill - packages/cfworkers - packages/cli - packages/debugger