diff --git a/CHANGES.md b/CHANGES.md index 04d9bc57b..79c9e77af 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -315,11 +315,22 @@ To be released. that keep `triggerSinks` allowlisting enabled. This change is published as benchmark scenario schema version 2. [[#744], [#785], [#801], [#802]] + - Added `fedify bench compare` for CI-friendly performance regression gates. + The command checks out base and head refs into temporary worktrees, starts + the benchmark target for each ref, runs the same suite, and fails when the + head regresses beyond `--max-regression` plus the measured per-run noise + band. Benchmark scenarios now run three times by default and aggregate + repeated runs with median latency/throughput and pessimistic correctness + results. This change is published as benchmark report schema version 3 + and comparison report schema version 1. [[#744], [#786], [#804]] + [#783]: https://github.com/fedify-dev/fedify/issues/783 [#784]: https://github.com/fedify-dev/fedify/issues/784 [#785]: https://github.com/fedify-dev/fedify/issues/785 +[#786]: https://github.com/fedify-dev/fedify/issues/786 [#801]: https://github.com/fedify-dev/fedify/pull/801 [#802]: https://github.com/fedify-dev/fedify/pull/802 +[#804]: https://github.com/fedify-dev/fedify/pull/804 ### @fedify/fixture diff --git a/docs/manual/benchmarking.md b/docs/manual/benchmarking.md index d20b94e0f..f792f9717 100644 --- a/docs/manual/benchmarking.md +++ b/docs/manual/benchmarking.md @@ -100,7 +100,6 @@ crypto cost is real. > types, a few options the format accepts are also not implemented yet and are > rejected up front with a clear message: > -> - `runs` greater than `1` (repeated runs). > - An `inbox` `activity` that is not a `Create` carrying an embedded `Note`; > that is, a non-`Create` `type`, a non-`Note` `object.type`, or > `embedObject: false`. 
@@ -262,6 +261,29 @@ Signing is kept off the send critical path, set per scenario with `signing`: (open-loop only; Poisson arrivals may still sign a few extra during the run). +### Repeated runs + +Each scenario runs three times by default. Set `runs` in `defaults` to change +the whole suite, or set `runs` on one scenario to override the default for that +scenario: + +~~~~ yaml +defaults: + runs: 5 +scenarios: +- name: ci-smoke + type: webfinger + runs: 1 + recipient: acct:alice@localhost +~~~~ + +Repeated runs are aggregated for stable CI gates. Latency and throughput +metrics use the median run, request totals and error buckets are summed, queue +depth uses the worst observed maximum, and `successRate` uses the worst run so +one bad run is not hidden by clean neighbors. The JSON report records +`runCount` for every scenario and includes per-run measurements in `runs` when +the scenario ran more than once. + ### Output Choose the format with `--format text` (default), `json`, or `markdown`; @@ -288,7 +310,80 @@ CI check. Keep CI gates on robust signals such as success rate, error counts, and gross throughput or latency floors; precise latency-percentile regression belongs in a controlled environment, not a shared CI runner. 
-[report schema]: https://json-schema.fedify.dev/bench/report-v2.json +[report schema]: https://json-schema.fedify.dev/bench/report-v3.json + +### Comparing two revisions + +Use `fedify bench compare` when a CI job should compare a change against a base +revision on the same runner instead of relying on an absolute threshold: + +~~~~ sh +fedify bench compare \ + --base origin/main \ + --head HEAD \ + --file scenario.yaml \ + --start-command "pnpm dev" \ + --ready-url http://127.0.0.1:3000/health \ + --max-regression 15% +~~~~ + +The command creates temporary detached worktrees for the base and head refs, +starts the target command inside each worktree, waits for `--ready-url` to respond successfully (up to `--ready-timeout`, `30s` by default), then +runs the same suite from the current checkout against that target. The two +targets run sequentially, so they can use the same port. Dependencies are not +installed automatically; either prepare both refs in the job before comparing +or make `--start-command` perform the needed build/start steps. + +If `--target` is omitted, the benchmark target defaults to the origin of +`--ready-url`. Pass `--target` when readiness and benchmark traffic use +different URLs. The comparison report can be written as text, JSON, or +Markdown with the same `--format` and `--output` options as `fedify bench`; JSON validates +against the [comparison report schema]. + +`--max-regression` accepts either a ratio such as `0.15` or a percentage such +as `15%`. For each scenario, `fedify bench compare` compares performance +metrics from the scenario's `expect` block when they are latency or rate +metrics; if no such metric is present, it compares `latency.p95` and +`throughputPerSec`. A head result passes when the measured regression is +within `--max-regression` plus the observed per-run noise band. The command +exits with status 1 when the head run fails its own `expect` gate or a +comparison exceeds that allowance; configuration and orchestration failures +exit with status 2. 
+ +Use short, broad suites in shared CI: + +~~~~ yaml +defaults: + runs: 3 + duration: 20s + warmup: 5s +scenarios: +- name: inbox-ci + type: inbox + # ... + expect: + successRate: ">= 99%" + latency.p95: "< 500ms" +~~~~ + +Use a controlled performance runner for narrower regression checks: + +~~~~ yaml +defaults: + runs: 7 + duration: 2m + warmup: 20s +scenarios: +- name: inbox-lab + type: inbox + # ... + expect: + successRate: ">= 99.9%" + latency.p95: "< 120ms" + throughputPerSec: "> 250/s" +~~~~ + +[comparison report schema]: https://json-schema.fedify.dev/bench/compare-report-v1.json ### Safety diff --git a/packages/cli/src/bench/__fixtures__/compare-reports/basic.json b/packages/cli/src/bench/__fixtures__/compare-reports/basic.json new file mode 100644 index 000000000..75f712a3f --- /dev/null +++ b/packages/cli/src/bench/__fixtures__/compare-reports/basic.json @@ -0,0 +1,83 @@ +{ + "$schema": "https://json-schema.fedify.dev/bench/compare-report-v1.json", + "schemaVersion": 1, + "tool": { "name": "@fedify/cli", "version": "2.3.0" }, + "environment": { + "runtime": "deno", + "runtimeVersion": "2.5.0", + "os": "linux", + "cpuCount": 16 + }, + "startedAt": "2026-06-04T12:00:00.000Z", + "finishedAt": "2026-06-04T12:03:00.000Z", + "suite": { "name": "Inbox regression suite", "configHash": "sha256:abc123" }, + "maxRegression": 0.15, + "base": { + "ref": "origin/main", + "report": { + "$schema": "https://json-schema.fedify.dev/bench/report-v3.json", + "schemaVersion": 3, + "tool": { "name": "@fedify/cli", "version": "2.3.0" }, + "environment": { + "runtime": "deno", + "runtimeVersion": "2.5.0", + "os": "linux", + "cpuCount": 16 + }, + "target": { + "url": "http://localhost:3000", + "fedifyVersion": "2.3.0", + "statsAvailable": true + }, + "startedAt": "2026-06-04T12:00:00.000Z", + "finishedAt": "2026-06-04T12:01:00.000Z", + "suite": { + "name": "Inbox regression suite", + "configHash": "sha256:abc123" + }, + "passed": true, + "scenarios": [] + } + }, + "head": { + 
"ref": "HEAD", + "report": { + "$schema": "https://json-schema.fedify.dev/bench/report-v3.json", + "schemaVersion": 3, + "tool": { "name": "@fedify/cli", "version": "2.3.0" }, + "environment": { + "runtime": "deno", + "runtimeVersion": "2.5.0", + "os": "linux", + "cpuCount": 16 + }, + "target": { + "url": "http://localhost:3000", + "fedifyVersion": "2.3.0", + "statsAvailable": true + }, + "startedAt": "2026-06-04T12:02:00.000Z", + "finishedAt": "2026-06-04T12:03:00.000Z", + "suite": { + "name": "Inbox regression suite", + "configHash": "sha256:abc123" + }, + "passed": true, + "scenarios": [] + } + }, + "comparisons": [ + { + "scenario": "inbox-shared", + "metric": "latency.p95", + "direction": "lower-is-better", + "base": 91, + "head": 94, + "regression": 0.03296703296703297, + "noiseBand": 0.02, + "allowedRegression": 0.16999999999999998, + "pass": true + } + ], + "passed": true +} diff --git a/packages/cli/src/bench/__fixtures__/reports/inbox-report.json b/packages/cli/src/bench/__fixtures__/reports/inbox-report.json index b7ca535f8..aea7c800c 100644 --- a/packages/cli/src/bench/__fixtures__/reports/inbox-report.json +++ b/packages/cli/src/bench/__fixtures__/reports/inbox-report.json @@ -1,6 +1,6 @@ { - "$schema": "https://json-schema.fedify.dev/bench/report-v2.json", - "schemaVersion": 2, + "$schema": "https://json-schema.fedify.dev/bench/report-v3.json", + "schemaVersion": 3, "tool": { "name": "@fedify/cli", "version": "2.3.0" }, "environment": { "runtime": "deno", @@ -86,7 +86,8 @@ "pass": true } ], - "passed": true + "passed": true, + "runCount": 1 } ] } diff --git a/packages/cli/src/bench/action.test.ts b/packages/cli/src/bench/action.test.ts index b58dd9733..9120f5b77 100644 --- a/packages/cli/src/bench/action.test.ts +++ b/packages/cli/src/bench/action.test.ts @@ -6,11 +6,12 @@ import test from "node:test"; import { serve } from "srvx"; import { spawnBenchmarkTarget } from "../../test/bench/fixture.ts"; import runBench, { withUserAgent } from 
"./action.ts"; -import type { BenchCommand } from "./command.ts"; +import type { BenchRunCommand } from "./command.ts"; -function command(overrides: Partial): BenchCommand { +function command(overrides: Partial): BenchRunCommand { return { command: "bench", + mode: "run", scenario: "", target: undefined, format: "json", @@ -19,7 +20,7 @@ function command(overrides: Partial): BenchCommand { allowUnsafeTarget: false, userAgent: "Fedify-bench-test/1.0", ...overrides, - } as BenchCommand; + } as BenchRunCommand; } async function writeSuite(content: string): Promise { @@ -160,6 +161,8 @@ test("runBench - dry run prints a plan and sends nothing", async () => { }); assert.strictEqual(code, 0); assert.match(output, /dry run/i); + assert.match(output, /runs 3/); + assert.match(output, /total duration 750ms/); assert.match(output, /\/inbox/); assert.match(output, /No benchmark load was sent/); const requests = target.requests(); @@ -170,6 +173,86 @@ test("runBench - dry run prints a plan and sends nothing", async () => { } }); +test("runBench - dry run includes repeated open-loop request volume", async () => { + const target = await spawnBenchmarkTarget(); + try { + const file = await writeSuite(`version: 1 +target: ${target.url.href} +scenarios: + - name: inbox-open + type: inbox + recipient: "http://\${{ target.host }}/users/alice" + inbox: shared + load: { rate: 2/s } + duration: 500ms +`); + let code = -1; + let output = ""; + await runBench(command({ scenario: file, dryRun: true }), { + exit: (c) => { + code = c; + }, + writeOutput: (c) => { + output = c; + return Promise.resolve(); + }, + log: () => {}, + }); + assert.strictEqual(code, 0); + assert.match(output, /runs 3/); + assert.match(output, /total duration 1500ms/); + assert.match(output, /estimated scheduled requests 3/); + const requests = target.requests(); + assert.ok(requests.some((r) => r.method === "GET")); + assert.ok(!requests.some((r) => r.method === "POST")); + } finally { + await target.close(); + } 
+}); + +test("runBench - repeats a scenario according to runs", async () => { + const file = await writeSuite(`version: 1 +target: http://127.0.0.1:3000 +scenarios: + - name: wf + type: webfinger + recipient: "acct:alice@example.com" + runs: 2 + load: { concurrency: 1 } + duration: 5ms +`); + let code = -1; + let output = ""; + await runBench(command({ scenario: file }), { + exit: (c) => { + code = c; + }, + writeOutput: (c) => { + output = c; + return Promise.resolve(); + }, + log: () => {}, + fetch: (input) => { + const url = new URL(input instanceof Request ? input.url : input); + if (url.pathname === "/.well-known/fedify/bench/stats") { + return Promise.resolve(new Response("not found", { status: 404 })); + } + if (url.pathname === "/.well-known/webfinger") { + return Promise.resolve( + new Response(JSON.stringify({ subject: "acct:alice@example.com" }), { + headers: { "content-type": "application/jrd+json" }, + }), + ); + } + return Promise.resolve(new Response("not found", { status: 404 })); + }, + }); + const report = JSON.parse(output); + assert.strictEqual(code, 0); + assert.strictEqual(report.scenarios[0].runCount, 2); + assert.strictEqual(report.scenarios[0].runs.length, 2); +}); + test("runBench - dry run reports inbox discovery failures and continues", async () => { const target = await spawnBenchmarkTarget(); try { @@ -729,6 +812,7 @@ scenarios: type: inbox recipient: "${new URL("/users/alice", target.url).href}" inbox: "https://shared.staging.example/inbox" + runs: 1 load: { concurrency: 2 } duration: 250ms `); @@ -848,6 +932,46 @@ scenarios: } }); +test("runBench - unsafe public inbox destination needs explicit runs", async () => { + const target = await spawnBenchmarkTarget(); + try { + const file = await writeSuite(`version: 1 +target: ${target.url.href} +scenarios: + - name: inbox-shared + type: inbox + recipient: "${new URL("/users/alice", target.url).href}" + inbox: "https://prod.example/inbox" + load: { rate: 1/s } + duration: 1ms +`); + let 
code = -1; + let message = ""; + await runBench( + command({ + scenario: file, + target: target.url.href, + allowUnsafeTarget: true, + advertiseHost: "127.0.0.1", + }), + { + exit: (c) => { + code = c; + }, + writeOutput: () => Promise.resolve(), + log: (m) => { + message = m; + }, + resolveTargetAddresses: resolvePublicHost, + }, + ); + assert.strictEqual(code, 2); + assert.match(message, /runs/); + } finally { + await target.close(); + } +}); + test("runBench - unsafe public inbox destination honors suite defaults", async () => { const target = await spawnBenchmarkTarget(); try { @@ -855,6 +979,7 @@ test("runBench - unsafe public inbox destination honors suite defaults", async ( target: ${target.url.href} defaults: duration: 1ms + runs: 1 load: { rate: 1/s } scenarios: - name: inbox-shared diff --git a/packages/cli/src/bench/action.ts b/packages/cli/src/bench/action.ts index afc1f6f01..401e36d00 100644 --- a/packages/cli/src/bench/action.ts +++ b/packages/cli/src/bench/action.ts @@ -4,7 +4,7 @@ import process from "node:process"; import { getContextLoader, getDocumentLoader } from "../docloader.ts"; import { describeError } from "../utils.ts"; import { buildFleet } from "./actor/fleet.ts"; -import type { BenchCommand } from "./command.ts"; +import type { BenchRunCommand } from "./command.ts"; import { type DiscoveredInbox, discoverInbox, @@ -19,6 +19,7 @@ import { buildScenarioResult, configHash, detectEnvironment, + type ScenarioMeasurement, } from "./result/build.ts"; import { probeBenchmarkMode } from "./discovery/probe.ts"; import { renderReport, type ReportFormat } from "./render/index.ts"; @@ -50,6 +51,10 @@ import { } from "./server/synthetic.ts"; import { convertUrlIfHandle } from "../webfinger/lib.ts"; +type BenchRunRuntimeCommand = BenchRunCommand & { + readonly explicitCliTarget?: boolean; +}; + /** Injectable dependencies for {@link runBench}, overridable in tests. */ export interface RunBenchDeps { /** Terminates the process with an exit code. 
*/ @@ -65,6 +70,8 @@ export interface RunBenchDeps { readonly fetch?: typeof fetch; /** Hostname resolver used for target risk classification. */ readonly resolveTargetAddresses?: ResolveTargetAddresses; + /** Aborts in-flight benchmark work. */ + readonly signal?: AbortSignal; } /** @@ -76,7 +83,7 @@ export interface RunBenchDeps { * @param deps Injectable dependencies for testing. */ export default async function runBench( - command: BenchCommand, + command: BenchRunRuntimeCommand, deps: RunBenchDeps = {}, ): Promise { // Set the exit code rather than terminating, so cleanup (closing the fleet) @@ -90,7 +97,13 @@ export default async function runBench( // Apply the configured User-Agent to all benchmark traffic — the probe, the // stats reads, and the runners' inbox/WebFinger requests — not just the // document loader, so a target that inspects the UA sees it on every request. - const fetchImpl = withUserAgent(deps.fetch ?? fetch, command.userAgent); + const signal = deps.signal; + const fetchImpl = withUserAgent( + withAbortSignal(deps.fetch ?? fetch, signal), + command.userAgent, + ); + const explicitCliTarget = command.explicitCliTarget ?? command.target != null; + throwIfAborted(signal); // Loading, validation, and normalization failures are all user-facing // configuration errors. 
@@ -105,6 +118,7 @@ export default async function runBench( log(describeError(error)); return void exit(2); } + throwIfAborted(signal); // Preflight every runner so an unsupported scenario type, an option the // runner cannot honor, or a malformed `expect` assertion fails fast, before @@ -124,19 +138,22 @@ export default async function runBench( log(describeError(error)); return void exit(2); } + throwIfAborted(signal); const tier = await classifyResolvedTarget( suite.target, deps.resolveTargetAddresses, ); + throwIfAborted(signal); const probe = await probeBenchmarkMode(suite.target, fetchImpl); + throwIfAborted(signal); try { if (!command.dryRun) { assertUnsafeOverrideAllowed({ tier, benchmarkMode: probe.benchmarkMode, allowUnsafe: command.allowUnsafeTarget, - explicitCliTarget: command.target != null, + explicitCliTarget, scenarios: unsafeOverrideScenarios(validated), }); } @@ -190,7 +207,7 @@ export default async function runBench( targetOrigin: suite.target.origin, targetBenchmarkMode: probe.benchmarkMode, allowUnsafe: command.allowUnsafeTarget, - explicitCliTarget: command.target != null, + explicitCliTarget, destinationTier, defaults: validated.defaults, }); @@ -219,7 +236,7 @@ export default async function runBench( targetOrigin: suite.target.origin, targetBenchmarkMode: probe.benchmarkMode, allowUnsafe: command.allowUnsafeTarget, - explicitCliTarget: command.target != null, + explicitCliTarget, destinationTier, defaults: validated.defaults, }); @@ -287,6 +304,7 @@ export default async function runBench( let fleet: SyntheticServer | undefined; const startedAt = new Date().toISOString(); try { + throwIfAborted(signal); if ( suite.scenarios.some((scenario) => scenarioNeedsSyntheticServer(scenario, suite.scenarios) @@ -299,25 +317,36 @@ export default async function runBench( const results = []; for (let i = 0; i < suite.scenarios.length; i++) { const scenario = suite.scenarios[i]; - log(`Running scenario "${scenario.name}" (${scenario.type})…`); - const 
measurement = await runners[i].run({ - scenario, - scenarios: suite.scenarios, - target: suite.target, - documentLoader, - contextLoader, - allowPrivateAddress, - fleet: fleet ?? null, - advertiseHost: command.advertiseHost, - fetch: fetchImpl, - assertDestinationAllowed: (url, gateScenario) => - assertDestinationAllowed(url, gateScenario ?? scenario), - assertReadDestinationAllowed: (url, gateScenario) => - assertReadDestinationAllowed(url, gateScenario ?? scenario), - assertActorlessDestinationAllowed: (url, gateScenario) => - assertActorlessDestinationAllowed(url, gateScenario ?? scenario), - }); - results.push(buildScenarioResult(scenario, measurement)); + const measurements: ScenarioMeasurement[] = []; + for (let run = 1; run <= scenario.runs; run++) { + throwIfAborted(signal); + const suffix = scenario.runs === 1 + ? "" + : ` run ${run}/${scenario.runs}`; + log(`Running scenario "${scenario.name}" (${scenario.type})${suffix}…`); + measurements.push( + await runners[i].run({ + scenario, + scenarios: suite.scenarios, + target: suite.target, + documentLoader, + contextLoader, + allowPrivateAddress, + fleet: fleet ?? null, + advertiseHost: command.advertiseHost, + fetch: fetchImpl, + assertDestinationAllowed: (url, gateScenario) => + assertDestinationAllowed(url, gateScenario ?? scenario), + assertReadDestinationAllowed: (url, gateScenario) => + assertReadDestinationAllowed(url, gateScenario ?? scenario), + assertActorlessDestinationAllowed: (url, gateScenario) => + assertActorlessDestinationAllowed(url, gateScenario ?? 
scenario), + signal, + }), + ); + throwIfAborted(signal); + } + results.push(buildScenarioResult(scenario, measurements)); } const report = buildReport({ scenarios: results, @@ -392,6 +421,25 @@ export function withUserAgent( }) as typeof fetch; } +function withAbortSignal( + fetchImpl: typeof fetch, + signal: AbortSignal | undefined, +): typeof fetch { + if (signal == null) return fetchImpl; + return ((input: URL | RequestInfo, init?: RequestInit) => { + if (signal.aborted) return Promise.reject(abortReason(signal)); + return fetchImpl(input, { ...init, signal }); + }) as typeof fetch; +} + +function throwIfAborted(signal: AbortSignal | undefined): void { + if (signal?.aborted) throw abortReason(signal); +} + +function abortReason(signal: AbortSignal): unknown { + return signal.reason ?? new Error("Benchmark run aborted."); +} + async function defaultWriteOutput( content: string, outputPath: string | undefined, @@ -446,7 +494,31 @@ function describePlan(scenario: ResolvedScenario): string { const load = scenario.load.kind === "open" ? `open-loop ${scenario.load.ratePerSec}/s ${scenario.load.arrival}` : `closed-loop concurrency ${scenario.load.concurrency}`; - return `${load}, duration ${scenario.durationMs}ms, signing ${scenario.signing}`; + const totalDurationMs = scenario.durationMs * scenario.runs; + const volume = describePlannedRequestVolume(scenario); + return [ + load, + `duration ${scenario.durationMs}ms`, + `runs ${scenario.runs}`, + `total duration ${totalDurationMs}ms`, + ...(volume == null ? 
[] : [volume]), + `signing ${scenario.signing}`, + ].join(", "); +} + +function describePlannedRequestVolume( + scenario: ResolvedScenario, +): string | null { + if (scenario.load.kind !== "open") return null; + const estimatedRequests = scenario.load.ratePerSec * + (scenario.durationMs / 1000) * scenario.runs; + return `estimated scheduled requests ${formatPlanNumber(estimatedRequests)}`; +} + +function formatPlanNumber(value: number): string { + if (Number.isInteger(value)) return String(value); + const formatted = value.toFixed(2).replace(/\.?0+$/, ""); + return formatted === "" ? "0" : formatted; } async function describeDiscoveryPlan( @@ -656,11 +728,13 @@ function unsafeOverrideScenario( ): Parameters[0]["scenarios"][number] { const defaultDuration = defaults?.duration != null; const defaultLoad = hasExplicitLoad(defaults?.load); + const defaultRuns = defaults?.runs != null; const raw = "raw" in scenario ? scenario.raw : scenario; return { name: scenario.name, explicitDuration: raw.duration != null || defaultDuration, explicitLoad: hasExplicitLoad(raw.load) || defaultLoad, + explicitRuns: raw.runs != null || defaultRuns, }; } diff --git a/packages/cli/src/bench/command.test.ts b/packages/cli/src/bench/command.test.ts index eb3dad4fb..5d91f5698 100644 --- a/packages/cli/src/bench/command.test.ts +++ b/packages/cli/src/bench/command.test.ts @@ -11,6 +11,7 @@ test("benchCommand - scenario file only", () => { assert.ok(result.success); if (result.success) { assert.strictEqual(result.value.command, COMMAND); + if (result.value.mode !== "run") assert.fail("Expected run mode."); assert.strictEqual(result.value.scenario, FILE); assert.strictEqual(result.value.target, undefined); assert.strictEqual(result.value.format, "text"); @@ -39,6 +40,7 @@ test("benchCommand - with all options", () => { ]); assert.ok(result.success); if (result.success) { + if (result.value.mode !== "run") assert.fail("Expected run mode."); assert.strictEqual(result.value.scenario, FILE); 
assert.strictEqual(result.value.target, "http://localhost:3000"); assert.strictEqual(result.value.format, "json"); @@ -54,6 +56,74 @@ test("benchCommand - missing scenario file fails", () => { assert.ok(!result.success); }); +test("benchCommand - compare mode", () => { + const result = parse(benchCommand, [ + COMMAND, + "compare", + "--base", + "origin/main", + "--head", + "HEAD", + "--file", + FILE, + "--start-command", + "pnpm dev", + "--ready-url", + "http://127.0.0.1:3000/health", + "--ready-timeout", + "45s", + "--max-regression", + "15%", + "--format", + "markdown", + "--target", + "http://127.0.0.1:3000", + "--allow-unsafe-target", + "-u", + "MyAgent/1.0", + ]); + assert.ok(result.success); + if (result.success) { + assert.strictEqual(result.value.command, COMMAND); + if (result.value.mode !== "compare") { + assert.fail("Expected compare mode."); + } + assert.strictEqual(result.value.base, "origin/main"); + assert.strictEqual(result.value.head, "HEAD"); + assert.strictEqual(result.value.file, FILE); + assert.strictEqual(result.value.startCommand, "pnpm dev"); + assert.strictEqual( + result.value.readyUrl, + "http://127.0.0.1:3000/health", + ); + assert.strictEqual(result.value.readyTimeout, "45s"); + assert.strictEqual(result.value.maxRegression, "15%"); + assert.strictEqual(result.value.format, "markdown"); + assert.strictEqual(result.value.target, "http://127.0.0.1:3000"); + assert.strictEqual(result.value.allowUnsafeTarget, true); + assert.strictEqual(result.value.userAgent, "MyAgent/1.0"); + } +}); + +test("benchCommand - compare mode requires refs", () => { + const result = parse(benchCommand, [ + COMMAND, + "compare", + "--file", + FILE, + "--start-command", + "pnpm dev", + "--ready-url", + "http://127.0.0.1:3000/health", + ]); + assert.ok(!result.success); +}); + +test("benchCommand - bare compare subcommand fails", () => { + const result = parse(benchCommand, [COMMAND, "compare"]); + assert.ok(!result.success); +}); + test("benchCommand - invalid 
format value fails", () => { const result = parse(benchCommand, [COMMAND, FILE, "--format", "xml"]); assert.ok(!result.success); diff --git a/packages/cli/src/bench/command.ts b/packages/cli/src/bench/command.ts index 1104753b3..fb5e02d41 100644 --- a/packages/cli/src/bench/command.ts +++ b/packages/cli/src/bench/command.ts @@ -12,6 +12,7 @@ import { object, option, optional, + or, string, withDefault, } from "@optique/core"; @@ -47,52 +48,120 @@ set in a configuration file.`, false, ); -export const benchCommand = command( - "bench", +const outputOption = optional( + option("-o", "--output", string({ metavar: "OUTPUT_PATH" }), { + description: + message`Write the report to a file instead of standard output.`, + }), +); + +const targetOption = optional( + option("-t", "--target", string({ metavar: "URL" }), { + description: message`Override the target URL declared in the suite.`, + }), +); + +const advertiseHostOption = optional( + option("--advertise-host", string({ metavar: "HOST" }), { + description: message`Host (name or IP) a non-loopback target can reach the \ +benchmark's synthetic actor server at. 
Required for signed scenarios against a \ +non-loopback target; binds the synthetic server on all interfaces and uses this \ +host in the actor and key URLs the target dereferences.`, + }), +); + +const runParser = merge( + "Benchmark options", + object({ + command: constant("bench"), + mode: constant("run"), + scenario: group( + "Arguments", + argument(string({ metavar: "SCENARIO_FILE" }), { + description: message`Path to the benchmark suite file (YAML or JSON).`, + }), + ), + target: targetOption, + format: formatOption, + output: outputOption, + dryRun: withDefault( + flag("--dry-run", { + description: + message`Resolve discovery and print the benchmark plan without \ +sending load.`, + }), + false, + ), + advertiseHost: advertiseHostOption, + allowUnsafeTarget, + }), + userAgentOption, +); + +const compareParser = command( + "compare", merge( - "Benchmark options", + "Compare options", object({ command: constant("bench"), - scenario: group( - "Arguments", - argument(string({ metavar: "SCENARIO_FILE" }), { - description: - message`Path to the benchmark suite file (YAML or JSON).`, - }), - ), - target: optional( - option("-t", "--target", string({ metavar: "URL" }), { - description: message`Override the target URL declared in the suite.`, - }), - ), - format: formatOption, - output: optional( - option("-o", "--output", string({ metavar: "OUTPUT_PATH" }), { + mode: constant("compare"), + base: option("--base", string({ metavar: "REF" }), { + description: message`The base git ref to benchmark.`, + }), + head: option("--head", string({ metavar: "REF" }), { + description: message`The head git ref to benchmark.`, + }), + file: option("--file", string({ metavar: "SCENARIO_FILE" }), { + description: message`Path to the benchmark suite file (YAML or JSON).`, + }), + startCommand: option( + "--start-command", + string({ metavar: "COMMAND" }), + { description: - message`Write the report to a file instead of standard output.`, - }), + message`Shell command that starts the 
target application in each \ +checked-out worktree.`, + }, ), - dryRun: withDefault( - flag("--dry-run", { - description: - message`Resolve discovery and print the benchmark plan without \ -sending load.`, + readyUrl: option("--ready-url", string({ metavar: "URL" }), { + description: + message`URL that returns success when the started target is ready.`, + }), + readyTimeout: withDefault( + option("--ready-timeout", string({ metavar: "DURATION" }), { + description: message`How long to wait for --ready-url.`, }), - false, + "30s", ), - advertiseHost: optional( - option("--advertise-host", string({ metavar: "HOST" }), { + maxRegression: option( + "--max-regression", + string({ metavar: "PERCENT" }), + { description: - message`Host (name or IP) a non-loopback target can reach the \ -benchmark's synthetic actor server at. Required for signed scenarios against a \ -non-loopback target; binds the synthetic server on all interfaces and uses this \ -host in the actor and key URLs the target dereferences.`, - }), + message`Maximum regression tolerated after the measured noise band.`, + }, ), + target: targetOption, + format: formatOption, + output: outputOption, + dryRun: constant(false), + advertiseHost: advertiseHostOption, allowUnsafeTarget, }), userAgentOption, ), + { + brief: message`Compare base and head benchmark runs.`, + description: + message`Run the same benchmark suite against two git revisions on the \ +same runner, then fail when the head revision regresses beyond the configured \ +tolerance and measured noise band.`, + }, +); + +export const benchCommand = command( + "bench", + or(compareParser, runParser), { brief: message`Benchmark a Fedify federation workload.`, description: message`Run an ActivityPub-specific load benchmark against a \ @@ -106,3 +175,5 @@ the suite format.`, ); export type BenchCommand = InferValue; +export type BenchRunCommand = Extract; +export type BenchCompareCommand = Extract; diff --git a/packages/cli/src/bench/compare.test.ts 
b/packages/cli/src/bench/compare.test.ts new file mode 100644 index 000000000..0bf3317ce --- /dev/null +++ b/packages/cli/src/bench/compare.test.ts @@ -0,0 +1,1129 @@ +import assert from "node:assert/strict"; +import { Buffer } from "node:buffer"; +import type { ChildProcess, SpawnOptions } from "node:child_process"; +import { EventEmitter } from "node:events"; +import { mkdtemp, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import test from "node:test"; +import type { BenchCompareCommand } from "./command.ts"; +import { + buildCompareReport, + createBenchmarkWorktree, + parseRegressionTolerance, + runBenchCompare, + startBenchmarkTarget, + stopTargetProcess, + waitReadyUrl, + windowsTaskkillArgs, +} from "./compare.ts"; +import type { BenchReport, ScenarioResult } from "./result/model.ts"; + +type FakeChildProcess = ChildProcess & { + readonly stdout: EventEmitter; + readonly stderr: EventEmitter; + readonly exitCode: number | null; + readonly signalCode: NodeJS.Signals | null; + kill(signal?: NodeJS.Signals | number): boolean; +}; + +function scenario( + overrides: Partial & { name?: string } = {}, +): ScenarioResult { + const base: ScenarioResult = { + name: "inbox-shared", + type: "inbox", + load: { + model: "closed", + concurrency: 1, + durationMs: 100, + warmupMs: 0, + }, + requests: { total: 10, ok: 10, failed: 0, successRate: 1 }, + throughputPerSec: 100, + client: { + latencyMs: { p50: 50, p95: 100, p99: 110, mean: 60, max: 120 }, + }, + server: null, + errors: [], + expectations: [{ + metric: "latency.p95", + op: "lt", + threshold: 250, + unit: "ms", + actual: 100, + severity: "fail", + pass: true, + }], + passed: true, + runCount: 3, + runs: [ + runResult(90, 100), + runResult(100, 100), + runResult(110, 100), + ], + }; + return { ...base, ...overrides, name: overrides.name ?? 
base.name }; +} + +function runResult(latencyP95: number, throughput: number) { + return { + run: 1, + requests: { total: 10, ok: 10, failed: 0, successRate: 1 }, + throughputPerSec: throughput, + client: { + latencyMs: { + p50: latencyP95 / 2, + p95: latencyP95, + p99: latencyP95, + mean: latencyP95 / 2, + max: latencyP95, + }, + }, + server: null, + errors: [], + }; +} + +function report(scenarios: ScenarioResult[]): BenchReport { + return { + $schema: "https://json-schema.fedify.dev/bench/report-v3.json", + schemaVersion: 3, + tool: { name: "@fedify/cli", version: "2.3.0" }, + environment: { + runtime: "node", + runtimeVersion: "22.0.0", + os: "linux", + cpuCount: 8, + }, + target: { url: "http://127.0.0.1:3000", statsAvailable: true }, + startedAt: "2026-06-13T00:00:00.000Z", + finishedAt: "2026-06-13T00:00:01.000Z", + suite: { configHash: "sha256:x" }, + passed: scenarios.every((s) => s.passed), + scenarios, + }; +} + +function command(overrides: Partial): BenchCompareCommand { + return { + command: "bench", + mode: "compare", + base: "origin/main", + head: "HEAD", + file: "scenario.yaml", + startCommand: "pnpm dev", + readyUrl: "http://127.0.0.1:3000/health", + readyTimeout: "30s", + maxRegression: "15%", + target: undefined, + format: "json", + output: undefined, + dryRun: false, + advertiseHost: undefined, + allowUnsafeTarget: false, + userAgent: "Fedify-bench-test/1.0", + ...overrides, + }; +} + +async function writeSuite(content: string): Promise { + const dir = await mkdtemp(join(tmpdir(), "fedify-bench-compare-")); + const path = join(dir, "suite.yaml"); + await writeFile(path, content, { encoding: "utf-8" }); + return path; +} + +function fakeChildProcess(pid = 1234): FakeChildProcess { + const child = new EventEmitter() as FakeChildProcess; + Object.defineProperties(child, { + pid: { value: pid, configurable: true }, + stdout: { value: new EventEmitter(), configurable: true }, + stderr: { value: new EventEmitter(), configurable: true }, + exitCode: { 
value: null, configurable: true }, + signalCode: { value: null, configurable: true }, + }); + child.kill = (signal?: NodeJS.Signals | number) => { + child.emit("exit", null, signal); + return true; + }; + return child; +} + +test("parseRegressionTolerance - parses percentages", () => { + assert.strictEqual(parseRegressionTolerance("15%"), 0.15); + assert.strictEqual(parseRegressionTolerance("0.2"), 0.2); + assert.strictEqual(parseRegressionTolerance("1"), 1); +}); + +test("parseRegressionTolerance - rejects malformed values", () => { + assert.throws(() => parseRegressionTolerance("15ms"), RangeError); + assert.throws(() => parseRegressionTolerance("-1%"), RangeError); + assert.throws(() => parseRegressionTolerance(""), RangeError); +}); + +test("parseRegressionTolerance - rejects ambiguous whole-number ratios", () => { + assert.throws(() => parseRegressionTolerance("15"), RangeError); + assert.throws(() => parseRegressionTolerance("1.01"), RangeError); +}); + +test("buildCompareReport - applies the measured noise band", () => { + const base = report([scenario()]); + const head = report([ + scenario({ + client: { + latencyMs: { p50: 50, p95: 114, p99: 120, mean: 60, max: 130 }, + }, + runs: [runResult(113, 100), runResult(114, 100), runResult(115, 100)], + }), + ]); + const compare = buildCompareReport({ + baseRef: "origin/main", + headRef: "HEAD", + baseReport: base, + headReport: head, + maxRegression: 0.05, + startedAt: "2026-06-13T00:00:00.000Z", + finishedAt: "2026-06-13T00:00:01.000Z", + }); + assert.strictEqual(compare.comparisons.length, 1); + assert.strictEqual(compare.comparisons[0].metric, "latency.p95"); + assert.ok(compare.comparisons[0].pass); + assert.strictEqual(compare.passed, true); +}); + +test("buildCompareReport - fails regressions outside tolerance and noise", () => { + const base = report([ + scenario({ + expectations: [], + runs: [runResult(100, 100), runResult(100, 100), runResult(100, 100)], + }), + ]); + const head = report([ + scenario({ 
+ expectations: [], + throughputPerSec: 80, + runs: [runResult(100, 80), runResult(100, 80), runResult(100, 80)], + }), + ]); + const compare = buildCompareReport({ + baseRef: "origin/main", + headRef: "HEAD", + baseReport: base, + headReport: head, + maxRegression: 0.1, + startedAt: "2026-06-13T00:00:00.000Z", + finishedAt: "2026-06-13T00:00:01.000Z", + }); + const throughput = compare.comparisons.find((c) => + c.metric === "throughputPerSec" + ); + assert.ok(throughput); + assert.strictEqual(throughput.pass, false); + assert.strictEqual(compare.passed, false); +}); + +test("buildCompareReport - treats positive throughput after zero as passing", () => { + const throughputExpectation = (actual: number) => + ({ + metric: "throughputPerSec", + op: "gte", + threshold: 0, + unit: "/s", + actual, + severity: "fail", + pass: true, + }) as const; + const base = report([ + scenario({ + throughputPerSec: 0, + expectations: [throughputExpectation(0)], + runs: [runResult(100, 0), runResult(100, 0), runResult(100, 0)], + }), + ]); + const head = report([ + scenario({ + throughputPerSec: 10, + expectations: [throughputExpectation(10)], + runs: [runResult(100, 10), runResult(100, 10), runResult(100, 10)], + }), + ]); + const compare = buildCompareReport({ + baseRef: "origin/main", + headRef: "HEAD", + baseReport: base, + headReport: head, + maxRegression: 0, + startedAt: "2026-06-13T00:00:00.000Z", + finishedAt: "2026-06-13T00:00:01.000Z", + }); + assert.strictEqual(compare.comparisons.length, 1); + assert.strictEqual(compare.comparisons[0].metric, "throughputPerSec"); + assert.strictEqual(compare.comparisons[0].regression, 0); + assert.strictEqual(compare.comparisons[0].pass, true); + assert.strictEqual(compare.passed, true); +}); + +test("buildCompareReport - tolerates tiny latency after zero baseline", () => { + const latencyExpectation = (actual: number) => + ({ + metric: "latency.p95", + op: "lt", + threshold: 10, + unit: "ms", + actual, + severity: "fail", + pass: true, + 
}) as const; + const base = report([ + scenario({ + client: { + latencyMs: { p50: 0, p95: 0, p99: 0, mean: 0, max: 0 }, + }, + expectations: [latencyExpectation(0)], + runs: [runResult(0, 100), runResult(0, 100), runResult(0, 100)], + }), + ]); + const head = report([ + scenario({ + client: { + latencyMs: { p50: 1, p95: 1, p99: 1, mean: 1, max: 1 }, + }, + expectations: [latencyExpectation(1)], + runs: [runResult(1, 100), runResult(1, 100), runResult(1, 100)], + }), + ]); + const compare = buildCompareReport({ + baseRef: "origin/main", + headRef: "HEAD", + baseReport: base, + headReport: head, + maxRegression: 0, + startedAt: "2026-06-13T00:00:00.000Z", + finishedAt: "2026-06-13T00:00:01.000Z", + }); + assert.strictEqual(compare.comparisons.length, 1); + assert.strictEqual(compare.comparisons[0].metric, "latency.p95"); + assert.strictEqual(compare.comparisons[0].regression, 0); + assert.strictEqual(compare.comparisons[0].pass, true); + assert.strictEqual(compare.passed, true); +}); + +test("buildCompareReport - passes new head scenarios without a baseline", () => { + const base = report([scenario({ name: "existing" })]); + const head = report([ + scenario({ name: "existing" }), + scenario({ name: "new-scenario" }), + ]); + const compare = buildCompareReport({ + baseRef: "origin/main", + headRef: "HEAD", + baseReport: base, + headReport: head, + maxRegression: 0.1, + startedAt: "2026-06-13T00:00:00.000Z", + finishedAt: "2026-06-13T00:00:01.000Z", + }); + const newScenario = compare.comparisons.find((comparison) => + comparison.scenario === "new-scenario" + ); + assert.ok(newScenario); + assert.strictEqual(newScenario.metric, "scenario"); + assert.strictEqual(newScenario.base, null); + assert.strictEqual(newScenario.head, null); + assert.strictEqual(newScenario.pass, true); + assert.strictEqual(compare.passed, true); +}); + +test("buildCompareReport - matches duplicate scenario names by occurrence", () => { + const base = report([ + scenario({ + name: "duplicate", + 
client: { + latencyMs: { p50: 100, p95: 200, p99: 210, mean: 120, max: 220 }, + }, + runs: [ + runResult(200, 100), + runResult(200, 100), + runResult(200, 100), + ], + }), + scenario({ + name: "duplicate", + client: { + latencyMs: { p50: 50, p95: 100, p99: 110, mean: 60, max: 120 }, + }, + runs: [ + runResult(100, 100), + runResult(100, 100), + runResult(100, 100), + ], + }), + ]); + const head = report([ + scenario({ + name: "duplicate", + client: { + latencyMs: { p50: 115, p95: 230, p99: 240, mean: 130, max: 250 }, + }, + runs: [ + runResult(230, 100), + runResult(230, 100), + runResult(230, 100), + ], + }), + scenario({ + name: "duplicate", + client: { + latencyMs: { p50: 55, p95: 110, p99: 120, mean: 70, max: 130 }, + }, + runs: [ + runResult(110, 100), + runResult(110, 100), + runResult(110, 100), + ], + }), + ]); + const compare = buildCompareReport({ + baseRef: "origin/main", + headRef: "HEAD", + baseReport: base, + headReport: head, + maxRegression: 0.2, + startedAt: "2026-06-13T00:00:00.000Z", + finishedAt: "2026-06-13T00:00:01.000Z", + }); + assert.deepEqual(compare.comparisons.map((c) => c.base), [200, 100]); + assert.deepEqual(compare.comparisons.map((c) => c.head), [230, 110]); + assert.ok(compare.comparisons.every((c) => c.pass)); + assert.strictEqual(compare.passed, true); +}); + +test("buildCompareReport - matches reordered scenarios by name and type", () => { + const base = report([ + scenario({ + name: "first", + client: { + latencyMs: { p50: 50, p95: 100, p99: 110, mean: 60, max: 120 }, + }, + runs: [ + runResult(100, 100), + runResult(100, 100), + runResult(100, 100), + ], + }), + scenario({ + name: "second", + client: { + latencyMs: { p50: 100, p95: 200, p99: 210, mean: 120, max: 220 }, + }, + runs: [ + runResult(200, 100), + runResult(200, 100), + runResult(200, 100), + ], + }), + ]); + const head = report([ + scenario({ + name: "second", + client: { + latencyMs: { p50: 105, p95: 210, p99: 220, mean: 130, max: 230 }, + }, + runs: [ + 
runResult(210, 100), + runResult(210, 100), + runResult(210, 100), + ], + }), + scenario({ + name: "first", + client: { + latencyMs: { p50: 55, p95: 110, p99: 120, mean: 70, max: 130 }, + }, + runs: [ + runResult(110, 100), + runResult(110, 100), + runResult(110, 100), + ], + }), + ]); + const compare = buildCompareReport({ + baseRef: "origin/main", + headRef: "HEAD", + baseReport: base, + headReport: head, + maxRegression: 0.2, + startedAt: "2026-06-13T00:00:00.000Z", + finishedAt: "2026-06-13T00:00:01.000Z", + }); + assert.deepEqual(compare.comparisons.map((c) => c.base), [200, 100]); + assert.deepEqual(compare.comparisons.map((c) => c.head), [210, 110]); + assert.ok(compare.comparisons.every((c) => c.pass)); +}); + +test("buildCompareReport - keeps zero-median noise finite", () => { + const base = report([ + scenario({ + client: { + latencyMs: { p50: 0, p95: 100, p99: 100, mean: 50, max: 100 }, + }, + runs: [ + runResult(0, 100), + runResult(0, 100), + runResult(100, 100), + ], + }), + ]); + const head = report([ + scenario({ + client: { + latencyMs: { p50: 0, p95: 120, p99: 120, mean: 60, max: 120 }, + }, + runs: [ + runResult(0, 100), + runResult(0, 100), + runResult(120, 100), + ], + }), + ]); + const compare = buildCompareReport({ + baseRef: "origin/main", + headRef: "HEAD", + baseReport: base, + headReport: head, + maxRegression: 0.1, + startedAt: "2026-06-13T00:00:00.000Z", + finishedAt: "2026-06-13T00:00:01.000Z", + }); + const latency = compare.comparisons.find((c) => c.metric === "latency.p95"); + assert.ok(latency); + assert.strictEqual(latency.noiseBand, 0); + assert.strictEqual(latency.allowedRegression, 0.1); + assert.strictEqual(latency.pass, false); + assert.strictEqual( + JSON.parse(JSON.stringify(compare)).comparisons[0].noiseBand, + 0, + ); +}); + +test("buildCompareReport - missing client metrics fail comparisons", () => { + const malformed = scenario() as unknown as Record; + delete malformed.client; + const compare = buildCompareReport({ + 
baseRef: "origin/main", + headRef: "HEAD", + baseReport: report([scenario()]), + headReport: report([malformed as unknown as ScenarioResult]), + maxRegression: 0.1, + startedAt: "2026-06-13T00:00:00.000Z", + finishedAt: "2026-06-13T00:00:01.000Z", + }); + assert.strictEqual(compare.comparisons[0].head, null); + assert.strictEqual(compare.comparisons[0].pass, false); + assert.strictEqual(compare.passed, false); +}); + +test("buildCompareReport - missing baseline metrics pass comparisons", () => { + const signatureExpectation = (actual: number) => + ({ + metric: "signatureVerification.p95", + op: "lt", + threshold: 20, + unit: "ms", + actual, + severity: "fail", + pass: true, + }) as const; + const compare = buildCompareReport({ + baseRef: "origin/main", + headRef: "HEAD", + baseReport: report([scenario({ server: null })]), + headReport: report([ + scenario({ + expectations: [signatureExpectation(12)], + server: { + signatureVerificationMs: { + overall: { p50: 6, p95: 12, p99: 28 }, + }, + }, + }), + ]), + maxRegression: 0.1, + startedAt: "2026-06-13T00:00:00.000Z", + finishedAt: "2026-06-13T00:00:01.000Z", + }); + assert.strictEqual(compare.comparisons[0].base, null); + assert.strictEqual(compare.comparisons[0].head, 12); + assert.strictEqual(compare.comparisons[0].pass, true); + assert.strictEqual(compare.passed, true); +}); + +test("startBenchmarkTarget - keeps target stdout off stdout", async () => { + let options: SpawnOptions | undefined; + const child = fakeChildProcess(); + let stderr = ""; + const target = startBenchmarkTarget("/tmp/base", "pnpm dev", { + platform: "linux", + stderr: { + write: (chunk) => { + stderr += Buffer.isBuffer(chunk) ? 
chunk.toString("utf-8") : chunk; + return true; + }, + }, + spawn: (command, spawnOptions) => { + assert.strictEqual(command, "pnpm dev"); + options = spawnOptions; + return child; + }, + }); + assert.deepEqual(options?.stdio, ["ignore", "pipe", "pipe"]); + child.stdout.emit("data", Buffer.from("stdout log\n")); + child.stderr.emit("data", "stderr log\n"); + assert.strictEqual(stderr, "stdout log\nstderr log\n"); + await target.stop(); +}); + +test("stopTargetProcess - kills the Windows process tree", async () => { + const child = fakeChildProcess(4321); + const kills: Array<[number, NodeJS.Signals]> = []; + await stopTargetProcess(child, { + platform: "win32", + killWindowsProcessTree: (pid, signal) => { + kills.push([pid, signal]); + child.emit("exit", null, signal); + }, + }); + assert.deepEqual(kills, [[4321, "SIGTERM"]]); +}); + +test("windowsTaskkillArgs - only force kills on SIGKILL", () => { + assert.deepEqual(windowsTaskkillArgs(4321, "SIGTERM"), [ + "/pid", + "4321", + "/T", + ]); + assert.deepEqual(windowsTaskkillArgs(4321, "SIGKILL"), [ + "/pid", + "4321", + "/T", + "/F", + ]); +}); + +test("stopTargetProcess - rejects when forced kill does not exit", async () => { + const child = fakeChildProcess(4321); + child.kill = () => true; + await assert.rejects( + stopTargetProcess(child, { + forceTimeoutMs: 1, + forceKillTimeoutMs: 1, + }), + /did not exit/, + ); +}); + +test("stopTargetProcess - rejects when forced kill throws", async () => { + const child = fakeChildProcess(4321); + child.kill = () => true; + await assert.rejects( + stopTargetProcess(child, { + platform: "win32", + forceTimeoutMs: 1, + forceKillTimeoutMs: 10, + killWindowsProcessTree: (_pid, signal) => { + if (signal === "SIGKILL") { + throw new Error("forced kill failed"); + } + }, + }), + /forced kill failed/, + ); +}); + +test("stopTargetProcess - resolves immediately without a pid", async () => { + const child = fakeChildProcess(); + Object.defineProperty(child, "pid", { value: undefined 
}); + let killed = false; + child.kill = () => { + killed = true; + return true; + }; + await stopTargetProcess(child, { + forceTimeoutMs: 1, + forceKillTimeoutMs: 1, + }); + assert.strictEqual(killed, false); +}); + +test("createBenchmarkWorktree - cleans partial registrations", async () => { + const calls: string[][] = []; + const removals: string[] = []; + await assert.rejects( + createBenchmarkWorktree("missing-ref", "base", { + createTempDir: () => Promise.resolve("/tmp/fedify-bench-base-test"), + removePath: (path) => { + removals.push(path); + return Promise.resolve(); + }, + runGit: (args) => { + calls.push([...args]); + if (args[1] === "add") { + return Promise.reject(new Error("checkout failed")); + } + return Promise.resolve(); + }, + }), + /checkout failed/, + ); + assert.deepEqual(calls, [ + [ + "worktree", + "add", + "--detach", + "/tmp/fedify-bench-base-test", + "missing-ref", + ], + ["worktree", "remove", "--force", "/tmp/fedify-bench-base-test"], + ]); + assert.deepEqual(removals, ["/tmp/fedify-bench-base-test"]); +}); + +test("waitReadyUrl - does not wait for streaming response bodies", async () => { + let calls = 0; + await waitReadyUrl(new URL("http://ready.test/health"), 100, { + fetch: () => { + calls++; + return Promise.resolve( + new Response( + new ReadableStream({ + start(controller) { + controller.enqueue(new Uint8Array([1])); + }, + }), + { status: 200 }, + ), + ); + }, + }); + assert.strictEqual(calls, 1); +}); + +test("waitReadyUrl - tolerates response bodies without cancel", async () => { + await waitReadyUrl(new URL("http://ready.test/health"), 100, { + fetch: () => + Promise.resolve({ + status: 200, + body: {}, + } as Response), + }); +}); + +test("waitReadyUrl - aborts a hanging fetch at the timeout", async () => { + const startedAt = Date.now(); + await assert.rejects( + waitReadyUrl(new URL("http://ready.test/health"), 20, { + fetch: (_input, init) => + new Promise((_resolve, reject) => { + init?.signal?.addEventListener( + 
"abort", + () => reject(new Error("aborted")), + { once: true }, + ); + }), + sleep: () => Promise.resolve(), + }), + /Timed out waiting/, + ); + assert.ok(Date.now() - startedAt < 1000); +}); + +test("waitReadyUrl - prefers abort reason over transport errors", async () => { + await assert.rejects( + waitReadyUrl(new URL("http://ready.test/health"), 20, { + fetch: (_input, init) => + new Promise((_resolve, reject) => { + init?.signal?.addEventListener( + "abort", + () => reject(new TypeError("transport failure")), + { once: true }, + ); + }), + sleep: () => Promise.resolve(), + }), + /ready URL timed out after 20ms/, + ); +}); + +test("runBenchCompare - orchestrates worktrees and cleans up", async () => { + const events: string[] = []; + let code = -1; + let output = ""; + await runBenchCompare(command({ maxRegression: "10%" }), { + exit: (c) => { + code = c; + }, + writeOutput: (content) => { + output = content; + return Promise.resolve(); + }, + log: (message) => events.push(`log:${message}`), + createWorktree: (ref, label) => { + events.push(`worktree:${label}:${ref}`); + return Promise.resolve(`/tmp/${label}`); + }, + removeWorktree: (path) => { + events.push(`remove:${path}`); + return Promise.resolve(); + }, + startTarget: (cwd, startCommand) => { + events.push(`start:${cwd}:${startCommand}`); + return Promise.resolve({ + stop: () => { + events.push(`stop:${cwd}`); + return Promise.resolve(); + }, + }); + }, + waitReady: (url, timeoutMs) => { + events.push(`ready:${url.href}:${timeoutMs}`); + return Promise.resolve(); + }, + runBenchInWorktree: ({ cwd, target }) => { + events.push(`bench:${cwd}:${target}`); + return Promise.resolve(report([scenario()])); + }, + }); + assert.strictEqual(code, 0); + assert.strictEqual(JSON.parse(output).passed, true); + assert.deepEqual(events, [ + "log:Checking out base benchmark ref origin/main…", + "worktree:base:origin/main", + "start:/tmp/base:pnpm dev", + "ready:http://127.0.0.1:3000/health:30000", + 
"bench:/tmp/base:http://127.0.0.1:3000", + "stop:/tmp/base", + "log:Checking out head benchmark ref HEAD…", + "worktree:head:HEAD", + "start:/tmp/head:pnpm dev", + "ready:http://127.0.0.1:3000/health:30000", + "bench:/tmp/head:http://127.0.0.1:3000", + "stop:/tmp/head", + "remove:/tmp/head", + "remove:/tmp/base", + ]); +}); + +test("runBenchCompare - stops target and removes worktree on interrupt", async () => { + const signals = new EventEmitter(); + const events: string[] = []; + let code = -1; + await runBenchCompare(command({}), { + exit: (c) => { + code = c; + }, + writeOutput: () => { + events.push("write"); + return Promise.resolve(); + }, + log: (message) => events.push(`log:${message}`), + createWorktree: (_ref, label) => Promise.resolve(`/tmp/${label}`), + removeWorktree: (path) => { + events.push(`remove:${path}`); + return Promise.resolve(); + }, + startTarget: (cwd) => { + events.push(`start:${cwd}`); + return Promise.resolve({ + stop: () => { + events.push(`stop:${cwd}`); + return Promise.resolve(); + }, + }); + }, + waitReady: (_url, _timeoutMs, signal) => { + assert.ok(signal); + return new Promise((_resolve, reject) => { + signal.addEventListener("abort", () => { + events.push("ready-abort"); + reject(signal.reason); + }, { once: true }); + queueMicrotask(() => signals.emit("SIGINT", "SIGINT")); + }); + }, + runBenchInWorktree: () => { + events.push("bench"); + return Promise.resolve(report([scenario()])); + }, + signalTarget: signals, + }); + assert.strictEqual(code, 130); + assert.deepEqual(events, [ + "log:Checking out base benchmark ref origin/main…", + "start:/tmp/base", + "ready-abort", + "stop:/tmp/base", + "remove:/tmp/base", + ]); + assert.strictEqual(signals.listenerCount("SIGINT"), 0); + assert.strictEqual(signals.listenerCount("SIGTERM"), 0); +}); + +test("runBenchCompare - aborts raced benchmark work on interrupt", async () => { + const signals = new EventEmitter(); + const events: string[] = []; + let code = -1; + await 
runBenchCompare(command({}), { + exit: (c) => { + code = c; + }, + writeOutput: () => { + events.push("write"); + return Promise.resolve(); + }, + log: (message) => events.push(`log:${message}`), + createWorktree: (_ref, label) => Promise.resolve(`/tmp/${label}`), + removeWorktree: (path) => { + events.push(`remove:${path}`); + return Promise.resolve(); + }, + startTarget: (cwd) => { + events.push(`start:${cwd}`); + return Promise.resolve({ + stop: () => { + events.push(`stop:${cwd}`); + return Promise.resolve(); + }, + }); + }, + waitReady: () => { + events.push("ready"); + return Promise.resolve(); + }, + runBenchInWorktree: ({ signal }) => { + assert.ok(signal); + queueMicrotask(() => signals.emit("SIGTERM", "SIGTERM")); + return new Promise((_resolve, reject) => { + signal.addEventListener("abort", () => { + events.push("bench-abort"); + reject(signal.reason); + }, { once: true }); + }); + }, + signalTarget: signals, + }); + assert.strictEqual(code, 143); + assert.deepEqual(events, [ + "log:Checking out base benchmark ref origin/main…", + "start:/tmp/base", + "ready", + "bench-abort", + "stop:/tmp/base", + "remove:/tmp/base", + ]); +}); + +test("runBenchCompare - aborts benchmark work when target exits", async () => { + const events: string[] = []; + let rejectExit!: (error: Error) => void; + let code = -1; + await runBenchCompare(command({}), { + exit: (c) => { + code = c; + }, + writeOutput: () => { + events.push("write"); + return Promise.resolve(); + }, + log: (message) => events.push(`log:${message}`), + createWorktree: (_ref, label) => Promise.resolve(`/tmp/${label}`), + removeWorktree: (path) => { + events.push(`remove:${path}`); + return Promise.resolve(); + }, + startTarget: (cwd) => { + events.push(`start:${cwd}`); + return Promise.resolve({ + exited: new Promise((_resolve, reject) => { + rejectExit = reject; + }), + stop: () => { + events.push(`stop:${cwd}`); + return Promise.resolve(); + }, + }); + }, + waitReady: () => { + events.push("ready"); + 
return Promise.resolve(); + }, + runBenchInWorktree: ({ signal }) => { + assert.ok(signal); + queueMicrotask(() => rejectExit(new Error("target exited"))); + return new Promise((_resolve, reject) => { + signal.addEventListener("abort", () => { + events.push("bench-abort"); + reject(signal.reason); + }, { once: true }); + }); + }, + }); + assert.strictEqual(code, 2); + assert.deepEqual(events, [ + "log:Checking out base benchmark ref origin/main…", + "start:/tmp/base", + "ready", + "bench-abort", + "stop:/tmp/base", + "log:target exited", + "remove:/tmp/base", + ]); +}); + +test("runBenchCompare - ignores target exit while stopping normally", async () => { + const events: string[] = []; + let rejectExit!: (error: Error) => void; + let code = -1; + await runBenchCompare(command({}), { + exit: (c) => { + code = c; + }, + writeOutput: () => { + events.push("write"); + return Promise.resolve(); + }, + log: (message) => events.push(`log:${message}`), + createWorktree: (_ref, label) => Promise.resolve(`/tmp/${label}`), + removeWorktree: (path) => { + events.push(`remove:${path}`); + return Promise.resolve(); + }, + startTarget: (cwd) => { + events.push(`start:${cwd}`); + return Promise.resolve({ + exited: new Promise((_resolve, reject) => { + rejectExit = reject; + }), + stop: () => { + events.push(`stop:${cwd}`); + rejectExit(new Error("target stopped")); + return Promise.resolve(); + }, + }); + }, + waitReady: () => { + events.push("ready"); + return Promise.resolve(); + }, + runBenchInWorktree: () => { + events.push("bench"); + return Promise.resolve(report([scenario()])); + }, + }); + await Promise.resolve(); + assert.strictEqual(code, 0); + assert.deepEqual(events, [ + "log:Checking out base benchmark ref origin/main…", + "start:/tmp/base", + "ready", + "bench", + "stop:/tmp/base", + "log:Checking out head benchmark ref HEAD…", + "start:/tmp/head", + "ready", + "bench", + "stop:/tmp/head", + "write", + "remove:/tmp/head", + "remove:/tmp/base", + ]); +}); + 
+test("runBenchCompare - fails when target exits before readiness", async () => { + const events: string[] = []; + let code = -1; + await runBenchCompare(command({}), { + exit: (c) => { + code = c; + }, + writeOutput: () => { + events.push("write"); + return Promise.resolve(); + }, + log: (message) => events.push(`log:${message}`), + createWorktree: (_ref, label) => Promise.resolve(`/tmp/${label}`), + removeWorktree: (path) => { + events.push(`remove:${path}`); + return Promise.resolve(); + }, + startTarget: (cwd) => { + events.push(`start:${cwd}`); + return Promise.resolve({ + exited: Promise.reject(new Error("target exited early")), + stop: () => { + events.push(`stop:${cwd}`); + return Promise.resolve(); + }, + }); + }, + waitReady: () => { + events.push("ready"); + return Promise.resolve(); + }, + runBenchInWorktree: () => { + events.push("bench"); + return Promise.resolve(report([scenario()])); + }, + }); + assert.strictEqual(code, 2); + assert.match(events.join("\n"), /target exited early/); + assert.deepEqual(events, [ + "log:Checking out base benchmark ref origin/main…", + "start:/tmp/base", + "ready", + "stop:/tmp/base", + "log:target exited early", + "remove:/tmp/base", + ]); +}); + +test("runBenchCompare - does not treat derived target as explicit", async () => { + const file = await writeSuite(`version: 1 +target: https://example.com +defaults: + load: { rate: 1/s } + duration: 1ms +scenarios: + - name: wf + type: webfinger + recipient: "acct:alice@example.com" +`); + const events: string[] = []; + let code = -1; + await runBenchCompare( + command({ + file, + readyUrl: "https://example.com/health", + allowUnsafeTarget: true, + }), + { + exit: (c) => { + code = c; + }, + writeOutput: () => Promise.resolve(), + log: (message) => events.push(`compare:${message}`), + createWorktree: (_ref, label) => Promise.resolve(`/tmp/${label}`), + removeWorktree: () => Promise.resolve(), + startTarget: () => Promise.resolve({ stop: () => Promise.resolve() }), + 
waitReady: () => Promise.resolve(), + benchDeps: { + log: (message) => events.push(`bench:${message}`), + fetch: () => + Promise.resolve(new Response("not found", { status: 404 })), + resolveTargetAddresses: () => Promise.resolve(["93.184.216.34"]), + }, + }, + ); + assert.strictEqual(code, 2); + assert.match(events.join("\n"), /--target/); +}); diff --git a/packages/cli/src/bench/compare.ts b/packages/cli/src/bench/compare.ts new file mode 100644 index 000000000..61b353e61 --- /dev/null +++ b/packages/cli/src/bench/compare.ts @@ -0,0 +1,1061 @@ +import { + type ChildProcess, + spawn, + type SpawnOptions, +} from "node:child_process"; +import { mkdtemp, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import process from "node:process"; +import type { BenchCompareCommand, BenchRunCommand } from "./command.ts"; +import runBench from "./action.ts"; +import type { RunBenchDeps } from "./action.ts"; +import { COMPARE_REPORT_SCHEMA_ID } from "./compare/schema.ts"; +import { parseDuration } from "./scenario/units.ts"; +import type { + BenchReport, + ScenarioResult, + ScenarioRunResult, +} from "./result/model.ts"; +import { metricUnit } from "./result/expect/metrics.ts"; +import { describeError } from "../utils.ts"; + +const ZERO_BASE_LATENCY_ALLOWANCE_MS = 1; + +/** A benchmark comparison report. */ +export interface BenchCompareReport { + readonly $schema: string; + readonly schemaVersion: 1; + readonly tool: BenchReport["tool"]; + readonly environment: BenchReport["environment"]; + readonly startedAt: string; + readonly finishedAt: string; + readonly suite: BenchReport["suite"]; + readonly maxRegression: number; + readonly base: CompareSide; + readonly head: CompareSide; + readonly comparisons: ComparisonResult[]; + readonly passed: boolean; +} + +/** One side of a comparison. 
*/ +export interface CompareSide { + readonly ref: string; + readonly report: BenchReport; +} + +/** One metric comparison between base and head. */ +export interface ComparisonResult { + readonly scenario: string; + readonly metric: string; + readonly direction: "lower-is-better" | "higher-is-better"; + readonly base: number | null; + readonly head: number | null; + readonly regression: number | null; + readonly noiseBand: number; + readonly allowedRegression: number; + readonly pass: boolean; +} + +/** Dependencies injectable for tests. */ +export interface RunBenchCompareDeps { + readonly exit?: (code: number) => void; + readonly writeOutput?: ( + content: string, + outputPath: string | undefined, + ) => Promise; + readonly log?: (message: string) => void; + readonly createWorktree?: ( + ref: string, + label: "base" | "head", + ) => Promise; + readonly removeWorktree?: (path: string) => Promise; + readonly startTarget?: ( + cwd: string, + startCommand: string, + ) => Promise; + readonly waitReady?: ( + url: URL, + timeoutMs: number, + signal?: AbortSignal, + ) => Promise; + readonly runBenchInWorktree?: ( + input: RunBenchInWorktreeInput, + ) => Promise; + readonly benchDeps?: RunBenchDeps; + readonly signalTarget?: SignalTarget; +} + +/** A started target process. */ +export interface StartedTarget { + readonly exited?: Promise; + stop(): Promise; +} + +/** Input to a worktree-local benchmark run. 
*/ +export interface RunBenchInWorktreeInput { + readonly cwd: string; + readonly command: BenchCompareCommand; + readonly target: string; + readonly signal?: AbortSignal; +} + +type ProcessOutput = { + write(chunk: string | Uint8Array): unknown; +}; + +type SpawnTarget = ( + command: string, + options: SpawnOptions, +) => ChildProcess; + +type BenchRunCompareCommand = BenchRunCommand & { + readonly explicitCliTarget: boolean; +}; + +type BenchmarkSignal = "SIGINT" | "SIGTERM"; +type SignalListener = (signal: BenchmarkSignal) => void; + +interface SignalTarget { + on(signal: BenchmarkSignal, listener: SignalListener): unknown; + off(signal: BenchmarkSignal, listener: SignalListener): unknown; +} + +/** Options for starting a benchmark target. */ +export interface StartBenchmarkTargetOptions { + readonly platform?: NodeJS.Platform; + readonly spawn?: SpawnTarget; + readonly stderr?: ProcessOutput; +} + +/** Options for stopping a benchmark target process. */ +export interface StopTargetProcessOptions { + readonly platform?: NodeJS.Platform; + readonly killWindowsProcessTree?: ( + pid: number, + signal: NodeJS.Signals, + ) => void; + readonly killProcessGroup?: (pid: number, signal: NodeJS.Signals) => void; + readonly forceTimeoutMs?: number; + readonly forceKillTimeoutMs?: number; +} + +/** Dependencies for waiting until a benchmark target is ready. */ +export interface WaitReadyUrlDeps { + readonly fetch?: typeof fetch; + readonly sleep?: (ms: number, signal?: AbortSignal) => Promise; + readonly signal?: AbortSignal; +} + +type CreateTempDir = (prefix: string) => Promise; +type RemovePath = ( + path: string, + options: { readonly recursive: boolean; readonly force: boolean }, +) => Promise; +type RunGit = (args: readonly string[]) => Promise; + +/** Dependencies for creating benchmark comparison worktrees. 
*/
+export interface CreateBenchmarkWorktreeDeps {
+  readonly createTempDir?: CreateTempDir; // default: mkdtemp
+  readonly removePath?: RemovePath; // default: rm
+  readonly runGit?: RunGit; // default: this module's runGit()
+}
+
+/**
+ * Runs `fedify bench compare`.
+ *
+ * Benchmarks `command.base` and then `command.head`, each in its own worktree
+ * with its own target process, builds the comparison report, and hands the
+ * rendered report to `writeOutput`.  The outcome is reported through `exit`:
+ *
+ *  -  `0`: the comparison report passed
+ *  -  `1`: the comparison report did not pass
+ *  -  `2`: an option could not be parsed, or any other error was thrown
+ *  -  `130` / `143`: interrupted by `SIGINT` / `SIGTERM`
+ *
+ * Once option parsing has succeeded, the final cleanup always runs: a target
+ * that is still active is stopped, worktrees are removed in reverse creation
+ * order, and the signal listeners are detached.  Failures during that final
+ * cleanup are logged rather than thrown.
+ *
+ * @param command The parsed `bench compare` command.
+ * @param deps Optional replacements for the side-effecting collaborators.
+ */
+export async function runBenchCompare(
+  command: BenchCompareCommand,
+  deps: RunBenchCompareDeps = {},
+): Promise {
+  const exit = deps.exit ?? ((code: number) => {
+    process.exitCode = code;
+  });
+  const writeOutput = deps.writeOutput ?? defaultWriteOutput;
+  const log = deps.log ??
+    ((message: string) => process.stderr.write(`${message}\n`));
+  const createWorktree = deps.createWorktree ?? defaultCreateWorktree;
+  const removeWorktree = deps.removeWorktree ?? defaultRemoveWorktree;
+  const startTarget = deps.startTarget ?? defaultStartTarget;
+  const waitReady = deps.waitReady ?? defaultWaitReady;
+  const runBenchInWorktree = deps.runBenchInWorktree ??
+    ((input) => defaultRunBenchInWorktree(input, deps.benchDeps));
+  const signalTarget = deps.signalTarget ?? process;
+
+  let readyUrl: URL;
+  let readyTimeoutMs: number;
+  let maxRegression: number;
+  try {
+    readyUrl = new URL(command.readyUrl);
+    readyTimeoutMs = parseDuration(command.readyTimeout);
+    maxRegression = parseRegressionTolerance(command.maxRegression);
+  } catch (error) {
+    log(describeError(error));
+    return void exit(2); // invalid option value; nothing to clean up yet
+  }
+  const target = command.target ?? new URL("/", readyUrl).origin;
+  const worktrees: string[] = [];
+  const startedAt = new Date().toISOString();
+  let activeTarget: StartedTarget | undefined;
+  let interruptError: BenchmarkInterrupted | undefined;
+  const interruptController = new AbortController();
+  const interruptSignal = interruptController.signal;
+  let rejectInterrupt!: (error: BenchmarkInterrupted) => void;
+  const interruptPromise = new Promise((_resolve, reject) => {
+    rejectInterrupt = reject;
+  });
+  void interruptPromise.catch(() => {}); // no-op handler: never "unhandled"
+  let interrupted = false;
+  const onSignal: SignalListener = (signal) => {
+    if (interrupted) return; // only the first signal is acted on
+    interrupted = true;
+    interruptError = new BenchmarkInterrupted(signal);
+    interruptController.abort(interruptError);
+    rejectInterrupt(interruptError);
+  };
+  signalTarget.on("SIGINT", onSignal);
+  signalTarget.on("SIGTERM", onSignal);
+  try {
+    const baseReport = await runSide("base", command.base);
+    throwIfInterrupted();
+    const headReport = await runSide("head", command.head);
+    throwIfInterrupted();
+    const report = buildCompareReport({
+      baseRef: command.base,
+      headRef: command.head,
+      baseReport,
+      headReport,
+      maxRegression,
+      startedAt,
+      finishedAt: new Date().toISOString(),
+    });
+    throwIfInterrupted();
+    await withInterrupt(writeOutput(
+      renderCompareReport(report, command.format),
+      command.output,
+    ));
+    throwIfInterrupted();
+    return void exit(report.passed ? 0 : 1);
+  } catch (error) {
+    if (error instanceof BenchmarkInterrupted) {
+      return void exit(error.exitCode);
+    }
+    log(describeError(error));
+    return void exit(2);
+  } finally {
+    if (activeTarget != null) {
+      try {
+        await activeTarget.stop();
+      } catch (error) {
+        log(`Failed to stop benchmark target: ${describeError(error)}`);
+      } finally {
+        activeTarget = undefined;
+      }
+    }
+    for (let i = worktrees.length - 1; i >= 0; i--) {
+      const path = worktrees[i];
+      try {
+        await removeWorktree(path);
+      } catch (error) {
+        log(
+          `Failed to remove benchmark worktree ${path}: ${
+            describeError(error)
+          }`,
+        );
+      }
+    }
+    signalTarget.off("SIGINT", onSignal);
+    signalTarget.off("SIGTERM", onSignal);
+  }
+
+  async function runSide(
+    label: "base" | "head",
+    ref: string,
+  ): Promise {
+    log(`Checking out ${label} benchmark ref ${ref}…`);
+    const cwd = await createWorktree(ref, label);
+    worktrees.push(cwd); // recorded right away so the final cleanup removes it
+    throwIfInterrupted();
+    const targetProcess = await startTarget(cwd, command.startCommand);
+    activeTarget = targetProcess;
+    let stoppingTarget = false;
+    let targetExitError: unknown;
+    const targetExit = targetProcessExited(targetProcess).catch((error) => {
+      targetExitError = error;
+      if (!stoppingTarget && !interruptSignal.aborted) {
+        interruptController.abort(error); // cancels waitReady / the bench run
+        throw error;
+      }
+      return new Promise(() => {}); // never settles, so it cannot win a race
+    });
+    const throwIfTargetExited = () => {
+      if (targetExitError != null) throw targetExitError;
+    };
+    try {
+      throwIfInterrupted();
+      await withInterrupt(
+        Promise.race([
+          targetExit,
+          waitReady(readyUrl, readyTimeoutMs, interruptSignal),
+        ]),
+      );
+      await Promise.resolve(); // yields one microtask turn before the check
+      throwIfTargetExited();
+      return await withInterrupt(
+        Promise.race([
+          targetExit,
+          runBenchInWorktree({
+            cwd,
+            command,
+            target,
+            signal: interruptSignal,
+          }),
+        ]),
+      );
+    } finally {
+      try {
+        stoppingTarget = true; // set first: an exit from here on is expected
+        await targetProcess.stop();
+      } finally {
+        if (activeTarget === targetProcess) activeTarget = undefined;
+      }
+    }
+  }
+
+  function withInterrupt(promise: Promise): Promise {
+    return Promise.race([interruptPromise, promise]);
+  }
+
+  function throwIfInterrupted(): void {
+    if (interruptError != null) throw interruptError;
+  }
+}
+
+class BenchmarkInterrupted extends Error {
+  constructor(readonly signal: BenchmarkSignal) {
+    super(`Interrupted by ${signal}.`);
+  }
+
+  get exitCode(): number {
+    return this.signal === "SIGINT" ? 130 : 143; // 128 + 2, 128 + 15
+  }
+}
+
+function targetProcessExited(target: StartedTarget): Promise {
+  return target.exited ?? new Promise(() => {}); // no `exited`: never settles
+}
+
+/**
+ * Parses `--max-regression`, accepting ratios or percentages.
+ *
+ * Accepts an unsigned decimal such as `0.1` or `.1`, optionally followed by
+ * `%`.  A bare number is taken as a ratio and must not exceed `1`; a
+ * percentage is divided by 100.  Surrounding whitespace is ignored.
+ *
+ * @param value The raw option value.
+ * @returns The tolerance as a ratio (`"10%"` and `"0.1"` both give `0.1`).
+ * @throws {RangeError} If the value is malformed, or is a bare number greater
+ *                      than `1`.
+ */
+export function parseRegressionTolerance(value: string): number {
+  const trimmed = value.trim();
+  const match = /^(\d+(?:\.\d+)?|\.\d+)(%)?$/.exec(trimmed);
+  const numeric = match == null ? NaN : Number(match[1]);
+  if (!Number.isFinite(numeric) || numeric < 0) {
+    throw new RangeError(
+      `Invalid --max-regression value: ${JSON.stringify(value)}.`,
+    );
+  }
+  if (match?.[2] == null && numeric > 1) {
+    throw new RangeError(
+      `Invalid --max-regression value: ${JSON.stringify(value)}; ` +
+        "use a ratio between 0 and 1 or an explicit percentage.",
+    );
+  }
+  return match?.[2] === "%" ? numeric / 100 : numeric;
+}
+
+/** Builds a compare report from two benchmark reports. 
*/ +export function buildCompareReport(input: { + readonly baseRef: string; + readonly headRef: string; + readonly baseReport: BenchReport; + readonly headReport: BenchReport; + readonly maxRegression: number; + readonly startedAt: string; + readonly finishedAt: string; +}): BenchCompareReport { + const comparisons = compareReports( + input.baseReport, + input.headReport, + input.maxRegression, + ); + return { + $schema: COMPARE_REPORT_SCHEMA_ID, + schemaVersion: 1, + tool: input.headReport.tool, + environment: input.headReport.environment, + startedAt: input.startedAt, + finishedAt: input.finishedAt, + suite: input.headReport.suite, + maxRegression: input.maxRegression, + base: { ref: input.baseRef, report: input.baseReport }, + head: { ref: input.headRef, report: input.headReport }, + comparisons, + passed: input.headReport.passed && comparisons.every((c) => c.pass), + }; +} + +function compareReports( + base: BenchReport, + head: BenchReport, + maxRegression: number, +): ComparisonResult[] { + const results: ComparisonResult[] = []; + const baseByScenario = new Map(); + for (const baseScenario of base.scenarios) { + const key = comparisonScenarioKey(baseScenario); + const scenarios = baseByScenario.get(key); + if (scenarios == null) { + baseByScenario.set(key, [baseScenario]); + } else { + scenarios.push(baseScenario); + } + } + const headCounts = new Map(); + for (const headScenario of head.scenarios) { + const key = comparisonScenarioKey(headScenario); + const occurrence = headCounts.get(key) ?? 
0; + headCounts.set(key, occurrence + 1); + const baseScenario = baseByScenario.get(key)?.[occurrence]; + if (baseScenario == null) { + results.push(newScenario(headScenario.name, maxRegression)); + continue; + } + for (const metric of comparisonMetrics(headScenario)) { + results.push( + compareMetric(baseScenario, headScenario, metric, maxRegression), + ); + } + } + return results; +} + +function comparisonScenarioKey(scenario: ScenarioResult): string { + return `${scenario.name}\0${scenario.type}`; +} + +function comparisonMetrics(scenario: ScenarioResult): string[] { + const fromExpect = scenario.expectations + .map((e) => e.metric) + .filter(isPerformanceMetric); + return [ + ...new Set( + fromExpect.length < 1 ? ["latency.p95", "throughputPerSec"] : fromExpect, + ), + ]; +} + +function isPerformanceMetric(metric: string): boolean { + const unit = metricUnit(metric); + return unit === "ms" || unit === "rate"; +} + +function compareMetric( + baseScenario: ScenarioResult, + headScenario: ScenarioResult, + metric: string, + maxRegression: number, +): ComparisonResult { + const unit = metricUnit(metric); + const direction = unit === "rate" ? "higher-is-better" : "lower-is-better"; + const base = metricValue(baseScenario, metric); + const head = metricValue(headScenario, metric); + const noiseBand = Math.max( + relativeNoise(baseScenario, metric), + relativeNoise(headScenario, metric), + ); + const regression = base == null || head == null + ? 
null + : regressionRatio(base, head, direction, unit); + const allowedRegression = maxRegression + noiseBand; + return { + scenario: headScenario.name, + metric, + direction, + base, + head, + regression, + noiseBand, + allowedRegression, + pass: (base == null && head != null) || + (regression != null && regression <= allowedRegression), + }; +} + +function newScenario( + scenario: string, + maxRegression: number, +): ComparisonResult { + return { + scenario, + metric: "scenario", + direction: "lower-is-better", + base: null, + head: null, + regression: null, + noiseBand: 0, + allowedRegression: maxRegression, + pass: true, + }; +} + +function metricValue( + scenario: ScenarioResult | ScenarioRunResult, + metric: string, +): number | null { + switch (metric) { + case "throughputPerSec": + return scenario.throughputPerSec; + case "deliveryThroughput": + return scenario.deliveryThroughputPerSec ?? null; + } + if (metric.startsWith("latency.")) { + const latency = scenario.client?.latencyMs; + return latency == null ? 
null : latencyValue(latency, metric.slice(8)); + } + if (metric.startsWith("signatureVerification.")) { + return partialValue( + scenario.server?.signatureVerificationMs?.overall, + metric.slice("signatureVerification.".length), + ); + } + if (metric.startsWith("queueDrain.")) { + return partialValue( + scenario.server?.queue?.drainMs, + metric.slice("queueDrain.".length), + ); + } + return null; +} + +function latencyValue( + latency: ScenarioResult["client"]["latencyMs"], + field: string, +): number | null { + switch (field) { + case "p50": + return latency.p50; + case "p95": + return latency.p95; + case "p99": + return latency.p99; + case "mean": + return latency.mean; + case "max": + return latency.max; + default: + return null; + } +} + +function partialValue( + latency: { + readonly p50?: number; + readonly p95?: number; + readonly p99?: number; + } | undefined, + field: string, +): number | null { + switch (field) { + case "p50": + return latency?.p50 ?? null; + case "p95": + return latency?.p95 ?? null; + case "p99": + return latency?.p99 ?? null; + default: + return null; + } +} + +function regressionRatio( + base: number, + head: number, + direction: ComparisonResult["direction"], + unit: ReturnType, +): number | null { + if (!Number.isFinite(base) || !Number.isFinite(head)) { + return null; + } + if (base < 0) { + return base === head ? 0 : null; + } + if (base === 0) { + if (base === head) return 0; + if ( + direction === "lower-is-better" && + unit === "ms" && + head <= ZERO_BASE_LATENCY_ALLOWANCE_MS + ) return 0; + return direction === "higher-is-better" && head > base ? 0 : null; + } + return direction === "higher-is-better" + ? (base - head) / base + : (head - base) / base; +} + +function relativeNoise(scenario: ScenarioResult, metric: string): number { + const values = (scenario.runs ?? 
[])
+    .map((run) => metricValue(run, metric))
+    .filter((value): value is number =>
+      value != null && Number.isFinite(value)
+    );
+  if (values.length < 2) return 0;
+  const medianValue = median(values);
+  if (medianValue <= 0) {
+    return 0;
+  }
+  return (Math.max(...values) - Math.min(...values)) / (2 * medianValue);
+}
+
+/**
+ * Returns the median of `values` without mutating the input.  For an even
+ * count it is the mean of the two middle values.  The only caller passes at
+ * least two values; an empty array would yield `NaN`.
+ */
+function median(values: readonly number[]): number {
+  const sorted = [...values].sort((a, b) => a - b);
+  const middle = Math.floor(sorted.length / 2);
+  if (sorted.length % 2 === 1) return sorted[middle];
+  return (sorted[middle - 1] + sorted[middle]) / 2;
+}
+
+/**
+ * Renders a comparison report in the requested output format.
+ *
+ * @throws {RangeError} If `format` is none of `json`, `markdown`, or `text`.
+ */
+function renderCompareReport(
+  report: BenchCompareReport,
+  format: BenchCompareCommand["format"],
+): string {
+  switch (format) {
+    case "json":
+      return `${JSON.stringify(report, null, 2)}\n`;
+    case "markdown":
+      return renderCompareMarkdown(report);
+    case "text":
+      return renderCompareText(report);
+  }
+  throw new RangeError(`Unsupported benchmark report format: ${format}.`);
+}
+
+/** Renders the plain-text report: a header, one line per comparison, and the overall verdict. */
+function renderCompareText(report: BenchCompareReport): string {
+  const lines = [
+    "Fedify benchmark comparison",
+    "",
+    `Base: ${report.base.ref}`,
+    `Head: ${report.head.ref}`,
+    `Maximum regression: ${formatPercent(report.maxRegression)}`,
+    "",
+  ];
+  for (const comparison of report.comparisons) {
+    lines.push(
+      `[${comparison.pass ? "PASS" : "FAIL"}] ${comparison.scenario} ` +
+        `${comparison.metric}: base ${formatNumberOrNull(comparison.base)}, ` +
+        `head ${formatNumberOrNull(comparison.head)}, regression ${
+          formatNumberOrNull(comparison.regression, formatPercent)
+        }, noise ${formatPercent(comparison.noiseBand)}`,
+    );
+  }
+  lines.push("", `Overall: ${report.passed ? "PASS" : "FAIL"}`);
+  return `${lines.join("\n")}\n`;
+}
+
+/**
+ * Renders the Markdown report: a heading, the overall result, the compared
+ * refs, and one table row per comparison.
+ *
+ * Scenario names are free-form text, so they are escaped before being placed
+ * in a table cell; otherwise a `|` or a line break in a name would split the
+ * row and corrupt the table.
+ */
+function renderCompareMarkdown(report: BenchCompareReport): string {
+  const lines = [
+    "# Fedify benchmark comparison",
+    "",
+    `**Result:** ${report.passed ? "PASS" : "FAIL"}`,
+    "",
+    `- **Base:** \`${report.base.ref}\``,
+    `- **Head:** \`${report.head.ref}\``,
+    `- **Maximum regression:** ${formatPercent(report.maxRegression)}`,
+    "",
+    "| Scenario | Metric | Base | Head | Regression | Noise | Result |",
+    "| --- | --- | --- | --- | --- | --- | --- |",
+  ];
+  for (const comparison of report.comparisons) {
+    lines.push(
+      `| ${escapeMarkdownTableCell(comparison.scenario)} | \`${comparison.metric}\` | ${
+        formatNumberOrNull(comparison.base)
+      } | ${formatNumberOrNull(comparison.head)} | ${
+        formatNumberOrNull(comparison.regression, formatPercent)
+      } | ${formatPercent(comparison.noiseBand)} | ${
+        comparison.pass ? "PASS" : "FAIL"
+      } |`,
+    );
+  }
+  return `${lines.join("\n")}\n`;
+}
+
+/**
+ * Makes free-form text safe for a single Markdown table cell: line breaks
+ * become spaces (a row must stay on one line) and every `|` is
+ * backslash-escaped so it is not read as a column separator.
+ */
+function escapeMarkdownTableCell(text: string): string {
+  return text.replace(/\r\n|\r|\n/g, " ").replace(/\|/g, "\\|");
+}
+
+/** Formats `value` with `formatter`, or returns `"n/a"` when it is `null`. */
+function formatNumberOrNull(
+  value: number | null,
+  formatter: (value: number) => string = formatNumber,
+): string {
+  return value == null ? "n/a" : formatter(value);
+}
+
+function formatNumber(value: number): string {
+  if (!Number.isFinite(value)) return String(value);
+  return Number.isInteger(value) ? 
String(value) : value.toFixed(3); +} + +function formatPercent(value: number): string { + if (!Number.isFinite(value)) return String(value); + return `${(value * 100).toFixed(1)}%`; +} + +async function defaultRunBenchInWorktree( + input: RunBenchInWorktreeInput, + benchDeps: RunBenchDeps = {}, +): Promise { + let output = ""; + let exitCode = 0; + const runCommand: BenchRunCompareCommand = { + command: "bench", + mode: "run", + scenario: input.command.file, + target: input.target, + format: "json", + output: undefined, + dryRun: false, + advertiseHost: input.command.advertiseHost, + allowUnsafeTarget: input.command.allowUnsafeTarget, + userAgent: input.command.userAgent, + explicitCliTarget: input.command.target != null, + }; + await runBench(runCommand, { + ...benchDeps, + signal: input.signal, + exit: (code) => { + exitCode = code; + }, + writeOutput: (content) => { + output = content; + return Promise.resolve(); + }, + }); + if (exitCode === 2 || output.trim() === "") { + throw new Error(`Benchmark run failed for ${input.cwd}.`); + } + return JSON.parse(output) as BenchReport; +} + +function defaultCreateWorktree( + ref: string, + label: "base" | "head", +): Promise { + return createBenchmarkWorktree(ref, label); +} + +/** Creates a detached Git worktree for one side of a benchmark comparison. */ +export async function createBenchmarkWorktree( + ref: string, + label: "base" | "head", + deps: CreateBenchmarkWorktreeDeps = {}, +): Promise { + const createTempDir = deps.createTempDir ?? mkdtemp; + const removePath = deps.removePath ?? rm; + const run = deps.runGit ?? runGit; + const path = await createTempDir(join(tmpdir(), `fedify-bench-${label}-`)); + try { + await run(["worktree", "add", "--detach", path, ref]); + } catch (error) { + try { + await run(["worktree", "remove", "--force", path]); + } catch { + // Preserve the original checkout failure. 
+ } + try { + await removePath(path, { recursive: true, force: true }); + } catch { + // Preserve the original checkout failure. + } + throw error; + } + return path; +} + +async function defaultRemoveWorktree(path: string): Promise { + await runGit(["worktree", "remove", "--force", path]); +} + +function runGit(args: readonly string[]): Promise { + return new Promise((resolve, reject) => { + const child = spawn("git", args, { stdio: "ignore" }); + child.on("error", reject); + child.on("close", (code) => { + if (code === 0) resolve(); + else reject(new Error(`git ${args.join(" ")} exited with code ${code}`)); + }); + }); +} + +function defaultStartTarget( + cwd: string, + startCommand: string, +): Promise { + return Promise.resolve(startBenchmarkTarget(cwd, startCommand)); +} + +/** Starts a benchmark target process. */ +export function startBenchmarkTarget( + cwd: string, + startCommand: string, + options: StartBenchmarkTargetOptions = {}, +): StartedTarget { + const platform = options.platform ?? process.platform; + const spawnTarget = options.spawn ?? spawn; + const stderr = options.stderr ?? 
process.stderr; + const child = spawnTarget(startCommand, { + cwd, + detached: platform !== "win32", + shell: true, + stdio: ["ignore", "pipe", "pipe"], + env: process.env, + }); + forwardTargetOutput(child, stderr); + const exited = createTargetExitPromise(child); + void exited.catch(() => {}); + return { + exited, + stop: () => stopTargetProcess(child, { platform }), + }; +} + +function forwardTargetOutput(child: ChildProcess, stderr: ProcessOutput): void { + child.stdout?.on("data", (chunk: string | Uint8Array) => { + stderr.write(chunk); + }); + child.stderr?.on("data", (chunk: string | Uint8Array) => { + stderr.write(chunk); + }); +} + +function createTargetExitPromise(child: ChildProcess): Promise { + return new Promise((_resolve, reject) => { + const onError = (error: Error) => { + child.removeListener("exit", onExit); + reject(error); + }; + const onExit = (code: number | null, signal: NodeJS.Signals | null) => { + child.removeListener("error", onError); + const suffix = signal == null + ? ` with code ${code ?? ""}` + : ` from ${signal}`; + reject( + new Error( + `Benchmark target process ${child.pid ?? ""} exited` + + `${suffix} before benchmark completion.`, + ), + ); + }; + child.once("error", onError); + child.once("exit", onExit); + }); +} + +/** Stops a benchmark target process. */ +export function stopTargetProcess( + child: ChildProcess, + options: StopTargetProcessOptions = {}, +): Promise { + const platform = options.platform ?? process.platform; + const killWindowsProcessTree = options.killWindowsProcessTree ?? + defaultKillWindowsProcessTree; + const killProcessGroup = options.killProcessGroup ?? + ((pid, signal) => process.kill(pid, signal)); + const forceTimeoutMs = options.forceTimeoutMs ?? 5000; + const forceKillTimeoutMs = options.forceKillTimeoutMs ?? 
forceTimeoutMs; + return new Promise((resolve, reject) => { + if ( + child.pid == null || child.exitCode != null || child.signalCode != null + ) { + resolve(); + return; + } + let settled = false; + let forceKillTimer: ReturnType | undefined; + const clearTimers = () => { + clearTimeout(forceTimer); + if (forceKillTimer != null) clearTimeout(forceKillTimer); + }; + const onExit = () => { + if (settled) return; + settled = true; + clearTimers(); + resolve(); + }; + const rejectStop = (error: unknown) => { + if (settled) return; + settled = true; + clearTimers(); + child.removeListener("exit", onExit); + reject(error); + }; + const forceTimer = setTimeout(() => { + try { + killTargetProcess(child, "SIGKILL", { + platform, + killWindowsProcessTree, + killProcessGroup, + }); + } catch (error) { + rejectStop(error); + return; + } + forceKillTimer = setTimeout(() => { + rejectStop( + new Error( + `Benchmark target process ${child.pid ?? ""} ` + + "did not exit after SIGKILL.", + ), + ); + }, forceKillTimeoutMs); + }, forceTimeoutMs); + child.once("exit", onExit); + try { + killTargetProcess(child, "SIGTERM", { + platform, + killWindowsProcessTree, + killProcessGroup, + }); + } catch (error) { + rejectStop(error); + } + }); +} + +interface KillTargetProcessOptions { + readonly platform: NodeJS.Platform; + readonly killWindowsProcessTree: ( + pid: number, + signal: NodeJS.Signals, + ) => void; + readonly killProcessGroup: (pid: number, signal: NodeJS.Signals) => void; +} + +function killTargetProcess( + child: ChildProcess, + signal: NodeJS.Signals, + options: KillTargetProcessOptions, +): void { + if (child.pid == null) { + child.kill(signal); + return; + } + if (options.platform === "win32") { + options.killWindowsProcessTree(child.pid, signal); + return; + } + try { + options.killProcessGroup(-child.pid, signal); + } catch { + child.kill(signal); + } +} + +function defaultKillWindowsProcessTree( + pid: number, + signal: NodeJS.Signals, +): void { + const child = 
spawn("taskkill", windowsTaskkillArgs(pid, signal), { + stdio: "ignore", + windowsHide: true, + }); + child.on("error", () => {}); +} + +/** Builds the Windows `taskkill` arguments used for target cleanup. */ +export function windowsTaskkillArgs( + pid: number, + signal: NodeJS.Signals, +): string[] { + const args = ["/pid", String(pid), "/T"]; + if (signal === "SIGKILL") args.push("/F"); + return args; +} + +async function defaultWaitReady( + url: URL, + timeoutMs: number, + signal?: AbortSignal, +): Promise { + return await waitReadyUrl(url, timeoutMs, { signal }); +} + +/** Waits until a benchmark target readiness URL responds successfully. */ +export async function waitReadyUrl( + url: URL, + timeoutMs: number, + deps: WaitReadyUrlDeps = {}, +): Promise { + const fetchReady = deps.fetch ?? fetch; + const sleep = deps.sleep ?? + ((ms, signal) => abortableSleep(ms, signal)); + const signal = deps.signal; + const deadline = Date.now() + timeoutMs; + let lastError: unknown; + while (Date.now() <= deadline) { + throwIfAborted(signal); + const remainingMs = deadline - Date.now(); + if (remainingMs <= 0) break; + const controller = new AbortController(); + const onAbort = () => { + controller.abort(abortReason(signal!)); + }; + signal?.addEventListener("abort", onAbort, { once: true }); + const timer = setTimeout(() => { + controller.abort(new Error(`ready URL timed out after ${timeoutMs}ms`)); + }, remainingMs); + try { + const response = await fetchReady(url, { signal: controller.signal }); + void response.body?.cancel?.().catch(() => {}); + if (response.status >= 200 && response.status < 400) return; + lastError = new Error(`ready URL returned ${response.status}`); + } catch (error) { + if (signal?.aborted) throw abortReason(signal); + if (controller.signal.aborted) { + lastError = controller.signal.reason ?? 
error; + break; + } + lastError = error; + } finally { + signal?.removeEventListener("abort", onAbort); + clearTimeout(timer); + } + const delayMs = Math.min(250, deadline - Date.now()); + if (delayMs > 0) await sleep(delayMs, signal); + } + throw new Error( + `Timed out waiting for ${url.href}: ${describeError(lastError)}.`, + ); +} + +function abortableSleep(ms: number, signal?: AbortSignal): Promise { + if (signal?.aborted) return Promise.reject(abortReason(signal)); + if (ms <= 0) return Promise.resolve(); + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + cleanup(); + resolve(); + }, ms); + const onAbort = () => { + clearTimeout(timer); + cleanup(); + reject(abortReason(signal!)); + }; + const cleanup = () => { + signal?.removeEventListener("abort", onAbort); + }; + signal?.addEventListener("abort", onAbort, { once: true }); + }); +} + +function throwIfAborted(signal: AbortSignal | undefined): void { + if (signal?.aborted) throw abortReason(signal); +} + +function abortReason(signal: AbortSignal): unknown { + return signal.reason ?? new Error("Benchmark comparison aborted."); +} + +async function defaultWriteOutput( + content: string, + outputPath: string | undefined, +): Promise { + if (outputPath == null) { + process.stdout.write(content.endsWith("\n") ? content : `${content}\n`); + return; + } + await writeFile(outputPath, content, { encoding: "utf-8" }); +} diff --git a/packages/cli/src/bench/compare/schema.ts b/packages/cli/src/bench/compare/schema.ts new file mode 100644 index 000000000..3b65d6f74 --- /dev/null +++ b/packages/cli/src/bench/compare/schema.ts @@ -0,0 +1,157 @@ +/** + * The embedded JSON Schema (draft 2020-12) for benchmark comparison output. 
+ * + * The comparison report embeds the two benchmark reports it compares; this + * schema validates the comparison envelope and checks that the embedded reports + * look like current benchmark reports without duplicating the complete report + * schema in two published files. + * @since 2.3.0 + * @module + */ + +/** The hosted URL that serves the comparison report schema. */ +export const COMPARE_REPORT_SCHEMA_ID = + "https://json-schema.fedify.dev/bench/compare-report-v1.json"; + +/** The benchmark comparison report JSON Schema (draft 2020-12). */ +export const compareReportSchemaV1 = { + $schema: "https://json-schema.org/draft/2020-12/schema", + $id: COMPARE_REPORT_SCHEMA_ID, + title: "Fedify benchmark comparison report", + type: "object", + additionalProperties: false, + required: [ + "schemaVersion", + "tool", + "environment", + "startedAt", + "finishedAt", + "suite", + "maxRegression", + "base", + "head", + "comparisons", + "passed", + ], + properties: { + $schema: { type: "string" }, + schemaVersion: { const: 1 }, + tool: { $ref: "#/$defs/tool" }, + environment: { $ref: "#/$defs/environment" }, + startedAt: { type: "string" }, + finishedAt: { type: "string" }, + suite: { $ref: "#/$defs/suite" }, + maxRegression: { type: "number", minimum: 0 }, + base: { $ref: "#/$defs/compareSide" }, + head: { $ref: "#/$defs/compareSide" }, + comparisons: { + type: "array", + items: { $ref: "#/$defs/comparisonResult" }, + }, + passed: { type: "boolean" }, + }, + $defs: { + tool: { + type: "object", + additionalProperties: false, + required: ["name", "version"], + properties: { + name: { type: "string" }, + version: { type: "string" }, + }, + }, + environment: { + type: "object", + additionalProperties: false, + required: ["runtime", "runtimeVersion", "os", "cpuCount"], + properties: { + runtime: { type: "string" }, + runtimeVersion: { type: "string" }, + os: { type: "string" }, + cpuCount: { type: "integer", minimum: 0 }, + }, + }, + suite: { + type: "object", + 
additionalProperties: false, + required: ["configHash"], + properties: { + name: { type: "string" }, + configHash: { type: "string" }, + }, + }, + benchmarkReport: { + type: "object", + additionalProperties: true, + required: [ + "$schema", + "schemaVersion", + "tool", + "environment", + "target", + "suite", + "passed", + "scenarios", + ], + properties: { + $schema: { + const: "https://json-schema.fedify.dev/bench/report-v3.json", + }, + schemaVersion: { const: 3 }, + tool: { $ref: "#/$defs/tool" }, + environment: { $ref: "#/$defs/environment" }, + target: { $ref: "#/$defs/target" }, + suite: { $ref: "#/$defs/suite" }, + passed: { type: "boolean" }, + scenarios: { type: "array" }, + }, + }, + target: { + type: "object", + additionalProperties: false, + required: ["url", "statsAvailable"], + properties: { + url: { type: "string" }, + fedifyVersion: { type: ["string", "null"] }, + statsAvailable: { type: "boolean" }, + }, + }, + compareSide: { + type: "object", + additionalProperties: false, + required: ["ref", "report"], + properties: { + ref: { type: "string" }, + report: { $ref: "#/$defs/benchmarkReport" }, + }, + }, + comparisonResult: { + type: "object", + additionalProperties: false, + required: [ + "scenario", + "metric", + "direction", + "base", + "head", + "regression", + "noiseBand", + "allowedRegression", + "pass", + ], + properties: { + scenario: { type: "string" }, + metric: { type: "string" }, + direction: { + enum: ["lower-is-better", "higher-is-better"], + }, + base: { type: ["number", "null"] }, + head: { type: ["number", "null"] }, + regression: { type: ["number", "null"] }, + noiseBand: { type: "number", minimum: 0 }, + allowedRegression: { type: "number", minimum: 0 }, + pass: { type: "boolean" }, + }, + }, + }, +} as const; diff --git a/packages/cli/src/bench/load/clock.ts b/packages/cli/src/bench/load/clock.ts index 848e0bdce..95eaf2d0e 100644 --- a/packages/cli/src/bench/load/clock.ts +++ b/packages/cli/src/bench/load/clock.ts @@ -10,17 +10,36 
@@ export interface Clock { /** The current time in milliseconds (monotonic, not wall-clock). */ now(): number; /** Resolves once the clock reaches `timeMs` (or immediately if already past). */ - sleepUntil(timeMs: number): Promise; + sleepUntil(timeMs: number, signal?: AbortSignal): Promise; } /** Returns a clock backed by `performance.now()` and `setTimeout`. */ export function systemClock(): Clock { return { now: () => performance.now(), - sleepUntil(timeMs: number): Promise { + sleepUntil(timeMs: number, signal?: AbortSignal): Promise { + if (signal?.aborted) return Promise.reject(abortReason(signal)); const remaining = timeMs - performance.now(); if (remaining <= 0) return Promise.resolve(); - return new Promise((resolve) => setTimeout(resolve, remaining)); + return new Promise((resolve, reject) => { + const timer = setTimeout(() => { + cleanup(); + resolve(); + }, remaining); + const onAbort = () => { + clearTimeout(timer); + cleanup(); + reject(abortReason(signal!)); + }; + const cleanup = () => { + signal?.removeEventListener("abort", onAbort); + }; + signal?.addEventListener("abort", onAbort, { once: true }); + }); }, }; } + +function abortReason(signal: AbortSignal): unknown { + return signal.reason ?? 
new Error("Operation aborted."); +} diff --git a/packages/cli/src/bench/load/generator.test.ts b/packages/cli/src/bench/load/generator.test.ts index c23567f26..9ce56aa97 100644 --- a/packages/cli/src/bench/load/generator.test.ts +++ b/packages/cli/src/bench/load/generator.test.ts @@ -153,3 +153,21 @@ test("runLoad - records send exceptions as failed samples", async () => { assert.ok(result.samples.every((s) => !s.outcome.ok)); assert.ok(result.samples.every((s) => s.outcome.errorKind === "exception")); }); + +test("runLoad - aborts scheduled sleeps", async () => { + const controller = new AbortController(); + const startedAt = Date.now(); + const load = runLoad( + { + load: { kind: "open", ratePerSec: 1, arrival: "constant" }, + durationMs: 10_000, + warmupMs: 0, + }, + () => Promise.resolve(ok), + undefined, + controller.signal, + ); + setTimeout(() => controller.abort(new Error("cancelled")), 10); + await assert.rejects(load, /cancelled/); + assert.ok(Date.now() - startedAt < 1000); +}); diff --git a/packages/cli/src/bench/load/generator.ts b/packages/cli/src/bench/load/generator.ts index 7b05caae1..a4744a9a3 100644 --- a/packages/cli/src/bench/load/generator.ts +++ b/packages/cli/src/bench/load/generator.ts @@ -72,10 +72,11 @@ export function runLoad( plan: LoadPlan, send: SendFunction, clock: Clock = systemClock(), + signal?: AbortSignal, ): Promise { return plan.load.kind === "open" - ? runOpenLoop(plan, plan.load, send, clock) - : runClosedLoop(plan, plan.load, send, clock); + ? runOpenLoop(plan, plan.load, send, clock, signal) + : runClosedLoop(plan, plan.load, send, clock, signal); } async function runOpenLoop( @@ -83,6 +84,7 @@ async function runOpenLoop( load: Extract, send: SendFunction, clock: Clock, + signal: AbortSignal | undefined, ): Promise { const arrivals = scheduleArrivals({ ratePerSec: load.ratePerSec, @@ -98,8 +100,13 @@ async function runOpenLoop( // bounded by the in-flight count rather than the total request count. 
const active = new Set>(); for (const offset of arrivals) { - await clock.sleepUntil(start + offset); - if (await slots.acquire()) saturated = true; + throwIfAborted(signal); + await clock.sleepUntil(start + offset, signal); + if (await slots.acquire(signal)) saturated = true; + if (signal?.aborted) { + slots.release(); + throw abortReason(signal); + } const dispatched = dispatch( send, offset, @@ -123,6 +130,7 @@ async function runClosedLoop( load: Extract, send: SendFunction, clock: Clock, + signal: AbortSignal | undefined, ): Promise { const samples: Sample[] = []; const slots = createSemaphore(load.maxInFlight); @@ -131,7 +139,12 @@ async function runClosedLoop( const deadline = start + plan.durationMs; async function worker(): Promise { while (clock.now() < deadline) { - if (await slots.acquire()) saturated = true; + throwIfAborted(signal); + if (await slots.acquire(signal)) saturated = true; + if (signal?.aborted) { + slots.release(); + throw abortReason(signal); + } if (clock.now() >= deadline) { slots.release(); break; @@ -176,7 +189,7 @@ async function dispatch( interface Semaphore { /** Acquires a slot; resolves `true` if it had to wait (backpressure). */ - acquire(): Promise; + acquire(signal?: AbortSignal): Promise; /** Releases a slot, transferring it to the next waiter if any. */ release(): void; } @@ -188,7 +201,8 @@ function createSemaphore(max: number | undefined): Semaphore { let count = 0; const queue: Array<() => void> = []; return { - acquire(): Promise { + acquire(signal?: AbortSignal): Promise { + throwIfAborted(signal); if (count < max) { count++; return Promise.resolve(false); @@ -196,7 +210,23 @@ function createSemaphore(max: number | undefined): Semaphore { // Wait in FIFO order; release() transfers the slot to us directly // (count is not decremented), so an active worker cannot barge ahead of // a queued one. 
- return new Promise((resolve) => queue.push(() => resolve(true))); + return new Promise((resolve, reject) => { + const waiter = () => { + cleanup(); + resolve(true); + }; + const onAbort = () => { + const index = queue.indexOf(waiter); + if (index >= 0) queue.splice(index, 1); + cleanup(); + reject(abortReason(signal!)); + }; + const cleanup = () => { + signal?.removeEventListener("abort", onAbort); + }; + signal?.addEventListener("abort", onAbort, { once: true }); + queue.push(waiter); + }); }, release(): void { const next = queue.shift(); @@ -205,3 +235,11 @@ function createSemaphore(max: number | undefined): Semaphore { }, }; } + +function throwIfAborted(signal: AbortSignal | undefined): void { + if (signal?.aborted) throw abortReason(signal); +} + +function abortReason(signal: AbortSignal): unknown { + return signal.reason ?? new Error("Benchmark load aborted."); +} diff --git a/packages/cli/src/bench/mod.ts b/packages/cli/src/bench/mod.ts index 4ca1ef232..7d91a54fc 100644 --- a/packages/cli/src/bench/mod.ts +++ b/packages/cli/src/bench/mod.ts @@ -1,2 +1,12 @@ -export { default as runBench } from "./action.ts"; +import runBenchSuite from "./action.ts"; +import { runBenchCompare } from "./compare.ts"; +import type { BenchCommand } from "./command.ts"; + export { benchCommand } from "./command.ts"; + +/** Runs a parsed benchmark command. */ +export function runBench(command: BenchCommand): Promise { + return command.mode === "compare" + ? 
runBenchCompare(command) + : runBenchSuite(command); +} diff --git a/packages/cli/src/bench/render/render.test.ts b/packages/cli/src/bench/render/render.test.ts index d96b732bc..a534f37b1 100644 --- a/packages/cli/src/bench/render/render.test.ts +++ b/packages/cli/src/bench/render/render.test.ts @@ -5,7 +5,7 @@ import { dirname, join } from "node:path"; import test from "node:test"; import { fileURLToPath } from "node:url"; import type { BenchReport } from "../result/model.ts"; -import { reportSchemaV2 } from "../result/schema.ts"; +import { reportSchemaV3 } from "../result/schema.ts"; import { renderReport } from "./index.ts"; // `import.meta.dirname` needs Node >= 20.11; derive it from the URL instead. @@ -21,7 +21,7 @@ test("renderReport json - valid JSON that validates against the schema", () => { const json = renderReport(report, "json"); const parsed = JSON.parse(json); const validator = new Validator( - reportSchemaV2 as unknown as Schema, + reportSchemaV3 as unknown as Schema, "2020-12", ); assert.ok(validator.validate(parsed).valid); diff --git a/packages/cli/src/bench/result/build.test.ts b/packages/cli/src/bench/result/build.test.ts index 4a268dc57..813b0a765 100644 --- a/packages/cli/src/bench/result/build.test.ts +++ b/packages/cli/src/bench/result/build.test.ts @@ -9,7 +9,8 @@ import { detectEnvironment, type ScenarioMeasurement, } from "./build.ts"; -import { reportSchemaV2 } from "./schema.ts"; +import { LogLinearHistogram } from "../metrics/histogram.ts"; +import { reportSchemaV3 } from "./schema.ts"; function resolvedInbox() { return normalizeSuite({ @@ -50,6 +51,7 @@ test("buildScenarioResult - summarizes load and evaluates expect", () => { assert.strictEqual(result.expectations.length, 2); assert.ok(result.expectations.every((e) => e.pass)); assert.strictEqual(result.passed, true); + assert.strictEqual(result.runCount, 1); }); test("buildScenarioResult - a run that measured nothing never passes", () => { @@ -70,6 +72,147 @@ 
test("buildScenarioResult - preserves delivery throughput", () => { assert.strictEqual(result.deliveryThroughputPerSec, 42); }); +test("buildScenarioResult - aggregates repeated runs for CI gates", () => { + const scenario = normalizeSuite({ + version: 1, + target: "http://localhost:3000", + defaults: { + load: { concurrency: 50 }, + duration: "60s", + warmup: "10s", + runs: 3, + }, + scenarios: [{ + name: "inbox-shared", + type: "inbox", + recipient: "acct:a@x", + expect: { + successRate: ">= 95%", + "latency.p95": "< 250ms", + throughputPerSec: ">= 100/s", + }, + }], + }).scenarios[0]; + const result = buildScenarioResult(scenario, [ + { + ...measurement(), + requests: { total: 10, ok: 10, failed: 0, successRate: 1 }, + throughputPerSec: 90, + client: { + latencyMs: { p50: 10, p95: 100, p99: 110, mean: 20, max: 120 }, + }, + }, + { + ...measurement(), + requests: { total: 10, ok: 9, failed: 1, successRate: 0.9 }, + throughputPerSec: 100, + client: { + latencyMs: { p50: 20, p95: 200, p99: 210, mean: 30, max: 220 }, + }, + }, + { + ...measurement(), + requests: { total: 10, ok: 10, failed: 0, successRate: 1 }, + throughputPerSec: 200, + client: { + latencyMs: { p50: 30, p95: 300, p99: 310, mean: 40, max: 320 }, + }, + }, + ]); + assert.strictEqual(result.runCount, 3); + assert.strictEqual(result.runs?.length, 3); + assert.strictEqual(result.client.latencyMs.p95, 200); + assert.strictEqual(result.throughputPerSec, 100); + assert.strictEqual(result.requests.successRate, 0.9); + assert.strictEqual(result.expectations[0].actual, 0.9); + assert.strictEqual(result.expectations[1].actual, 200); + assert.strictEqual(result.expectations[2].actual, 100); + assert.strictEqual(result.passed, false); +}); + +test("buildScenarioResult - fails repeated server gates with missing stats", () => { + const scenario = normalizeSuite({ + version: 1, + target: "http://localhost:3000", + defaults: { + load: { concurrency: 50 }, + duration: "60s", + warmup: "10s", + runs: 3, + }, + 
scenarios: [{ + name: "inbox-shared", + type: "inbox", + recipient: "acct:a@x", + expect: { "signatureVerification.p95": "< 20ms" }, + }], + }).scenarios[0]; + const result = buildScenarioResult(scenario, [ + measurement(), + { ...measurement(), server: null }, + measurement(), + ]); + assert.strictEqual(result.server, null); + assert.strictEqual(result.expectations[0].actual, null); + assert.strictEqual(result.expectations[0].pass, false); + assert.strictEqual(result.passed, false); +}); + +test("buildScenarioResult - keeps present by-standard repeated metrics", () => { + const first = measurement(); + const missingStandard = measurement(); + const third = measurement(); + const result = buildScenarioResult(resolvedInbox(), [ + { + ...first, + server: { + signatureVerificationMs: { + overall: first.server!.signatureVerificationMs!.overall, + byStandard: { + "rfc9421": { p50: 1, p95: 10, p99: 100 }, + }, + }, + }, + }, + { + ...missingStandard, + server: { + signatureVerificationMs: { + overall: missingStandard.server!.signatureVerificationMs!.overall, + }, + }, + }, + { + ...third, + server: { + signatureVerificationMs: { + overall: third.server!.signatureVerificationMs!.overall, + byStandard: { + "rfc9421": { p50: 3, p95: 30, p99: 300 }, + }, + }, + }, + }, + ]); + assert.deepEqual( + result.server?.signatureVerificationMs?.byStandard?.["rfc9421"], + { p50: 2, p95: 20, p99: 200 }, + ); +}); + +test("buildScenarioResult - omits aggregate repeated-run histogram", () => { + const first = new LogLinearHistogram(); + first.record(10); + const second = new LogLinearHistogram(); + second.record(100); + const result = buildScenarioResult(resolvedInbox(), [ + { ...measurement(), histogram: first.toJSON() }, + { ...measurement(), histogram: second.toJSON() }, + ]); + assert.strictEqual(result.histogram, undefined); + assert.ok(result.runs?.every((run) => run.histogram != null)); +}); + test("buildReport - gate passes only when all scenarios pass", () => { const ok = 
buildScenarioResult(resolvedInbox(), measurement()); const bad = buildScenarioResult(resolvedInbox(), { @@ -101,7 +244,7 @@ test("buildReport - output validates against the report schema", () => { suite: { name: "suite", configHash: configHash({ a: 1 }) }, }); const validator = new Validator( - reportSchemaV2 as unknown as Schema, + reportSchemaV3 as unknown as Schema, "2020-12", ); const result = validator.validate(JSON.parse(JSON.stringify(report))); diff --git a/packages/cli/src/bench/result/build.ts b/packages/cli/src/bench/result/build.ts index 9e04d73b3..7fee0ba85 100644 --- a/packages/cli/src/bench/result/build.ts +++ b/packages/cli/src/bench/result/build.ts @@ -24,6 +24,7 @@ import type { LoadSummary, RequestSummary, ScenarioResult, + ScenarioRunResult, ServerMetrics, TargetInfo, } from "./model.ts"; @@ -48,9 +49,16 @@ export interface ScenarioMeasurement { */ export function buildScenarioResult( scenario: ResolvedScenario, - measurement: ScenarioMeasurement, + measurement: ScenarioMeasurement | readonly ScenarioMeasurement[], ): ScenarioResult { - const { results, passed } = evaluateExpect(scenario.expect, measurement); + const measurements = Array.isArray(measurement) ? measurement : [measurement]; + if (measurements.length < 1) { + throw new RangeError("At least one scenario measurement is required."); + } + const aggregate = measurements.length === 1 + ? 
measurements[0] + : aggregateMeasurements(measurements); + const { results, passed } = evaluateExpect(scenario.expect, aggregate); // A scenario that measured no requests must never pass: an empty sample set // makes every `expect` assertion vacuously true (and a missing-metric one // could only fail), so without this guard a run that sent nothing would @@ -59,17 +67,21 @@ export function buildScenarioResult( name: scenario.name, type: scenario.type, load: loadSummary(scenario), - requests: measurement.requests, - throughputPerSec: measurement.throughputPerSec, - ...(measurement.deliveryThroughputPerSec == null ? {} : { - deliveryThroughputPerSec: measurement.deliveryThroughputPerSec, + requests: aggregate.requests, + throughputPerSec: aggregate.throughputPerSec, + ...(aggregate.deliveryThroughputPerSec == null ? {} : { + deliveryThroughputPerSec: aggregate.deliveryThroughputPerSec, }), - client: measurement.client, - server: measurement.server, - errors: measurement.errors, + client: aggregate.client, + server: aggregate.server, + errors: aggregate.errors, expectations: results, - passed: passed && measurement.requests.total > 0, - ...(measurement.histogram ? { histogram: measurement.histogram } : {}), + passed: passed && measurements.every((m) => m.requests.total > 0), + runCount: measurements.length, + ...(measurements.length > 1 + ? { runs: measurements.map((m, index) => runResult(index + 1, m)) } + : {}), + ...(aggregate.histogram ? 
{ histogram: aggregate.histogram } : {}), }; } @@ -92,7 +104,7 @@ export interface ReportInput { export function buildReport(input: ReportInput): BenchReport { return { $schema: REPORT_SCHEMA_ID, - schemaVersion: 2, + schemaVersion: 3, tool: { name: "@fedify/cli", version: metadata.version }, environment: input.environment, target: input.target, @@ -104,6 +116,193 @@ export function buildReport(input: ReportInput): BenchReport { }; } +function aggregateMeasurements( + measurements: readonly ScenarioMeasurement[], +): ScenarioMeasurement { + const errors = sumErrorBuckets(measurements.flatMap((m) => m.errors)); + const total = sum(measurements.map((m) => m.requests.total)); + const ok = sum(measurements.map((m) => m.requests.ok)); + const failed = sum(measurements.map((m) => m.requests.failed)); + const delivery = medianPresent( + measurements.map((m) => m.deliveryThroughputPerSec), + ); + return { + requests: { + total, + ok, + failed, + // Correctness gates are intentionally pessimistic in repeated runs: + // one bad run should not be hidden by two clean ones. + successRate: Math.min(...measurements.map((m) => m.requests.successRate)), + }, + throughputPerSec: median(measurements.map((m) => m.throughputPerSec)), + ...(delivery == null ? {} : { deliveryThroughputPerSec: delivery }), + client: { + latencyMs: { + p50: median(measurements.map((m) => m.client.latencyMs.p50)), + p95: median(measurements.map((m) => m.client.latencyMs.p95)), + p99: median(measurements.map((m) => m.client.latencyMs.p99)), + mean: median(measurements.map((m) => m.client.latencyMs.mean)), + max: median(measurements.map((m) => m.client.latencyMs.max)), + }, + }, + server: aggregateServer(measurements.map((m) => m.server)), + errors, + }; +} + +function runResult( + run: number, + measurement: ScenarioMeasurement, +): ScenarioRunResult { + return { + run, + requests: measurement.requests, + throughputPerSec: measurement.throughputPerSec, + ...(measurement.deliveryThroughputPerSec == null ? 
{} : { + deliveryThroughputPerSec: measurement.deliveryThroughputPerSec, + }), + client: measurement.client, + server: measurement.server, + errors: measurement.errors, + ...(measurement.histogram ? { histogram: measurement.histogram } : {}), + }; +} + +function aggregateServer( + servers: readonly (ServerMetrics | null)[], +): ServerMetrics | null { + const present = servers.filter((s): s is ServerMetrics => s != null); + if (present.length !== servers.length) return null; + const signature = aggregateSignatureVerification(present); + const queue = aggregateQueue(present); + return { + ...(signature == null ? {} : { signatureVerificationMs: signature }), + ...(queue == null ? {} : { queue }), + }; +} + +function aggregateSignatureVerification( + servers: readonly ServerMetrics[], +): NonNullable | null { + const values = servers + .map((s) => s.signatureVerificationMs) + .filter((s): s is NonNullable => + s != null + ); + if (values.length !== servers.length) return null; + const standards = new Set(); + for (const value of values) { + for (const key of Object.keys(value.byStandard ?? {})) standards.add(key); + } + const byStandard: Record> = {}; + for (const standard of standards) { + byStandard[standard] = aggregatePartial( + values.map((v) => v.byStandard?.[standard]), + "present", + ); + } + return { + overall: aggregatePartial(values.map((v) => v.overall)), + ...(Object.keys(byStandard).length < 1 ? {} : { byStandard }), + }; +} + +function aggregateQueue( + servers: readonly ServerMetrics[], +): NonNullable | null { + const values = servers + .map((s) => s.queue) + .filter((q): q is NonNullable => q != null); + if (values.length !== servers.length) return null; + const drainMs = aggregatePartial(values.map((v) => v.drainMs)); + const depths = values.map((v) => v.depthMax); + return { + ...(hasPartial(drainMs) ? { drainMs } : {}), + ...(depths.every(isNumber) ? 
{ depthMax: Math.max(...depths) } : {}), + }; +} + +type PartialMetric = { + readonly p50?: number; + readonly p95?: number; + readonly p99?: number; +}; + +function aggregatePartial( + values: readonly (PartialMetric | undefined)[], + mode: "complete" | "present" = "complete", +) { + return { + ...partialField(values, "p50", mode), + ...partialField(values, "p95", mode), + ...partialField(values, "p99", mode), + }; +} + +function partialField( + values: + readonly ({ readonly [key: string]: number | undefined } | undefined)[], + key: "p50" | "p95" | "p99", + mode: "complete" | "present", +): Record | Record { + const fieldValues = values.map((v) => v?.[key]); + if (mode === "present") { + const present = fieldValues.filter(isNumber); + return present.length < 1 + ? {} + : { [key]: median(present) } as Record; + } + return fieldValues.every(isNumber) + ? { [key]: median(fieldValues as readonly number[]) } as Record< + typeof key, + number + > + : {}; +} + +function hasPartial(value: { + readonly p50?: number; + readonly p95?: number; + readonly p99?: number; +}): boolean { + return value.p50 != null || value.p95 != null || value.p99 != null; +} + +function sumErrorBuckets(errors: readonly ErrorBucket[]): ErrorBucket[] { + const buckets = new Map(); + for (const error of errors) { + const key = `${error.kind}|${error.status ?? ""}|${error.reason}`; + const previous = buckets.get(key); + buckets.set(key, { + ...error, + count: (previous?.count ?? 0) + error.count, + }); + } + return [...buckets.values()].sort((a, b) => b.count - a.count); +} + +function medianPresent(values: readonly (number | undefined)[]): number | null { + const present = values.filter(isNumber); + return present.length < 1 ? 
null : median(present); +} + +function median(values: readonly number[]): number { + if (values.length < 1) return 0; + const sorted = [...values].sort((a, b) => a - b); + const middle = Math.floor(sorted.length / 2); + if (sorted.length % 2 === 1) return sorted[middle]; + return (sorted[middle - 1] + sorted[middle]) / 2; +} + +function sum(values: readonly number[]): number { + return values.reduce((a, b) => a + b, 0); +} + +function isNumber(value: number | undefined): value is number { + return typeof value === "number" && Number.isFinite(value); +} + /** Detects the current runtime environment for reproducibility metadata. */ export function detectEnvironment(): Environment { const g = globalThis as { diff --git a/packages/cli/src/bench/result/model.ts b/packages/cli/src/bench/result/model.ts index bbdf2d8bc..b02d90c03 100644 --- a/packages/cli/src/bench/result/model.ts +++ b/packages/cli/src/bench/result/model.ts @@ -149,15 +149,31 @@ export interface ScenarioResult { readonly errors: ErrorBucket[]; readonly expectations: ExpectResult[]; readonly passed: boolean; + /** The number of runs aggregated into this scenario result. */ + readonly runCount: number; + /** Per-run measurements, present when a scenario was repeated. */ + readonly runs?: ScenarioRunResult[]; /** An optional serialized client latency histogram for re-aggregation. */ readonly histogram?: SerializedHistogram; } +/** The measured result of one repeated scenario run. */ +export interface ScenarioRunResult { + readonly run: number; + readonly requests: RequestSummary; + readonly throughputPerSec: number; + readonly deliveryThroughputPerSec?: number; + readonly client: ClientMetrics; + readonly server: ServerMetrics | null; + readonly errors: ErrorBucket[]; + readonly histogram?: SerializedHistogram; +} + /** A complete benchmark report. */ export interface BenchReport { /** The published report schema URL. 
*/ readonly $schema?: string; - readonly schemaVersion: 2; + readonly schemaVersion: 3; readonly tool: { readonly name: string; readonly version: string }; readonly environment: Environment; readonly target: TargetInfo; diff --git a/packages/cli/src/bench/result/schema.ts b/packages/cli/src/bench/result/schema.ts index 787e4b63c..a0cdcd613 100644 --- a/packages/cli/src/bench/result/schema.ts +++ b/packages/cli/src/bench/result/schema.ts @@ -10,6 +10,10 @@ /** The hosted URL that serves the report schema. */ export const REPORT_SCHEMA_ID = + "https://json-schema.fedify.dev/bench/report-v3.json"; + +/** The hosted URL for the version 2 report schema. */ +export const REPORT_SCHEMA_V2_ID = "https://json-schema.fedify.dev/bench/report-v2.json"; /** The hosted URL for the original report schema. */ @@ -292,7 +296,7 @@ export const reportSchemaV1 = { /** The benchmark report JSON Schema (draft 2020-12). */ export const reportSchemaV2 = { ...reportSchemaV1, - $id: REPORT_SCHEMA_ID, + $id: REPORT_SCHEMA_V2_ID, properties: { ...reportSchemaV1.properties, schemaVersion: { const: 2 }, @@ -308,3 +312,66 @@ export const reportSchemaV2 = { }, }, } as const; + +/** The current benchmark report JSON Schema (draft 2020-12). 
*/ +export const reportSchemaV3 = { + ...reportSchemaV2, + $id: REPORT_SCHEMA_ID, + properties: { + ...reportSchemaV2.properties, + schemaVersion: { const: 3 }, + }, + $defs: { + ...reportSchemaV2.$defs, + scenarioRunResult: { + type: "object", + additionalProperties: false, + required: [ + "run", + "requests", + "throughputPerSec", + "client", + "server", + "errors", + ], + properties: { + run: { type: "integer", minimum: 1 }, + requests: { $ref: "#/$defs/requestSummary" }, + throughputPerSec: { type: "number" }, + deliveryThroughputPerSec: { type: "number" }, + client: { $ref: "#/$defs/clientMetrics" }, + server: { + anyOf: [{ $ref: "#/$defs/serverMetrics" }, { type: "null" }], + }, + errors: { + type: "array", + items: { $ref: "#/$defs/errorBucket" }, + }, + histogram: { $ref: "#/$defs/serializedHistogram" }, + }, + }, + scenarioResult: { + ...reportSchemaV2.$defs.scenarioResult, + required: [ + ...reportSchemaV2.$defs.scenarioResult.required, + "runCount", + ], + properties: { + ...reportSchemaV2.$defs.scenarioResult.properties, + runCount: { type: "integer", minimum: 1 }, + runs: { + type: "array", + minItems: 2, + items: { $ref: "#/$defs/scenarioRunResult" }, + }, + }, + allOf: [{ + if: { + required: ["runCount"], + properties: { runCount: { minimum: 2 } }, + }, + then: { required: ["runs"] }, + }], + }, + }, +} as const; diff --git a/packages/cli/src/bench/safety/gate.test.ts b/packages/cli/src/bench/safety/gate.test.ts index ea496dbe4..002e944c7 100644 --- a/packages/cli/src/bench/safety/gate.test.ts +++ b/packages/cli/src/bench/safety/gate.test.ts @@ -73,6 +73,7 @@ test("assertUnsafeOverrideAllowed - unsafe flag needs an explicit CLI target", ( name: "wf", explicitDuration: true, explicitLoad: true, + explicitRuns: true, }], }), (error: unknown) => @@ -92,6 +93,7 @@ test("assertUnsafeOverrideAllowed - unsafe public defaults need explicit load", name: "wf", explicitDuration: true, explicitLoad: false, + explicitRuns: true, }], }), (error: unknown) => @@ 
-111,6 +113,7 @@ test("assertUnsafeOverrideAllowed - unsafe public defaults need explicit duratio name: "wf", explicitDuration: false, explicitLoad: true, + explicitRuns: true, }], }), (error: unknown) => @@ -118,6 +121,26 @@ test("assertUnsafeOverrideAllowed - unsafe public defaults need explicit duratio ); }); +test("assertUnsafeOverrideAllowed - unsafe public defaults need explicit runs", () => { + assert.throws( + () => + assertUnsafeOverrideAllowed({ + tier: "public", + benchmarkMode: false, + allowUnsafe: true, + explicitCliTarget: true, + scenarios: [{ + name: "wf", + explicitDuration: true, + explicitLoad: true, + explicitRuns: false, + }], + }), + (error: unknown) => + error instanceof UnsafeTargetError && /runs/.test(error.message), + ); +}); + test("assertUnsafeOverrideAllowed - safe targets do not need unsafe metadata", () => { assert.doesNotThrow(() => assertUnsafeOverrideAllowed({ diff --git a/packages/cli/src/bench/safety/gate.ts b/packages/cli/src/bench/safety/gate.ts index 11c87ab8a..203e9850b 100644 --- a/packages/cli/src/bench/safety/gate.ts +++ b/packages/cli/src/bench/safety/gate.ts @@ -54,6 +54,8 @@ export interface UnsafeOverrideScenario { readonly explicitDuration: boolean; /** Whether the scenario or suite explicitly selected a load model. */ readonly explicitLoad: boolean; + /** Whether the scenario or suite explicitly set the run count. */ + readonly explicitRuns: boolean; } /** The inputs for validating an unsafe public-target override. */ @@ -75,8 +77,9 @@ export interface UnsafeOverrideContext { * * The override is only meaningful for a public target that does not advertise * benchmark mode. In that caution tier, the operator must name the target on - * the command line for this run and must explicitly set load and duration, so - * the built-in defaults cannot accidentally create a long public benchmark. 
+ * the command line for this run and must explicitly set load, duration, and + * runs, so the built-in defaults cannot accidentally create a long public + * benchmark. * @param context The unsafe override decision inputs. * @throws {UnsafeTargetError} If the unsafe override is too broad. */ @@ -110,6 +113,13 @@ export function assertUnsafeOverrideAllowed( "--allow-unsafe-target against a public target.", ); } + if (!scenario.explicitRuns) { + throw new UnsafeTargetError( + `Scenario "${scenario.name}" uses the built-in benchmark runs ` + + "default. Set runs explicitly before using --allow-unsafe-target " + + "against a public target.", + ); + } } } diff --git a/packages/cli/src/bench/scenario/normalize.test.ts b/packages/cli/src/bench/scenario/normalize.test.ts index 8456a264b..40d0e65a4 100644 --- a/packages/cli/src/bench/scenario/normalize.test.ts +++ b/packages/cli/src/bench/scenario/normalize.test.ts @@ -28,7 +28,7 @@ test("normalizeSuite - applies defaults and parses units", () => { maxInFlight: undefined, }); assert.strictEqual(s.signing, "pipeline"); - assert.strictEqual(s.runs, 1); + assert.strictEqual(s.runs, 3); assert.deepEqual(s.recipients, ["acct:alice@x"]); }); @@ -240,10 +240,20 @@ test("normalizeSuite - allows warmup shorter than duration", () => { assert.strictEqual(s.warmupMs, 9000); }); -test("normalizeSuite - rejects multiple runs (runs > 1)", () => { - assert.throws( - () => normalizeSuite(suite({ defaults: { runs: 3 } })), - (error: unknown) => - error instanceof SuiteNormalizeError && /runs/.test(error.message), - ); +test("normalizeSuite - allows multiple runs", () => { + const s = normalizeSuite(suite({ defaults: { runs: 5 } })).scenarios[0]; + assert.strictEqual(s.runs, 5); +}); + +test("normalizeSuite - scenario runs override defaults", () => { + const s = normalizeSuite(suite({ + defaults: { runs: 5 }, + scenarios: [{ + name: "wf", + type: "webfinger", + recipient: "acct:a@x", + runs: 2, + }], + })).scenarios[0]; + 
assert.strictEqual(s.runs, 2); }); diff --git a/packages/cli/src/bench/scenario/normalize.ts b/packages/cli/src/bench/scenario/normalize.ts index ab0cbe1fa..56fe20a82 100644 --- a/packages/cli/src/bench/scenario/normalize.ts +++ b/packages/cli/src/bench/scenario/normalize.ts @@ -30,7 +30,7 @@ const DEFAULT_DURATION_MS = 60_000; const DEFAULT_WARMUP_MS = 0; const DEFAULT_RATE_PER_SEC = 50; const DEFAULT_SIGNING: SigningMode = "pipeline"; -const DEFAULT_RUNS = 1; +const DEFAULT_RUNS = 3; /** The resolved load model for a scenario. */ export type LoadModel = @@ -170,12 +170,6 @@ function resolveScenario(scenario: Scenario, suite: Suite): ResolvedScenario { ); } const runs = scenario.runs ?? defaults.runs ?? DEFAULT_RUNS; - if (runs > 1) { - throw new SuiteNormalizeError( - `Scenario "${scenario.name}": multiple runs (runs > 1) are not yet ` + - "implemented in fedify bench; set runs to 1.", - ); - } return { name: scenario.name, type: scenario.type, diff --git a/packages/cli/src/bench/scenarios/failure.test.ts b/packages/cli/src/bench/scenarios/failure.test.ts index a41984c5f..b27bf8851 100644 --- a/packages/cli/src/bench/scenarios/failure.test.ts +++ b/packages/cli/src/bench/scenarios/failure.test.ts @@ -351,6 +351,65 @@ test("failureRunner - tolerates transient remote fault stats failures", async () assert.ok(statsCalls >= 3); }); +test("failureRunner - uses abortable remote fault poll sleeps", async () => { + const target = new URL("http://target.test/"); + const signal = new AbortController().signal; + const sleepSignals: (AbortSignal | undefined)[] = []; + let now = 0; + const clock: Clock = { + now: () => now, + sleepUntil: (timeMs, signal) => { + now = Math.max(now, timeMs); + sleepSignals.push(signal); + return Promise.resolve(); + }, + }; + const scenario = normalizeSuite({ + version: 1, + target: target.href, + scenarios: [{ + name: "failure", + type: "failure", + fault: "remote-404", + sender: "alice", + load: { rate: "1000/s" }, + duration: "1ms", + 
queueDrainTimeout: "1ms", + }], + }).scenarios[0]; + let triggerCalls = 0; + + await failureRunner.run({ + scenario, + target, + documentLoader: await getDocumentLoader({ allowPrivateAddress: true }), + contextLoader: await getContextLoader({ allowPrivateAddress: true }), + allowPrivateAddress: true, + fleet: null, + fetch: (input) => { + const url = new URL(input instanceof Request ? input.url : input); + if (url.pathname === "/.well-known/fedify/bench/stats") { + return Promise.resolve(statsJson(statsSnapshot({ + enqueued: triggerCalls, + completed: triggerCalls, + failed: 0, + permanentFailures: 0, + }))); + } + if (url.pathname === "/.well-known/fedify/bench/trigger") { + triggerCalls++; + return Promise.resolve(statsJson({ version: 1 }, 202)); + } + return Promise.resolve(new Response("unexpected", { status: 500 })); + }, + assertDestinationAllowed: () => {}, + clock, + signal, + }); + + assert.deepStrictEqual(sleepSignals, [signal, signal]); +}); + test("failureRunner.validate - requires sender for remote faults", () => { const scenario = normalizeSuite({ version: 1, diff --git a/packages/cli/src/bench/scenarios/failure.ts b/packages/cli/src/bench/scenarios/failure.ts index 323d96e5e..9b02359f5 100644 --- a/packages/cli/src/bench/scenarios/failure.ts +++ b/packages/cli/src/bench/scenarios/failure.ts @@ -7,6 +7,7 @@ import { Create, Note } from "@fedify/vocab"; import { discoverInbox, selectInbox } from "../discovery/discover.ts"; import { runLoad, type SendOutcome } from "../load/generator.ts"; +import { type Clock, systemClock } from "../load/clock.ts"; import { aggregateSamples } from "../metrics/aggregate.ts"; import { diffSnapshots, @@ -108,6 +109,7 @@ export const failureRunner: ScenarioRunner = { async run(context: RunContext) { this.validate?.(context.scenario); + const clock = context.clock ?? 
systemClock(); const faults = faultsOf(context); const deliveryTarget = await resolveFailureDeliveryTarget(context, faults); const remoteTargets = await resolveRemoteFailureTargets(context, faults); @@ -141,7 +143,8 @@ export const failureRunner: ScenarioRunner = { const result = await runLoad( loadPlanOf(context.scenario, context.rng), send, - context.clock, + clock, + context.signal, ); return aggregateSamples(result.samples, { measuredWindowMs: measuredWindowMs(context.scenario), @@ -337,6 +340,8 @@ async function sendRemoteFailure( fetch: fetchImpl, baseline, fault, + clock: context.clock ?? systemClock(), + signal: context.signal, timeoutMs: context.scenario.queueDrainTimeoutMs ?? DEFAULT_DRAIN_TIMEOUT_MS, }); @@ -384,13 +389,17 @@ async function waitForRemoteFault(options: { readonly fetch: typeof fetch; readonly baseline: Awaited>; readonly fault: RemoteFailureFault; + readonly clock: Clock; + readonly signal?: AbortSignal; readonly timeoutMs: number; }): Promise { if (options.baseline == null) return null; const baselineRemaining = queueTaskRemaining(options.baseline) ?? 
0; - const deadline = Date.now() + options.timeoutMs; + const deadline = options.clock.now() + options.timeoutMs; do { + throwIfAborted(options.signal); const snapshot = await fetchServerSnapshot(options.target, options.fetch); + throwIfAborted(options.signal); if (snapshot != null) { const diff = diffSnapshots(options.baseline, snapshot); const queueTasks = diff.queueTasks; @@ -416,11 +425,24 @@ async function waitForRemoteFault(options: { } } } - await new Promise((resolve) => setTimeout(resolve, DRAIN_POLL_MS)); - } while (Date.now() < deadline); + const now = options.clock.now(); + if (now >= deadline) break; + await options.clock.sleepUntil( + Math.min(deadline, now + DRAIN_POLL_MS), + options.signal, + ); + } while (options.clock.now() < deadline); return { timedOut: true }; } +function throwIfAborted(signal: AbortSignal | undefined): void { + if (signal?.aborted) throw abortReason(signal); +} + +function abortReason(signal: AbortSignal): unknown { + return signal.reason ?? new Error("Operation aborted."); +} + function expectedRemoteFailure(fault: RemoteFailureFault): SendOutcome { switch (fault) { case "remote-404": diff --git a/packages/cli/src/bench/scenarios/fanout.test.ts b/packages/cli/src/bench/scenarios/fanout.test.ts index a81ed7ccb..070dd4717 100644 --- a/packages/cli/src/bench/scenarios/fanout.test.ts +++ b/packages/cli/src/bench/scenarios/fanout.test.ts @@ -2,6 +2,7 @@ import assert from "node:assert/strict"; import test from "node:test"; import { serve } from "srvx"; import { getContextLoader, getDocumentLoader } from "../../docloader.ts"; +import type { Clock } from "../load/clock.ts"; import { normalizeSuite } from "../scenario/normalize.ts"; import type { Suite } from "../scenario/types.ts"; import { fanoutRunner, spawnSinkServer } from "./fanout.ts"; @@ -358,6 +359,63 @@ test("fanoutRunner - tolerates transient drain stats failures", async () => { assert.ok(statsCalls >= 3); }); +test("fanoutRunner - uses abortable drain poll sleeps", async 
() => { + const target = new URL("http://target.test/"); + const signal = new AbortController().signal; + const sleepSignals: (AbortSignal | undefined)[] = []; + let now = 0; + const clock: Clock = { + now: () => now, + sleepUntil: (timeMs, signal) => { + now = Math.max(now, timeMs); + sleepSignals.push(signal); + return Promise.resolve(); + }, + }; + const scenario = normalizeSuite({ + version: 1, + target: target.href, + scenarios: [{ + name: "fanout", + type: "fanout", + sender: "alice", + followers: 5, + load: { rate: "1000/s" }, + duration: "1ms", + queueDrainTimeout: "1ms", + }], + }).scenarios[0]; + let triggerCalls = 0; + + await fanoutRunner.run({ + scenario, + target, + documentLoader: await getDocumentLoader({ allowPrivateAddress: true }), + contextLoader: await getContextLoader({ allowPrivateAddress: true }), + allowPrivateAddress: true, + fleet: null, + fetch: (input) => { + const url = new URL(input instanceof Request ? input.url : input); + if (url.pathname === "/.well-known/fedify/bench/stats") { + return Promise.resolve(json(statsSnapshot({ + enqueued: triggerCalls * 6, + completed: 0, + failed: 0, + }))); + } + if (url.pathname === "/.well-known/fedify/bench/trigger") { + triggerCalls++; + return Promise.resolve(json({ version: 1 }, 202)); + } + return Promise.resolve(new Response("unexpected", { status: 500 })); + }, + clock, + signal, + }); + + assert.deepStrictEqual(sleepSignals, [signal, signal]); +}); + test("fanoutRunner - uses configured sink base for recipients", async () => { const target = new URL("http://target.test/"); const sinkBase = `http://127.0.0.1:${await reservePort()}/`; diff --git a/packages/cli/src/bench/scenarios/fanout.ts b/packages/cli/src/bench/scenarios/fanout.ts index 045fbff96..7e5670acb 100644 --- a/packages/cli/src/bench/scenarios/fanout.ts +++ b/packages/cli/src/bench/scenarios/fanout.ts @@ -6,6 +6,7 @@ import { serve } from "srvx"; import { runLoad, type SendOutcome } from "../load/generator.ts"; +import { type 
Clock, systemClock } from "../load/clock.ts"; import { aggregateSamples } from "../metrics/aggregate.ts"; import { LogLinearHistogram } from "../metrics/histogram.ts"; import { @@ -46,6 +47,7 @@ export const fanoutRunner: ScenarioRunner = { throw new Error("The fanout scenario requires a sender."); } this.validate?.(context.scenario); + const clock = context.clock ?? systemClock(); const fetchImpl = context.fetch ?? fetch; const followers = context.scenario.followers ?? DEFAULT_FOLLOWERS; const sink = await spawnSinkServer({ @@ -90,6 +92,8 @@ export const fanoutRunner: ScenarioRunner = { target: context.target, fetch: fetchImpl, baseline, + clock, + signal: context.signal, timeoutMs: context.scenario.queueDrainTimeoutMs ?? DEFAULT_DRAIN_TIMEOUT_MS, }); @@ -132,7 +136,8 @@ export const fanoutRunner: ScenarioRunner = { const result = await runLoad( loadPlanOf(context.scenario, context.rng), send, - context.clock, + clock, + context.signal, ); const measurement = aggregateSamples(result.samples, { measuredWindowMs: measuredWindowMs(context.scenario), @@ -328,13 +333,17 @@ async function waitForDrain(options: { readonly target: URL; readonly fetch: typeof fetch; readonly baseline: Awaited>; + readonly clock: Clock; + readonly signal?: AbortSignal; readonly timeoutMs: number; }): Promise { if (options.baseline == null) return null; const baselineRemaining = queueTaskRemaining(options.baseline) ?? 
0; - const deadline = Date.now() + options.timeoutMs; + const deadline = options.clock.now() + options.timeoutMs; do { + throwIfAborted(options.signal); const snapshot = await fetchServerSnapshot(options.target, options.fetch); + throwIfAborted(options.signal); if (snapshot != null) { const diff = diffSnapshots(options.baseline, snapshot); const queueTasks = diff.queueTasks; @@ -348,11 +357,24 @@ async function waitForDrain(options: { return { timedOut: false, failed: queueTasks.failed }; } } - await new Promise((resolve) => setTimeout(resolve, DRAIN_POLL_MS)); - } while (Date.now() < deadline); + const now = options.clock.now(); + if (now >= deadline) break; + await options.clock.sleepUntil( + Math.min(deadline, now + DRAIN_POLL_MS), + options.signal, + ); + } while (options.clock.now() < deadline); return { timedOut: true, failed: 0 }; } +function throwIfAborted(signal: AbortSignal | undefined): void { + if (signal?.aborted) throw abortReason(signal); +} + +function abortReason(signal: AbortSignal): unknown { + return signal.reason ?? 
new Error("Operation aborted."); +} + function addQueueDrain( server: ServerMetrics | null, histogram: LogLinearHistogram, diff --git a/packages/cli/src/bench/scenarios/inbox.ts b/packages/cli/src/bench/scenarios/inbox.ts index 4bf8379d0..7718d13fd 100644 --- a/packages/cli/src/bench/scenarios/inbox.ts +++ b/packages/cli/src/bench/scenarios/inbox.ts @@ -137,6 +137,7 @@ export const inboxRunner: ScenarioRunner = { loadPlanOf(scenario, context.rng), send, context.clock, + context.signal, ); const measurement = aggregateSamples(result.samples, { measuredWindowMs: measuredWindowMs(scenario), diff --git a/packages/cli/src/bench/scenarios/read.ts b/packages/cli/src/bench/scenarios/read.ts index 0c7f24b55..aacd15518 100644 --- a/packages/cli/src/bench/scenarios/read.ts +++ b/packages/cli/src/bench/scenarios/read.ts @@ -123,6 +123,7 @@ export async function runReadLoad( loadPlanOf(context.scenario, context.rng), send, context.clock, + context.signal, ); const measurement = aggregateSamples(result.samples, { measuredWindowMs: measuredWindowMs(context.scenario), diff --git a/packages/cli/src/bench/scenarios/runner.ts b/packages/cli/src/bench/scenarios/runner.ts index eb1ce970d..c5ccd5477 100644 --- a/packages/cli/src/bench/scenarios/runner.ts +++ b/packages/cli/src/bench/scenarios/runner.ts @@ -31,6 +31,8 @@ export interface RunContext { readonly rng?: Rng; /** Fetch implementation (overridable for tests). */ readonly fetch?: typeof fetch; + /** Aborts in-flight benchmark work when the orchestrator is interrupted. */ + readonly signal?: AbortSignal; /** Host advertised for local benchmark-owned servers. 
*/ readonly advertiseHost?: string; /** diff --git a/packages/cli/src/bench/scenarios/webfinger.ts b/packages/cli/src/bench/scenarios/webfinger.ts index 14b77a093..3b2fbb049 100644 --- a/packages/cli/src/bench/scenarios/webfinger.ts +++ b/packages/cli/src/bench/scenarios/webfinger.ts @@ -64,6 +64,7 @@ export const webfingerRunner: ScenarioRunner = { loadPlanOf(context.scenario, context.rng), send, context.clock, + context.signal, ); const measurement = aggregateSamples(result.samples, { measuredWindowMs: measuredWindowMs(context.scenario), diff --git a/packages/cli/src/bench/schema.test.ts b/packages/cli/src/bench/schema.test.ts index e3b33834f..b73677c6d 100644 --- a/packages/cli/src/bench/schema.test.ts +++ b/packages/cli/src/bench/schema.test.ts @@ -69,6 +69,7 @@ const FIXTURE_GROUPS: readonly FixtureGroup[] = [ { dir: "scenarios", schema: "scenario", valid: true }, { dir: "invalid", schema: "scenario", valid: false }, { dir: "reports", schema: "report", valid: true }, + { dir: "compare-reports", schema: "compare-report", valid: true }, ]; function fixtureFiles(dir: string): string[] { @@ -98,6 +99,27 @@ for (const group of FIXTURE_GROUPS) { } } +test("schema guard - report v3 requires runs for repeated scenarios", () => { + const file = join(FIXTURES, "reports", "inbox-report.json"); + const report = parseSuiteText(readFileSync(file, "utf-8")) as { + scenarios: Array<Record<string, unknown>>; + }; + report.scenarios[0].runCount = 2; + delete report.scenarios[0].runs; + const result = validators.get("report")!.validate(report); + assert.strictEqual(result.valid, false); +}); + +test("schema guard - compare report types embedded targets", () => { + const file = join(FIXTURES, "compare-reports", "basic.json"); + const report = parseSuiteText(readFileSync(file, "utf-8")) as { + base: { report: Record<string, unknown> }; + }; + report.base.report.target = null; + const result = validators.get("compare-report")!.validate(report); + assert.strictEqual(result.valid, false); +}); + // Guard 3: drift between
embedded schema and the published file. for (const { name, fileName, schema } of PUBLISHED_SCHEMAS) { test(`schema guard - ${name} embedded schema matches published file`, () => { diff --git a/packages/cli/src/bench/schemas.ts b/packages/cli/src/bench/schemas.ts index 82f9e279b..6574fb793 100644 --- a/packages/cli/src/bench/schemas.ts +++ b/packages/cli/src/bench/schemas.ts @@ -9,7 +9,12 @@ * @module */ -import { reportSchemaV1, reportSchemaV2 } from "./result/schema.ts"; +import { compareReportSchemaV1 } from "./compare/schema.ts"; +import { + reportSchemaV1, + reportSchemaV2, + reportSchemaV3, +} from "./result/schema.ts"; import { scenarioSchemaV1, scenarioSchemaV2 } from "./scenario/schema.ts"; /** A published JSON Schema and where it is hosted. */ @@ -36,6 +41,11 @@ export const PUBLISHED_SCHEMAS: readonly PublishedSchema[] = [ }, { name: "report", + fileName: "report-v3.json", + schema: reportSchemaV3 as unknown as Record<string, unknown>, + }, + { + name: "report-v2", fileName: "report-v2.json", schema: reportSchemaV2 as unknown as Record<string, unknown>, }, @@ -44,4 +54,9 @@ export const PUBLISHED_SCHEMAS: readonly PublishedSchema[] = [ fileName: "report-v1.json", schema: reportSchemaV1 as unknown as Record<string, unknown>, }, + { + name: "compare-report", + fileName: "compare-report-v1.json", + schema: compareReportSchemaV1 as unknown as Record<string, unknown>, + }, ]; diff --git a/schema/README.md b/schema/README.md index 7bd588534..4c27b1064 100644 --- a/schema/README.md +++ b/schema/README.md @@ -15,10 +15,14 @@ Current schemas: format (input). - *bench/scenario-v1.json* — the version 1 `fedify bench` scenario suite format (input). - - *bench/report-v2.json* — the current `fedify bench` report format + - *bench/report-v3.json* — the current `fedify bench` report format + (output). + - *bench/report-v2.json* — the version 2 `fedify bench` report format (output). - *bench/report-v1.json* — the version 1 `fedify bench` report format (output).
+ - *bench/compare-report-v1.json* — the `fedify bench compare` report format + (output). Versioning: append-only and immutable @@ -41,6 +45,7 @@ binary self-contained): - *packages/cli/src/bench/scenario/schema.ts* - *packages/cli/src/bench/result/schema.ts* + - *packages/cli/src/bench/compare/schema.ts* The *.json* files here are generated from those objects. After editing an embedded schema, regenerate the published copies: @@ -50,8 +55,9 @@ deno task -f @fedify/cli generate-bench-schema ~~~~ The matching TypeScript types live next to each schema -(*packages/cli/src/bench/scenario/types.ts* and -*packages/cli/src/bench/result/model.ts*); keep them in sync with the schema. +(*packages/cli/src/bench/scenario/types.ts*, +*packages/cli/src/bench/result/model.ts*, and +*packages/cli/src/bench/compare.ts*); keep them in sync with the schema. Guards diff --git a/schema/bench/compare-report-v1.json b/schema/bench/compare-report-v1.json new file mode 100644 index 000000000..108fe1537 --- /dev/null +++ b/schema/bench/compare-report-v1.json @@ -0,0 +1,256 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://json-schema.fedify.dev/bench/compare-report-v1.json", + "title": "Fedify benchmark comparison report", + "type": "object", + "additionalProperties": false, + "required": [ + "schemaVersion", + "tool", + "environment", + "startedAt", + "finishedAt", + "suite", + "maxRegression", + "base", + "head", + "comparisons", + "passed" + ], + "properties": { + "$schema": { + "type": "string" + }, + "schemaVersion": { + "const": 1 + }, + "tool": { + "$ref": "#/$defs/tool" + }, + "environment": { + "$ref": "#/$defs/environment" + }, + "startedAt": { + "type": "string" + }, + "finishedAt": { + "type": "string" + }, + "suite": { + "$ref": "#/$defs/suite" + }, + "maxRegression": { + "type": "number", + "minimum": 0 + }, + "base": { + "$ref": "#/$defs/compareSide" + }, + "head": { + "$ref": "#/$defs/compareSide" + }, + "comparisons": { + "type": "array", 
+ "items": { + "$ref": "#/$defs/comparisonResult" + } + }, + "passed": { + "type": "boolean" + } + }, + "$defs": { + "tool": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "version" + ], + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + } + } + }, + "environment": { + "type": "object", + "additionalProperties": false, + "required": [ + "runtime", + "runtimeVersion", + "os", + "cpuCount" + ], + "properties": { + "runtime": { + "type": "string" + }, + "runtimeVersion": { + "type": "string" + }, + "os": { + "type": "string" + }, + "cpuCount": { + "type": "integer", + "minimum": 0 + } + } + }, + "suite": { + "type": "object", + "additionalProperties": false, + "required": [ + "configHash" + ], + "properties": { + "name": { + "type": "string" + }, + "configHash": { + "type": "string" + } + } + }, + "benchmarkReport": { + "type": "object", + "additionalProperties": true, + "required": [ + "$schema", + "schemaVersion", + "tool", + "environment", + "target", + "suite", + "passed", + "scenarios" + ], + "properties": { + "$schema": { + "const": "https://json-schema.fedify.dev/bench/report-v3.json" + }, + "schemaVersion": { + "const": 3 + }, + "tool": { + "$ref": "#/$defs/tool" + }, + "environment": { + "$ref": "#/$defs/environment" + }, + "target": { + "$ref": "#/$defs/target" + }, + "suite": { + "$ref": "#/$defs/suite" + }, + "passed": { + "type": "boolean" + }, + "scenarios": { + "type": "array" + } + } + }, + "target": { + "type": "object", + "additionalProperties": false, + "required": [ + "url", + "statsAvailable" + ], + "properties": { + "url": { + "type": "string" + }, + "fedifyVersion": { + "type": [ + "string", + "null" + ] + }, + "statsAvailable": { + "type": "boolean" + } + } + }, + "compareSide": { + "type": "object", + "additionalProperties": false, + "required": [ + "ref", + "report" + ], + "properties": { + "ref": { + "type": "string" + }, + "report": { + "$ref": 
"#/$defs/benchmarkReport" + } + } + }, + "comparisonResult": { + "type": "object", + "additionalProperties": false, + "required": [ + "scenario", + "metric", + "direction", + "base", + "head", + "regression", + "noiseBand", + "allowedRegression", + "pass" + ], + "properties": { + "scenario": { + "type": "string" + }, + "metric": { + "type": "string" + }, + "direction": { + "enum": [ + "lower-is-better", + "higher-is-better" + ] + }, + "base": { + "type": [ + "number", + "null" + ] + }, + "head": { + "type": [ + "number", + "null" + ] + }, + "regression": { + "type": [ + "number", + "null" + ] + }, + "noiseBand": { + "type": "number", + "minimum": 0 + }, + "allowedRegression": { + "type": "number", + "minimum": 0 + }, + "pass": { + "type": "boolean" + } + } + } + } +} diff --git a/schema/bench/report-v3.json b/schema/bench/report-v3.json new file mode 100644 index 000000000..2fffd812b --- /dev/null +++ b/schema/bench/report-v3.json @@ -0,0 +1,603 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://json-schema.fedify.dev/bench/report-v3.json", + "title": "Fedify benchmark report", + "type": "object", + "additionalProperties": false, + "required": [ + "schemaVersion", + "tool", + "environment", + "target", + "startedAt", + "finishedAt", + "suite", + "passed", + "scenarios" + ], + "properties": { + "$schema": { + "type": "string" + }, + "schemaVersion": { + "const": 3 + }, + "tool": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "version" + ], + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + } + } + }, + "environment": { + "type": "object", + "additionalProperties": false, + "required": [ + "runtime", + "runtimeVersion", + "os", + "cpuCount" + ], + "properties": { + "runtime": { + "type": "string" + }, + "runtimeVersion": { + "type": "string" + }, + "os": { + "type": "string" + }, + "cpuCount": { + "type": "integer", + "minimum": 0 + } + } + }, + "target": { + 
"type": "object", + "additionalProperties": false, + "required": [ + "url", + "statsAvailable" + ], + "properties": { + "url": { + "type": "string" + }, + "fedifyVersion": { + "type": [ + "string", + "null" + ] + }, + "statsAvailable": { + "type": "boolean" + } + } + }, + "startedAt": { + "type": "string" + }, + "finishedAt": { + "type": "string" + }, + "suite": { + "type": "object", + "additionalProperties": false, + "required": [ + "configHash" + ], + "properties": { + "name": { + "type": "string" + }, + "configHash": { + "type": "string" + } + } + }, + "passed": { + "type": "boolean" + }, + "scenarios": { + "type": "array", + "items": { + "$ref": "#/$defs/scenarioResult" + } + } + }, + "$defs": { + "latencyMs": { + "type": "object", + "additionalProperties": false, + "required": [ + "p50", + "p95", + "p99", + "mean", + "max" + ], + "properties": { + "p50": { + "type": "number" + }, + "p95": { + "type": "number" + }, + "p99": { + "type": "number" + }, + "mean": { + "type": "number" + }, + "max": { + "type": "number" + } + } + }, + "partialLatencyMs": { + "type": "object", + "additionalProperties": false, + "properties": { + "p50": { + "type": "number" + }, + "p95": { + "type": "number" + }, + "p99": { + "type": "number" + } + } + }, + "loadSummary": { + "type": "object", + "additionalProperties": false, + "required": [ + "model", + "durationMs", + "warmupMs" + ], + "properties": { + "model": { + "enum": [ + "open", + "closed" + ] + }, + "ratePerSec": { + "type": "number" + }, + "arrival": { + "type": "string" + }, + "concurrency": { + "type": "integer" + }, + "durationMs": { + "type": "number" + }, + "warmupMs": { + "type": "number" + }, + "maxInFlight": { + "type": "integer" + } + }, + "oneOf": [ + { + "properties": { + "model": { + "const": "open" + } + }, + "required": [ + "ratePerSec", + "arrival" + ], + "not": { + "required": [ + "concurrency" + ] + } + }, + { + "properties": { + "model": { + "const": "closed" + } + }, + "required": [ + "concurrency" + ], + 
"not": { + "anyOf": [ + { + "required": [ + "ratePerSec" + ] + }, + { + "required": [ + "arrival" + ] + } + ] + } + } + ] + }, + "requestSummary": { + "type": "object", + "additionalProperties": false, + "required": [ + "total", + "ok", + "failed", + "successRate" + ], + "properties": { + "total": { + "type": "integer", + "minimum": 0 + }, + "ok": { + "type": "integer", + "minimum": 0 + }, + "failed": { + "type": "integer", + "minimum": 0 + }, + "successRate": { + "type": "number", + "minimum": 0, + "maximum": 1 + } + } + }, + "clientMetrics": { + "type": "object", + "additionalProperties": false, + "required": [ + "latencyMs" + ], + "properties": { + "latencyMs": { + "$ref": "#/$defs/latencyMs" + } + } + }, + "serverMetrics": { + "type": "object", + "additionalProperties": false, + "properties": { + "signatureVerificationMs": { + "type": "object", + "additionalProperties": false, + "required": [ + "overall" + ], + "properties": { + "overall": { + "$ref": "#/$defs/partialLatencyMs" + }, + "byStandard": { + "type": "object", + "additionalProperties": { + "$ref": "#/$defs/partialLatencyMs" + } + } + } + }, + "queue": { + "type": "object", + "additionalProperties": false, + "properties": { + "drainMs": { + "$ref": "#/$defs/partialLatencyMs" + }, + "depthMax": { + "type": "number" + } + } + } + } + }, + "errorBucket": { + "type": "object", + "additionalProperties": false, + "required": [ + "kind", + "reason", + "count" + ], + "properties": { + "kind": { + "type": "string" + }, + "status": { + "type": "integer" + }, + "reason": { + "type": "string" + }, + "count": { + "type": "integer", + "minimum": 0 + } + } + }, + "expectResult": { + "type": "object", + "additionalProperties": false, + "required": [ + "metric", + "op", + "threshold", + "unit", + "actual", + "severity", + "pass" + ], + "properties": { + "metric": { + "type": "string" + }, + "op": { + "enum": [ + "lt", + "lte", + "gt", + "gte", + "eq" + ] + }, + "threshold": { + "type": "number" + }, + "unit": { + 
"type": [ + "string", + "null" + ] + }, + "actual": { + "type": [ + "number", + "null" + ] + }, + "severity": { + "enum": [ + "warn", + "fail" + ] + }, + "pass": { + "type": "boolean" + } + } + }, + "scenarioResult": { + "type": "object", + "additionalProperties": false, + "required": [ + "name", + "type", + "load", + "requests", + "throughputPerSec", + "client", + "server", + "errors", + "expectations", + "passed", + "runCount" + ], + "properties": { + "name": { + "type": "string" + }, + "type": { + "enum": [ + "inbox", + "webfinger", + "actor", + "object", + "fanout", + "collection", + "failure", + "mixed" + ] + }, + "load": { + "$ref": "#/$defs/loadSummary" + }, + "requests": { + "$ref": "#/$defs/requestSummary" + }, + "throughputPerSec": { + "type": "number" + }, + "client": { + "$ref": "#/$defs/clientMetrics" + }, + "server": { + "anyOf": [ + { + "$ref": "#/$defs/serverMetrics" + }, + { + "type": "null" + } + ] + }, + "errors": { + "type": "array", + "items": { + "$ref": "#/$defs/errorBucket" + } + }, + "expectations": { + "type": "array", + "items": { + "$ref": "#/$defs/expectResult" + } + }, + "passed": { + "type": "boolean" + }, + "histogram": { + "$ref": "#/$defs/serializedHistogram" + }, + "deliveryThroughputPerSec": { + "type": "number" + }, + "runCount": { + "type": "integer", + "minimum": 1 + }, + "runs": { + "type": "array", + "minItems": 2, + "items": { + "$ref": "#/$defs/scenarioRunResult" + } + } + }, + "allOf": [ + { + "if": { + "required": [ + "runCount" + ], + "properties": { + "runCount": { + "minimum": 2 + } + } + }, + "then": { + "required": [ + "runs" + ] + } + } + ] + }, + "serializedHistogram": { + "type": "object", + "additionalProperties": false, + "required": [ + "version", + "subBucketCount", + "count", + "zeroCount", + "min", + "max", + "sum", + "indices", + "counts" + ], + "properties": { + "version": { + "const": 1 + }, + "subBucketCount": { + "type": "integer", + "minimum": 1 + }, + "count": { + "type": "integer", + "minimum": 0 + 
}, + "zeroCount": { + "type": "integer", + "minimum": 0 + }, + "min": { + "type": "number" + }, + "max": { + "type": "number" + }, + "sum": { + "type": "number" + }, + "indices": { + "type": "array", + "items": { + "type": "integer" + } + }, + "counts": { + "type": "array", + "items": { + "type": "integer", + "minimum": 0 + } + } + } + }, + "scenarioRunResult": { + "type": "object", + "additionalProperties": false, + "required": [ + "run", + "requests", + "throughputPerSec", + "client", + "server", + "errors" + ], + "properties": { + "run": { + "type": "integer", + "minimum": 1 + }, + "requests": { + "$ref": "#/$defs/requestSummary" + }, + "throughputPerSec": { + "type": "number" + }, + "deliveryThroughputPerSec": { + "type": "number" + }, + "client": { + "$ref": "#/$defs/clientMetrics" + }, + "server": { + "anyOf": [ + { + "$ref": "#/$defs/serverMetrics" + }, + { + "type": "null" + } + ] + }, + "errors": { + "type": "array", + "items": { + "$ref": "#/$defs/errorBucket" + } + }, + "histogram": { + "$ref": "#/$defs/serializedHistogram" + } + } + } + } +} diff --git a/schema/index.html b/schema/index.html index 54ff4cc44..939aefc09 100644 --- a/schema/index.html +++ b/schema/index.html @@ -132,16 +132,43 @@

Fedify JSON Schemas

Benchmarking (fedify bench)

    +
  • + bench/scenario-v2.json +
    + The current benchmark scenario suite format (input). YAML or JSON. +
    +
  • bench/scenario-v1.json
    - The benchmark scenario suite format (input). YAML or JSON. + The previous benchmark scenario suite format (input). YAML or + JSON. +
    +
  • +
  • + bench/report-v3.json +
    + The current benchmark report format (output). The canonical + machine form. +
    +
  • +
  • + bench/report-v2.json +
    + The previous benchmark report format (output).
  • bench/report-v1.json
    - The benchmark report format (output). The canonical machine form. + The original benchmark report format (output). +
    +
  • +
  • + bench/compare-report-v1.json +
    + The benchmark comparison report format generated by + fedify bench compare.
@@ -151,7 +178,7 @@

Editor support

Add a schema reference to your scenario file for autocomplete and validation in editors with the YAML Language Server:

-
# yaml-language-server: $schema=https://json-schema.fedify.dev/bench/scenario-v1.json
+      
# yaml-language-server: $schema=https://json-schema.fedify.dev/bench/scenario-v2.json
 version: 1
 target: http://localhost:3000
 # …