diff --git a/cspell.json b/cspell.json
index a046e6d4..bd69e85a 100644
--- a/cspell.json
+++ b/cspell.json
@@ -12,8 +12,10 @@
 	"words": [
 		// page-cluster clustering/distance terminology
 		"jaccard",
+		"medoid",
 		"medoids",
 		"hrefs",
+		"Murtagh",
 
 		//
 		"gaxios",
diff --git a/packages/@d-zero/page-cluster/src/resolve-structural-cluster-keys.spec.ts b/packages/@d-zero/page-cluster/src/resolve-structural-cluster-keys.spec.ts
new file mode 100644
index 00000000..f8b7c7fc
--- /dev/null
+++ b/packages/@d-zero/page-cluster/src/resolve-structural-cluster-keys.spec.ts
@@ -0,0 +1,282 @@
+import { describe, expect, test } from 'vitest';
+
+import { jaccardSimilarity } from './jaccard-similarity.js';
+import { resolveStructuralClusterKeys } from './resolve-structural-cluster-keys.js';
+
+/**
+ * Naive, obviously-correct reference implementation of threshold-cut
+ * complete-linkage clustering: repeatedly rescans every live cluster pair
+ * and merges the single best (highest minimum-pairwise-similarity) pair,
+ * with no NN-chain bookkeeping, until the best pair scores below `threshold`
+ * (plain `<`, no epsilon tolerance). Used only to differentially verify the
+ * production NN-chain implementation, documented in
+ * resolve-structural-cluster-keys.ts as computing the same clustering in
+ * O(n²) rather than this function's O(n³). Label *values* may differ from
+ * production's numbering (traversal order differs); only the partition
+ * (which pages end up together) must match, which `samePartition` checks.
+ * @param tokenSets One token set per page; position is the page's identity.
+ * @param threshold Minimum similarity the best pair must reach to be merged.
+ * @returns Numeric cluster label per page, in the same order as `tokenSets`.
+ */
+function bruteForceCompleteLinkage(
+	tokenSets: readonly ReadonlySet<string>[],
+	threshold: number,
+): number[] {
+	let clusters: number[][] = tokenSets.map((_, index) => [index]);
+
+	for (;;) {
+		let bestPair: [number, number] | undefined;
+		let bestScore = Number.NEGATIVE_INFINITY;
+		for (let i = 0; i < clusters.length; i++) {
+			for (let j = i + 1; j < clusters.length; j++) {
+				let minSimilarity = Number.POSITIVE_INFINITY;
+				for (const p of clusters[i] ?? []) {
+					for (const q of clusters[j] ?? []) {
+						minSimilarity = Math.min(
+							minSimilarity,
+							jaccardSimilarity(tokenSets[p] ?? new Set(), tokenSets[q] ?? new Set()),
+						);
+					}
+				}
+				if (minSimilarity > bestScore) {
+					bestScore = minSimilarity;
+					bestPair = [i, j];
+				}
+			}
+		}
+
+		if (!bestPair || bestScore < threshold) {
+			break;
+		}
+
+		const [i, j] = bestPair;
+		clusters[i] = [...(clusters[i] ?? []), ...(clusters[j] ?? [])];
+		clusters = clusters.filter((_, index) => index !== j);
+	}
+
+	// -1 placeholder: a bare Array.from({ length }) is unknown[], not number[].
+	const labels = Array.from({ length: tokenSets.length }, () => -1);
+	for (const [clusterIndex, members] of clusters.entries()) {
+		for (const member of members) {
+			labels[member] = clusterIndex;
+		}
+	}
+	return labels;
+}
+
+/**
+ * Returns `true` when two label arrays (of any label type) describe the
+ * same partition — i.e. every pair of positions is grouped together in one
+ * array if and only if it is in the other. Labels are compared with `===`
+ * within each array only, never across arrays, so renumbered-but-identical
+ * groupings match. Arrays of unequal length never match.
+ * @param a First label array; `a[i]` is the label of position `i`.
+ * @param b Second label array, aligned position-for-position with `a`.
+ */
+function samePartition(a: readonly unknown[], b: readonly unknown[]): boolean {
+	if (a.length !== b.length) {
+		return false;
+	}
+	for (let i = 0; i < a.length; i++) {
+		for (let j = i + 1; j < a.length; j++) {
+			if ((a[i] === a[j]) !== (b[i] === b[j])) {
+				return false;
+			}
+		}
+	}
+	return true;
+}
+
+/**
+ * Deterministic PRNG (mulberry32) for reproducible property tests; each call of the returned function yields a number in [0, 1).
+ * @param seed Initial generator state; the same seed always yields the same sequence.
+ */
+function mulberry32(seed: number): () => number {
+	let state = seed;
+	return () => {
+		// `| 0` intentionally wraps to a signed 32-bit integer (mulberry32's
+		// overflow behavior); `Math.trunc()` alone would not wrap, so the
+		// usual unicorn/prefer-math-trunc autofix would silently change this
+		// PRNG's output sequence.
+		// eslint-disable-next-line unicorn/prefer-math-trunc
+		state = (state + 0x6d_2b_79_f5) | 0;
+		let t = state;
+		t = Math.imul(t ^ (t >>> 15), t | 1);
+		t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
+		return ((t ^ (t >>> 14)) >>> 0) / 4_294_967_296;
+	};
+}
+
+/**
+ * Generates `count` random token sets drawn from a `vocabularySize`-word
+ * vocabulary (`token-0`, `token-1`, …), each word independently included with
+ * 50% probability, for the differential property test below. Sets may be empty.
+ * @param seed Seed passed to `mulberry32`, making the output reproducible.
+ * @param count Number of token sets to generate.
+ * @param vocabularySize Number of distinct candidate tokens.
+ */
+function randomTokenSets(
+	seed: number,
+	count: number,
+	vocabularySize: number,
+): Set<string>[] {
+	const random = mulberry32(seed);
+	const vocabulary = Array.from(
+		{ length: vocabularySize },
+		(_, index) => `token-${index}`,
+	);
+	return Array.from({ length: count }, () => {
+		const tokens = vocabulary.filter(() => random() < 0.5);
+		return new Set(tokens);
+	});
+}
+
+describe('resolveStructuralClusterKeys', () => {
+	test('an empty array returns an empty array', () => {
+		expect(resolveStructuralClusterKeys([])).toEqual([]);
+	});
+
+	test('a single token set forms its own cluster', () => {
+		const result = resolveStructuralClusterKeys([new Set(['body>header'])]);
+		expect(result).toEqual(['cluster:0']);
+	});
+
+	test('two identical token sets share a cluster key', () => {
+		const a = new Set(['body>header', 'body>main>.card', 'body>footer']);
+		const b = new Set(['body>header', 'body>main>.card', 'body>footer']);
+		const result = resolveStructuralClusterKeys([a, b]);
+		expect(result[0]).toBe(result[1]);
+	});
+
+	test('a pair at exactly the default threshold (0.8) still merges', () => {
+		// shared = 8 tokens; a/b each add one unique token: intersection = 8,
+		// union = 10, similarity = 8/10 = 0.8 (the >= boundary is inclusive)
+		const shared = Array.from({ length: 8 }, (_, index) => `shared-${index}`);
+		const a = new Set([...shared, 'unique-a']);
+		const b = new Set([...shared, 'unique-b']);
+		const result = resolveStructuralClusterKeys([a, b]);
+		expect(result[0]).toBe(result[1]);
+	});
+
+	test('a pair just below the default threshold does not merge', () => {
+		// shared = 7 tokens; a/b each add two unique tokens: intersection = 7,
+		// union = 11, similarity = 7/11 ≈ 0.636, below the default 0.8
+		const shared = Array.from({ length: 7 }, (_, index) => `shared-${index}`);
+		const a = new Set([...shared, 'a1', 'a2']);
+		const b = new Set([...shared, 'b1', 'b2']);
+		const result = resolveStructuralClusterKeys([a, b]);
+		expect(result[0]).not.toBe(result[1]);
+	});
+
+	test('a threshold assembled from arithmetic (0.1 + 0.2) still merges a pair at the equivalent exact boundary', () => {
+		// 0.1 + 0.2 === 0.30000000000000004, not the mathematically equivalent
+		// 0.3 — regression test for the floating-point boundary bug found by
+		// /code-review xhigh: comparing a pair's computed similarity against
+		// this threshold with no epsilon tolerance would wrongly reject a pair
+		// the caller intended to be at the (inclusive) boundary.
+		// shared = 3, a-only = 4, b-only = 3: intersection = 3, union = 10,
+		// similarity = 3/10, which evaluates to the double nearest 0.3
+		const a = new Set(['s1', 's2', 's3', 'a1', 'a2', 'a3', 'a4']);
+		const b = new Set(['s1', 's2', 's3', 'b1', 'b2', 'b3']);
+		const result = resolveStructuralClusterKeys([a, b], {
+			similarityThreshold: 0.1 + 0.2,
+		});
+		expect(result[0]).toBe(result[1]);
+	});
+
+	test('similarityThreshold: 0 merges every page into a single cluster, however dissimilar', () => {
+		const result = resolveStructuralClusterKeys(
+			[new Set(['a']), new Set(['b']), new Set(['c'])],
+			{ similarityThreshold: 0 },
+		);
+		expect(result[0]).toBe(result[1]);
+		expect(result[1]).toBe(result[2]);
+	});
+
+	test('similarityThreshold: 1 only merges pages with an identical token set', () => {
+		const result = resolveStructuralClusterKeys(
+			[new Set(['a', 'b']), new Set(['a', 'b']), new Set(['a', 'b', 'c'])],
+			{ similarityThreshold: 1 },
+		);
+		expect(result[0]).toBe(result[1]);
+		expect(result[2]).not.toBe(result[0]);
+	});
+
+	test('complete-linkage refuses to chain A into C through a shared bridge B', () => {
+		// similarity(A,B) = |{a,b}| / |{a,b,c}| = 2/3 ≈ 0.667
+		// similarity(B,C) = |{a,c}| / |{a,b,c,d}| = 2/4 = 0.5
+		// similarity(A,C) = |{a}| / |{a,b,c,d}| = 1/4 = 0.25
+		// With threshold 0.5: A-B and B-C both clear it, but A-C does not.
+		// Single-linkage/connected-components would merge all three via B;
+		// complete-linkage must not, because {A,B,C} would require every pair
+		// (including A-C) to clear the threshold.
+		const a = new Set(['a', 'b']);
+		const b = new Set(['a', 'b', 'c']);
+		const c = new Set(['a', 'c', 'd']);
+		const result = resolveStructuralClusterKeys([a, b, c], { similarityThreshold: 0.5 });
+
+		expect(result[0]).toBe(result[1]);
+		expect(result[2]).not.toBe(result[0]);
+	});
+
+	test('matches the exact output documented in the JSDoc @example', () => {
+		// Kept in sync with resolveStructuralClusterKeys's @example: if this
+		// ever fails, the JSDoc example is out of date and must be corrected
+		// alongside the implementation, not the other way around.
+		const result = resolveStructuralClusterKeys([
+			new Set(['body>header', 'body>main>.card', 'body>footer']),
+			new Set(['body>header', 'body>main>.card', 'body>footer']),
+			new Set(['body>nav', 'body>main>form']),
+		]);
+		expect(result).toEqual(['cluster:0', 'cluster:0', 'cluster:1']);
+	});
+
+	test('three mutually dissimilar token sets each form their own cluster', () => {
+		const result = resolveStructuralClusterKeys([
+			new Set(['body>header']),
+			new Set(['body>nav', 'body>main>form']),
+			new Set(['body>aside', 'body>footer', 'body>footer>small']),
+		]);
+		expect(new Set(result).size).toBe(3);
+	});
+
+	test.each([-0.1, 1.1, Number.NaN])(
+		'rejects a similarityThreshold outside [0, 1] (%s)',
+		(similarityThreshold) => {
+			expect(() => resolveStructuralClusterKeys([], { similarityThreshold })).toThrow(
+				RangeError,
+			);
+		},
+	);
+
+	test.each([0, 1])(
+		'accepts the boundary similarityThreshold values (%s)',
+		(similarityThreshold) => {
+			expect(() =>
+				resolveStructuralClusterKeys([], { similarityThreshold }),
+			).not.toThrow();
+		},
+	);
+
+	const propertyTestCases = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].flatMap((seed) =>
+		[4, 6, 10].flatMap((vocabularySize) =>
+			[0.3, 0.5, 0.8].map(
+				(threshold) => [seed, vocabularySize, threshold] as [number, number, number],
+			),
+		),
+	);
+
+	test.each(propertyTestCases)(
+		'matches a naive brute-force complete-linkage reference on random inputs (seed %s, vocabulary %s, threshold %s)',
+		(seed, vocabularySize, threshold) => {
+			const tokenSets = randomTokenSets(seed, 8, vocabularySize);
+
+			const actual = resolveStructuralClusterKeys(tokenSets, {
+				similarityThreshold: threshold,
+			});
+			const expected = bruteForceCompleteLinkage(tokenSets, threshold);
+
+			expect(samePartition(actual, expected)).toBe(true);
+		},
+	);
+});
diff --git a/packages/@d-zero/page-cluster/src/resolve-structural-cluster-keys.ts b/packages/@d-zero/page-cluster/src/resolve-structural-cluster-keys.ts
new file mode 100644
index 00000000..a537b3ed
--- /dev/null
+++ b/packages/@d-zero/page-cluster/src/resolve-structural-cluster-keys.ts
@@ -0,0 +1,263 @@
+import { jaccardSimilarity } from './jaccard-similarity.js';
+
+/**
+ * Options accepted by {@link resolveStructuralClusterKeys}.
+ */
+export type ResolveStructuralClusterKeysOptions = {
+	/**
+	 * Minimum `jaccardSimilarity()` score required between *every* pair of
+	 * pages within a cluster (complete-linkage criterion) for those pages to
+	 * be grouped together. Must be a number in `[0, 1]` (`RangeError`
+	 * otherwise); defaults to 0.8 when omitted. 0.8 is a starting-point
+	 * heuristic, not validated against real corpora — tune per site once
+	 * real cluster boundaries are inspected.
+	 */
+	similarityThreshold?: number;
+};
+
+const DEFAULT_SIMILARITY_THRESHOLD = 0.8;
+
+/**
+ * `jaccardSimilarity()` returns `intersectionSize / unionSize`, a
+ * floating-point division that can land a hair below the caller's intended
+ * threshold even when the two are mathematically equal (e.g. a threshold
+ * assembled from arithmetic like `0.1 + 0.2` is `0.30000000000000004`, not
+ * `0.3`), which would otherwise make a pair at the documented inclusive
+ * boundary fail the `>=` check it should pass. Subtracting this epsilon
+ * from the threshold before comparing absorbs that rounding noise (same
+ * technique and value as `BOUNDARY_EPSILON` in `split-tokens-by-frequency.ts`).
+ */
+const BOUNDARY_EPSILON = 1e-9;
+
+/**
+ * Reads `values[index]`, throwing instead of returning `undefined`. Every
+ * call site here indexes within bounds it just established itself (loop
+ * ranges, or an index freshly returned by the same array's own scan), so the
+ * thrown branch is unreachable in practice; it exists to satisfy
+ * `noUncheckedIndexedAccess` without a non-null assertion (same rationale as
+ * `readDpValue` in `array-edit-distance.ts`, generalized to any array-like).
+ * @param values Array-like to read; an element that is itself `undefined` also throws.
+ * @param index Position to read.
+ */
+function requireIndex<T>(values: ArrayLike<T>, index: number): T {
+	const value = values[index];
+	if (value === undefined) {
+		throw new Error('resolveStructuralClusterKeys: index out of bounds');
+	}
+	return value;
+}
+
+/**
+ * Finds the representative (root) of `index`'s set, then repoints every node
+ * on the traversed path directly at that root so later lookups are shorter.
+ * @param parent Union-find parent links (a root is an entry equal to its own index); mutated in place.
+ * @param index Element whose set representative is returned.
+ */
+function find(parent: Int32Array, index: number): number {
+	let root = index;
+	while (requireIndex(parent, root) !== root) {
+		root = requireIndex(parent, root);
+	}
+	let current = index;
+	while (current !== root) {
+		const next = requireIndex(parent, current);
+		parent[current] = root;
+		current = next;
+	}
+	return root;
+}
+
+/**
+ * Complete-linkage hierarchical clustering of `tokenSets`, cut at
+ * `threshold`, computed via the NN-chain algorithm (Murtagh, F., 1983, "A
+ * Survey of Recent Advances in Hierarchical Clustering Algorithms," The
+ * Computer Journal 26(4)). NN-chain produces the exact same dendrogram as
+ * naively re-scanning every live cluster pair for the best merge at each
+ * step, but in O(n²) time instead of O(n³): each cluster follows a chain of
+ * mutually-improving nearest neighbors until it lands on a pair that are
+ * each other's nearest neighbor (a "reciprocal nearest neighbor", RNN); that
+ * pair's merge is provably a valid next step in the correct dendrogram. This
+ * is a genuine algorithmic speedup, not an approximation (see
+ * `resolveStructuralClusterKeys`'s JSDoc for why none was adopted).
+ *
+ * Complete-linkage was chosen over single-linkage (connected components of
+ * the threshold graph) because single-linkage's "chaining" lets one
+ * unrepresentative page transitively merge two otherwise-unrelated
+ * templates — the opposite of what template detection needs. Complete-
+ * linkage requires *every* pair across two clusters to clear the threshold
+ * before merging them, which rules that out. Cluster-to-cluster similarity
+ * is maintained via the Lance-Williams update for complete-linkage:
+ * `similarity(merged, Z) = min(similarity(X, Z), similarity(Y, Z))`.
+ *
+ * The algorithm always runs every one of the `size - 1` possible merges to
+ * completion (down to a single root), never stopping early at `threshold`.
+ * This looks wasteful but isn't optional: Lance-Williams monotonicity
+ * (Lance, G. N. & Williams, W. T., 1967, "A General Theory of Classificatory
+ * Sorting Strategies," The Computer Journal 9(4)) guarantees no height
+ * inversions inside the dendrogram itself (a merge's similarity is always ≤
+ * the similarity of every merge nested inside it), but says nothing about
+ * the chronological order in which independent, not-yet-connected
+ * chains happen to resolve their own RNN pairs — one chain can easily
+ * stumble onto a low-similarity RNN pair before a different, still-unvisited
+ * chain uncovers a high-similarity one elsewhere. Stopping the whole
+ * algorithm at the first below-threshold merge would therefore discard
+ * later, still-valid above-threshold merges (confirmed by this file's
+ * differential test against a naive reference — an earlier version of this
+ * function that broke early on the first below-threshold RNN pair failed it
+ * for exactly this reason). Instead, every merge is always folded into the
+ * `active`/`similarity` bookkeeping so the algorithm can keep discovering
+ * the rest of the true dendrogram, but only merges scoring `>= threshold`
+ * are recorded in `parent` (the union-find used for final membership).
+ * Monotonicity guarantees this is safe: any merge scoring `>= threshold` was
+ * necessarily built out of children merges that scored at least as high, so
+ * restricting the union-find to threshold-clearing merges — regardless of
+ * the chronological order they were discovered in — reconstructs exactly
+ * the correct threshold cut.
+ * @param tokenSets One token set per page; pairwise Jaccard scores seed the similarity matrix.
+ * @param threshold Inclusive cut: a merge is recorded only if it scores `>= threshold - BOUNDARY_EPSILON`.
+ * @returns For each input position, the union-find root index of its cluster (equal roots mean same cluster).
+ */
+function clusterByCompleteLinkage(
+	tokenSets: readonly ReadonlySet<string>[],
+	threshold: number,
+): number[] {
+	const size = tokenSets.length;
+	const parent = Int32Array.from({ length: size }, (_, index) => index);
+
+	const similarity = new Float64Array(size * size);
+	for (let i = 0; i < size; i++) {
+		for (let j = i + 1; j < size; j++) {
+			const score = jaccardSimilarity(
+				requireIndex(tokenSets, i),
+				requireIndex(tokenSets, j),
+			);
+			similarity[i * size + j] = score;
+			similarity[j * size + i] = score;
+		}
+	}
+
+	const active = new Uint8Array(size).fill(1);
+	const chain: number[] = [];
+
+	const findFreshStart = (): number => {
+		for (let index = 0; index < size; index++) {
+			if (requireIndex(active, index) === 1) {
+				return index;
+			}
+		}
+		throw new Error(
+			'resolveStructuralClusterKeys: no active cluster left to resume from',
+		);
+	};
+
+	let activeCount = size;
+	while (activeCount > 1) {
+		if (chain.length === 0) {
+			chain.push(findFreshStart());
+		}
+
+		const top = requireIndex(chain, chain.length - 1);
+		let best = -1;
+		let bestScore = Number.NEGATIVE_INFINITY;
+		for (let candidate = 0; candidate < size; candidate++) {
+			if (candidate !== top && requireIndex(active, candidate) === 1) {
+				const score = requireIndex(similarity, top * size + candidate);
+				if (score > bestScore) {
+					bestScore = score;
+					best = candidate;
+				}
+			}
+		}
+
+		const secondFromTop = chain.length >= 2 ? chain.at(-2) : undefined;
+		if (best === secondFromTop) {
+			chain.pop();
+			chain.pop();
+
+			const survivor = Math.min(top, best);
+			const dead = Math.max(top, best);
+			for (let candidate = 0; candidate < size; candidate++) {
+				if (
+					candidate !== top &&
+					candidate !== best &&
+					requireIndex(active, candidate) === 1
+				) {
+					const merged = Math.min(
+						requireIndex(similarity, top * size + candidate),
+						requireIndex(similarity, best * size + candidate),
+					);
+					similarity[survivor * size + candidate] = merged;
+					similarity[candidate * size + survivor] = merged;
+				}
+			}
+
+			active[dead] = 0;
+			if (bestScore >= threshold - BOUNDARY_EPSILON) {
+				parent[find(parent, dead)] = find(parent, survivor);
+			}
+			activeCount--;
+		} else {
+			chain.push(best);
+		}
+	}
+
+	return Array.from({ length: size }, (_, index) => find(parent, index));
+}
+
+/**
+ * Resolves, within a single already-blocked group of pages (e.g. one key
+ * from {@link ./resolve-blocking-group-keys.js | resolveBlockingGroupKeys}),
+ * which pages share a structural template. Does not call
+ * {@link ./tokenize.js | tokenize} itself (callers pass pages already
+ * tokenized and turned into `Set`s, mirroring
+ * {@link ./compute-document-frequency.js | computeDocumentFrequency}'s
+ * contract) and does not orchestrate multiple blocks — a heterogeneous
+ * corpus should be split into blocks by the caller before reaching here.
+ *
+ * MinHash/LSH-based approximation and medoid-based refinement of these
+ * clusters are intentionally out of scope: NN-chain already computes the
+ * exact complete-linkage clustering in O(n²), so there is no accuracy being
+ * traded away by not approximating, and no evidence yet that O(n²) is a
+ * real bottleneck at the block sizes this function actually sees.
+ * @param tokenSets One token set per page; an empty array yields an empty result.
+ * @param options Optional settings; `similarityThreshold` defaults to 0.8 when omitted.
+ * @returns One `cluster:N` key per page, in `tokenSets` order; `N` counts from 0 by each cluster's first appearance.
+ * @throws {RangeError} If `similarityThreshold` is `NaN` or outside `[0, 1]` (checked even for empty input).
+ * @example
+ * ```ts
+ * resolveStructuralClusterKeys([
+ * 	new Set(['body>header', 'body>main>.card', 'body>footer']),
+ * 	new Set(['body>header', 'body>main>.card', 'body>footer']),
+ * 	new Set(['body>nav', 'body>main>form']),
+ * ]);
+ * // ['cluster:0', 'cluster:0', 'cluster:1']
+ * ```
+ */
+export function resolveStructuralClusterKeys(
+	tokenSets: readonly ReadonlySet<string>[],
+	options?: ResolveStructuralClusterKeysOptions,
+): string[] {
+	const similarityThreshold =
+		options?.similarityThreshold ?? DEFAULT_SIMILARITY_THRESHOLD;
+	if (!(similarityThreshold >= 0 && similarityThreshold <= 1)) {
+		throw new RangeError(
+			`resolveStructuralClusterKeys: similarityThreshold must be between 0 and 1, got ${similarityThreshold}`,
+		);
+	}
+
+	if (tokenSets.length === 0) {
+		return [];
+	}
+
+	const roots = clusterByCompleteLinkage(tokenSets, similarityThreshold);
+
+	const rootToLabel = new Map<number, string>();
+	return roots.map((root) => {
+		let label = rootToLabel.get(root);
+		if (label === undefined) {
+			label = `cluster:${rootToLabel.size}`;
+			rootToLabel.set(root, label);
+		}
+		return label;
+	});
+}