diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts
index c6cbd4845..4b3665a5d 100644
--- a/src/domain/graph/builder/helpers.ts
+++ b/src/domain/graph/builder/helpers.ts
@@ -76,108 +76,117 @@ export function passesIncludeExclude(
   return true;
 }
 
+/** Per-walk state computed once at the top-level invocation. */
+interface CollectContext {
+  readonly rootDir: string;
+  readonly includeRegexes: readonly RegExp[];
+  readonly excludeRegexes: readonly RegExp[];
+  readonly hasGlobFilters: boolean;
+  readonly extraIgnore: Set<string> | null;
+  readonly visited: Set<string>;
+}
+
+/** Symlink-loop guard: true if `dir`'s real path was already visited or cannot be resolved. */
+function isSymlinkLoop(dir: string, visited: Set<string>): boolean {
+  let realDir: string;
+  try {
+    realDir = fs.realpathSync(dir);
+  } catch {
+    return true;
+  }
+  if (visited.has(realDir)) {
+    warn(`Symlink loop detected, skipping: ${dir}`);
+    return true;
+  }
+  visited.add(realDir);
+  return false;
+}
+
+/** Read directory entries; on failure, log a warning and return null. */
+function readDirSafe(dir: string): fs.Dirent[] | null {
+  try {
+    return fs.readdirSync(dir, { withFileTypes: true });
+  } catch (err: unknown) {
+    warn(`Cannot read directory ${dir}: ${(err as Error).message}`);
+    return null;
+  }
+}
+
+/** True if `entry` has a known source extension and passes the include/exclude globs in `ctx`. */
+function isCollectableSourceFile(full: string, entry: fs.Dirent, ctx: CollectContext): boolean {
+  if (!EXTENSIONS.has(path.extname(entry.name))) return false;
+  if (!ctx.hasGlobFilters) return true;
+  const rel = normalizePath(path.relative(ctx.rootDir, full));
+  return passesIncludeExclude(rel, ctx.includeRegexes, ctx.excludeRegexes);
+}
+
+function walkCollect(
+  dir: string,
+  files: string[],
+  directories: Set<string> | null,
+  ctx: CollectContext,
+): void {
+  if (isSymlinkLoop(dir, ctx.visited)) return;
+
+  const entries = readDirSafe(dir);
+  if (!entries) return;
+
+  let hasFiles = false;
+  for (const entry of entries) {
+    if (shouldSkipEntry(entry, ctx.extraIgnore)) continue;
+
+    const full = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+      walkCollect(full, files, directories, ctx);
+    } else if (isCollectableSourceFile(full, entry, ctx)) {
+      files.push(full);
+      hasFiles = true;
+    }
+  }
+  if (directories && hasFiles) {
+    directories.add(dir);
+  }
+}
+
 /**
  * Recursively collect all source files under `dir`.
  * When `directories` is a Set, also tracks which directories contain files.
  *
- * The first invocation establishes `dir` as the project root against which
- * `config.include` / `config.exclude` globs are matched.
+ * `dir` establishes the project root against which `config.include` /
+ * `config.exclude` globs are matched.
  */
 export function collectFiles(
   dir: string,
   files: string[],
   config: Partial<CodegraphConfig>,
   directories: Set<string>,
-  _visited?: Set<string>,
-  _rootDir?: string,
-  _includeRegexes?: readonly RegExp[],
-  _excludeRegexes?: readonly RegExp[],
 ): { files: string[]; directories: Set<string> };
 export function collectFiles(
   dir: string,
   files?: string[],
   config?: Partial<CodegraphConfig>,
   directories?: null,
-  _visited?: Set<string>,
-  _rootDir?: string,
-  _includeRegexes?: readonly RegExp[],
-  _excludeRegexes?: readonly RegExp[],
 ): string[];
 export function collectFiles(
   dir: string,
   files: string[] = [],
   config: Partial<CodegraphConfig> = {},
   directories: Set<string> | null = null,
-  _visited: Set<string> = new Set(),
-  _rootDir?: string,
-  _includeRegexes?: readonly RegExp[],
-  _excludeRegexes?: readonly RegExp[],
 ): string[] | { files: string[]; directories: Set<string> } {
   const trackDirs = directories instanceof Set;
-  let hasFiles = false;
-
-  // First call: compute root and compile include/exclude patterns once,
-  // then pass them down recursive calls so we don't recompile per directory.
-  const rootDir = _rootDir ?? dir;
-  const includeRegexes = _includeRegexes ?? compileGlobs(config.include);
-  const excludeRegexes = _excludeRegexes ?? compileGlobs(config.exclude);
-  const hasGlobFilters = includeRegexes.length > 0 || excludeRegexes.length > 0;
-
-  // Merge config ignoreDirs with defaults
-  const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
-
-  // Detect symlink loops (before I/O to avoid wasted readdirSync)
-  let realDir: string;
-  try {
-    realDir = fs.realpathSync(dir);
-  } catch {
-    return trackDirs ? { files, directories: directories as Set<string> } : files;
-  }
-  if (_visited.has(realDir)) {
-    warn(`Symlink loop detected, skipping: ${dir}`);
-    return trackDirs ? { files, directories: directories as Set<string> } : files;
-  }
-  _visited.add(realDir);
-
-  let entries: fs.Dirent[];
-  try {
-    entries = fs.readdirSync(dir, { withFileTypes: true });
-  } catch (err: unknown) {
-    warn(`Cannot read directory ${dir}: ${(err as Error).message}`);
-    return trackDirs ? { files, directories: directories as Set<string> } : files;
-  }
+  const includeRegexes = compileGlobs(config.include);
+  const excludeRegexes = compileGlobs(config.exclude);
+  const ctx: CollectContext = {
+    rootDir: dir,
+    includeRegexes,
+    excludeRegexes,
+    hasGlobFilters: includeRegexes.length > 0 || excludeRegexes.length > 0,
+    extraIgnore: config.ignoreDirs ? new Set(config.ignoreDirs) : null,
+    visited: new Set(),
+  };
 
-  for (const entry of entries) {
-    if (shouldSkipEntry(entry, extraIgnore)) continue;
+  walkCollect(dir, files, trackDirs ? (directories as Set<string>) : null, ctx);
 
-    const full = path.join(dir, entry.name);
-    if (entry.isDirectory()) {
-      if (trackDirs) {
-        collectFiles(
-          full,
-          files,
-          config,
-          directories as Set<string>,
-          _visited,
-          rootDir,
-          includeRegexes,
-          excludeRegexes,
-        );
-      } else {
-        collectFiles(full, files, config, null, _visited, rootDir, includeRegexes, excludeRegexes);
-      }
-    } else if (EXTENSIONS.has(path.extname(entry.name))) {
-      if (hasGlobFilters) {
-        const rel = normalizePath(path.relative(rootDir, full));
-        if (!passesIncludeExclude(rel, includeRegexes, excludeRegexes)) continue;
-      }
-      files.push(full);
-      hasFiles = true;
-    }
-  }
-  if (trackDirs && hasFiles) {
-    (directories as Set<string>).add(dir);
-  }
   return trackDirs ? { files, directories: directories as Set<string> } : files;
 }
 
diff --git a/src/domain/graph/builder/incremental.ts b/src/domain/graph/builder/incremental.ts
index 66853983e..d7aa488ed 100644
--- a/src/domain/graph/builder/incremental.ts
+++ b/src/domain/graph/builder/incremental.ts
@@ -307,6 +307,63 @@ function resolveBarrelImportEdges(
   return edgesAdded;
 }
 
+/** Symbol-level `imports-type` edges for one `import type`, so targets get fan-in credit. */
+function emitTypeOnlySymbolEdges(
+  db: BetterSqlite3Database | null,
+  stmts: IncrementalStmts,
+  imp: ExtractorOutput['imports'][number],
+  resolvedPath: string,
+  fileNodeId: number,
+): number {
+  let edgesAdded = 0;
+  for (const name of imp.names) {
+    const cleanName = name.replace(/^\*\s+as\s+/, '');
+    let targetFile = resolvedPath;
+    if (db && isBarrelFile(db, resolvedPath)) {
+      const actual = resolveBarrelTarget(db, resolvedPath, cleanName);
+      if (actual) targetFile = actual;
+    }
+    const candidates = stmts.findNodeInFile.all(cleanName, targetFile) as Array<{
+      id: number;
+      file: string;
+    }>;
+    if (candidates.length === 0) continue;
+    stmts.insertEdge.run(fileNodeId, candidates[0]!.id, 'imports-type', 1.0, 0);
+    edgesAdded++;
+  }
+  return edgesAdded;
+}
+
+/**
+ * Emit edges for one import: the file→file edge, symbol-level type-only edges, and edges
+ * resolved through barrel re-export chains. Returns 0 if the target has no file node.
+ */
+function emitEdgesForImport(
+  stmts: IncrementalStmts,
+  imp: ExtractorOutput['imports'][number],
+  fileNodeId: number,
+  relPath: string,
+  rootDir: string,
+  aliases: PathAliases,
+  db: BetterSqlite3Database | null,
+): number {
+  const resolvedPath = resolveImportPath(path.join(rootDir, relPath), imp.source, rootDir, aliases);
+  const targetRow = stmts.getNodeId.get(resolvedPath, 'file', resolvedPath, 0);
+  if (!targetRow) return 0;
+
+  const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports';
+  stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0);
+  let edgesAdded = 1;
+
+  if (imp.typeOnly) {
+    edgesAdded += emitTypeOnlySymbolEdges(db, stmts, imp, resolvedPath, fileNodeId);
+  }
+  if (!imp.reexport && db) {
+    edgesAdded += resolveBarrelImportEdges(db, stmts, fileNodeId, resolvedPath, imp);
+  }
+  return edgesAdded;
+}
+
 function buildImportEdges(
   stmts: IncrementalStmts,
   relPath: string,
@@ -318,44 +375,7 @@ function buildImportEdges(
 ): number {
   let edgesAdded = 0;
   for (const imp of symbols.imports) {
-    const resolvedPath = resolveImportPath(
-      path.join(rootDir, relPath),
-      imp.source,
-      rootDir,
-      aliases,
-    );
-    const targetRow = stmts.getNodeId.get(resolvedPath, 'file', resolvedPath, 0);
-    if (targetRow) {
-      const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports';
-      stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0);
-      edgesAdded++;
-
-      // Type-only imports: create symbol-level edges so the target symbols
-      // get fan-in credit and aren't falsely classified as dead code.
-      if (imp.typeOnly) {
-        for (const name of imp.names) {
-          const cleanName = name.replace(/^\*\s+as\s+/, '');
-          let targetFile = resolvedPath;
-          if (db && isBarrelFile(db, resolvedPath)) {
-            const actual = resolveBarrelTarget(db, resolvedPath, cleanName);
-            if (actual) targetFile = actual;
-          }
-          const candidates = stmts.findNodeInFile.all(cleanName, targetFile) as Array<{
-            id: number;
-            file: string;
-          }>;
-          if (candidates.length > 0) {
-            stmts.insertEdge.run(fileNodeId, candidates[0]!.id, 'imports-type', 1.0, 0);
-            edgesAdded++;
-          }
-        }
-      }
-
-      // Barrel resolution: create edges through re-export chains
-      if (!imp.reexport && db) {
-        edgesAdded += resolveBarrelImportEdges(db, stmts, fileNodeId, resolvedPath, imp);
-      }
-    }
+    edgesAdded += emitEdgesForImport(stmts, imp, fileNodeId, relPath, rootDir, aliases, db);
   }
   return edgesAdded;
 }
@@ -491,6 +511,122 @@ function buildCallEdges(
 
 // ── Main entry point ────────────────────────────────────────────────────
 
+/** Build the "this file was deleted" result returned by `rebuildFile`. */
+function buildDeletionResult(
+  relPath: string,
+  oldNodes: number,
+  oldSymbols: unknown[],
+  diffSymbols: ((old: unknown[], new_: unknown[]) => unknown) | undefined,
+): RebuildResult {
+  const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, []) : null;
+  return {
+    file: relPath,
+    nodesAdded: 0,
+    nodesRemoved: oldNodes,
+    edgesAdded: 0,
+    deleted: true,
+    event: 'deleted',
+    symbolDiff,
+    nodesBefore: oldNodes,
+    nodesAfter: 0,
+  };
+}
+
+/** Rebuild all edges originating in the single (just-parsed) target file. */
+function rebuildEdgesForTargetFile(
+  db: BetterSqlite3Database,
+  stmts: IncrementalStmts,
+  relPath: string,
+  symbols: ExtractorOutput,
+  fileNodeRow: { id: number },
+  rootDir: string,
+): number {
+  const aliases: PathAliases = { baseUrl: null, paths: {} };
+  let edgesAdded = buildContainmentEdges(db, stmts, relPath, symbols);
+  edgesAdded += rebuildDirContainment(db, stmts, relPath);
+  edgesAdded += buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeRow.id, aliases, db);
+  const importedNames = buildImportedNamesMap(symbols, rootDir, relPath, aliases);
+  edgesAdded += buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames);
+  return edgesAdded;
+}
+
+/**
+ * Re-parse each reverse-dep; for every one that yields symbols, delete its
+ * outgoing edges so the cascade can rebuild them. Others are left untouched.
+ */
+async function parseReverseDeps(
+  db: BetterSqlite3Database,
+  rootDir: string,
+  reverseDeps: string[],
+  engineOpts: EngineOpts,
+  cache: unknown,
+): Promise<Map<string, ExtractorOutput>> {
+  const depSymbols = new Map<string, ExtractorOutput>();
+  for (const depRelPath of reverseDeps) {
+    const symbols_ = await parseReverseDep(rootDir, depRelPath, engineOpts, cache);
+    if (symbols_) {
+      deleteOutgoingEdges(db, depRelPath);
+      depSymbols.set(depRelPath, symbols_);
+    }
+  }
+  return depSymbols;
+}
+
+/**
+ * Pass 2 of the reverse-dep cascade: now that the changed file's `reexports`
+ * edges exist, resolve barrel imports for every reverse-dep so transitive
+ * call edges through the barrel still find their targets.
+ */
+function emitBarrelImportEdgesForReverseDeps(
+  db: BetterSqlite3Database,
+  stmts: IncrementalStmts,
+  depSymbols: Map<string, ExtractorOutput>,
+  rootDir: string,
+): number {
+  let edgesAdded = 0;
+  for (const [depRelPath, symbols_] of depSymbols) {
+    const fileNodeRow_ = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0);
+    if (!fileNodeRow_) continue;
+    const aliases_: PathAliases = { baseUrl: null, paths: {} };
+    for (const imp of symbols_.imports) {
+      if (imp.reexport) continue;
+      const resolvedPath = resolveImportPath(
+        path.join(rootDir, depRelPath),
+        imp.source,
+        rootDir,
+        aliases_,
+      );
+      edgesAdded += resolveBarrelImportEdges(db, stmts, fileNodeRow_.id, resolvedPath, imp);
+    }
+  }
+  return edgesAdded;
+}
+
+/**
+ * Two-pass reverse-dep cascade:
+ *   1. Rebuild direct edges (creating `reexports` edges for barrels).
+ *   2. Add barrel import edges (which need `reexports` edges to exist).
+ */
+async function runReverseDepCascade(
+  db: BetterSqlite3Database,
+  rootDir: string,
+  reverseDeps: string[],
+  stmts: IncrementalStmts,
+  engineOpts: EngineOpts,
+  cache: unknown,
+): Promise<number> {
+  const depSymbols = await parseReverseDeps(db, rootDir, reverseDeps, engineOpts, cache);
+
+  let edgesAdded = 0;
+  // Pass 1: direct edges only (no barrel resolution) — creates reexports edges
+  for (const [depRelPath, symbols_] of depSymbols) {
+    edgesAdded += rebuildReverseDepEdges(db, rootDir, depRelPath, symbols_, stmts, true);
+  }
+  // Pass 2: add barrel import edges (reexports edges now exist)
+  edgesAdded += emitBarrelImportEdgesForReverseDeps(db, stmts, depSymbols, rootDir);
+  return edgesAdded;
+}
+
 /**
  * Parse a single file and update the database incrementally.
  */
@@ -519,18 +655,7 @@ export async function rebuildFile(
 
   if (!fs.existsSync(filePath)) {
     if (cache) (cache as { remove(p: string): void }).remove(filePath);
-    const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, []) : null;
-    return {
-      file: relPath,
-      nodesAdded: 0,
-      nodesRemoved: oldNodes,
-      edgesAdded: 0,
-      deleted: true,
-      event: 'deleted',
-      symbolDiff,
-      nodesBefore: oldNodes,
-      nodesAfter: 0,
-    };
+    return buildDeletionResult(relPath, oldNodes, oldSymbols, diffSymbols);
   }
 
   let code: string;
@@ -553,45 +678,8 @@ export async function rebuildFile(
   if (!fileNodeRow)
     return { file: relPath, nodesAdded: newNodes, nodesRemoved: oldNodes, edgesAdded: 0 };
 
-  const aliases: PathAliases = { baseUrl: null, paths: {} };
-
-  let edgesAdded = buildContainmentEdges(db, stmts, relPath, symbols);
-  edgesAdded += rebuildDirContainment(db, stmts, relPath);
-  edgesAdded += buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeRow.id, aliases, db);
-  const importedNames = buildImportedNamesMap(symbols, rootDir, relPath, aliases);
-  edgesAdded += buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames);
-
-  // Cascade: rebuild outgoing edges for reverse-dep files.
-  // Two-pass approach: first rebuild direct edges (creating reexports edges for barrels),
-  // then add barrel import edges (which need reexports edges to exist for resolution).
-  const depSymbols = new Map<string, ExtractorOutput>();
-  for (const depRelPath of reverseDeps) {
-    const symbols_ = await parseReverseDep(rootDir, depRelPath, engineOpts, cache);
-    if (symbols_) {
-      deleteOutgoingEdges(db, depRelPath);
-      depSymbols.set(depRelPath, symbols_);
-    }
-  }
-  // Pass 1: direct edges only (no barrel resolution) — creates reexports edges
-  for (const [depRelPath, symbols_] of depSymbols) {
-    edgesAdded += rebuildReverseDepEdges(db, rootDir, depRelPath, symbols_, stmts, true);
-  }
-  // Pass 2: add barrel import edges (reexports edges now exist)
-  for (const [depRelPath, symbols_] of depSymbols) {
-    const fileNodeRow_ = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0);
-    if (!fileNodeRow_) continue;
-    const aliases_: PathAliases = { baseUrl: null, paths: {} };
-    for (const imp of symbols_.imports) {
-      if (imp.reexport) continue;
-      const resolvedPath = resolveImportPath(
-        path.join(rootDir, depRelPath),
-        imp.source,
-        rootDir,
-        aliases_,
-      );
-      edgesAdded += resolveBarrelImportEdges(db, stmts, fileNodeRow_.id, resolvedPath, imp);
-    }
-  }
+  let edgesAdded = rebuildEdgesForTargetFile(db, stmts, relPath, symbols, fileNodeRow, rootDir);
+  edgesAdded += await runReverseDepCascade(db, rootDir, reverseDeps, stmts, engineOpts, cache);
 
   const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, newSymbols) : null;
   const event = oldNodes === 0 ? 'added' : 'modified';
diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts
index b18d3c473..ff4ee5e5d 100644
--- a/src/domain/graph/builder/pipeline.ts
+++ b/src/domain/graph/builder/pipeline.ts
@@ -8,52 +8,24 @@ import fs from 'node:fs';
 import path from 'node:path';
 import { performance } from 'node:perf_hooks';
 import {
-  acquireAdvisoryLock,
   closeDb,
   closeDbPair,
   getBuildMeta,
   initSchema,
   MIGRATIONS,
   openDb,
-  purgeFilesData,
-  releaseAdvisoryLock,
-  setBuildMeta,
 } from '../../../db/index.js';
 import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js';
 import { debug, info, warn } from '../../../infrastructure/logger.js';
 import { loadNative } from '../../../infrastructure/native.js';
-import { semverCompare } from '../../../infrastructure/update-check.js';
-import { normalizePath } from '../../../shared/constants.js';
 import { toErrorMessage } from '../../../shared/errors.js';
 import { CODEGRAPH_VERSION } from '../../../shared/version.js';
-import type {
-  BetterSqlite3Database,
-  BuildGraphOpts,
-  BuildResult,
-  Definition,
-  ExtractorOutput,
-  SqliteStatement,
-} from '../../../types.js';
-import {
-  classifyNativeDrops,
-  formatDropExtensionSummary,
-  getActiveEngine,
-  getInstalledWasmExtensions,
-  NATIVE_SUPPORTED_EXTENSIONS,
-  parseFilesWasmForBackfill,
-} from '../../parser.js';
+import type { BuildGraphOpts, BuildResult } from '../../../types.js';
+import { getActiveEngine } from '../../parser.js';
 import { writeJournalHeader } from '../journal.js';
 import { setWorkspaces } from '../resolve.js';
 import { PipelineContext } from './context.js';
-import {
-  batchInsertNodes,
-  collectFiles as collectFilesUtil,
-  fileHash,
-  fileStat,
-  loadPathAliases,
-  readFileSafe,
-} from './helpers.js';
-import { NativeDbProxy } from './native-db-proxy.js';
+import { loadPathAliases } from './helpers.js';
 import { buildEdges } from './stages/build-edges.js';
 import { buildStructure } from './stages/build-structure.js';
 // Pipeline stages
@@ -61,10 +33,24 @@ import { collectFiles } from './stages/collect-files.js';
 import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
 import { finalize } from './stages/finalize.js';
 import { insertNodes } from './stages/insert-nodes.js';
+import {
+  closeNativeDb,
+  refreshJsDb,
+  reopenNativeDb,
+  suspendNativeDb,
+} from './stages/native-db-lifecycle.js';
+import { tryNativeOrchestrator } from './stages/native-orchestrator.js';
 import { parseFiles } from './stages/parse-files.js';
 import { resolveImports } from './stages/resolve-imports.js';
 import { runAnalyses } from './stages/run-analyses.js';
 
+// Re-export computeWasmOnlyStaleFiles for backward compatibility with tests
+// that import from this module path (#1073 unit tests).
+export {
+  computeWasmOnlyStaleFiles,
+  type WasmOnlyStaleFilesInput,
+} from './stages/native-orchestrator.js';
+
 // ── Setup helpers ───────────────────────────────────────────────────────
 
 function initializeEngine(ctx: PipelineContext): void {
@@ -237,932 +223,8 @@ function formatTimingResult(ctx: PipelineContext): BuildResult {
   };
 }
 
-// ── NativeDb lifecycle helpers ──────────────────────────────────────────
-
-/** Checkpoint WAL through rusqlite and close the native connection. */
-function closeNativeDb(ctx: PipelineContext, label: string): void {
-  if (!ctx.nativeDb) return;
-  try {
-    ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
-  } catch (e) {
-    debug(`${label} WAL checkpoint failed: ${toErrorMessage(e)}`);
-  }
-  try {
-    ctx.nativeDb.close();
-  } catch (e) {
-    debug(`${label} nativeDb close failed: ${toErrorMessage(e)}`);
-  }
-  ctx.nativeDb = undefined;
-}
-
-/** Try to reopen the native connection for a given pipeline phase. */
-function reopenNativeDb(ctx: PipelineContext, label: string): void {
-  if ((ctx.opts.engine ?? 'auto') === 'wasm') return;
-  const native = loadNative();
-  if (!native?.NativeDatabase) return;
-  try {
-    ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
-  } catch (e) {
-    debug(`reopen nativeDb for ${label} failed: ${toErrorMessage(e)}`);
-    ctx.nativeDb = undefined;
-  }
-}
-
-/** Close nativeDb and clear stale references in engineOpts. */
-function suspendNativeDb(ctx: PipelineContext, label: string): void {
-  closeNativeDb(ctx, label);
-  if (ctx.engineOpts?.nativeDb) {
-    ctx.engineOpts.nativeDb = undefined;
-  }
-}
-
-/**
- * After native writes, reopen the JS db connection to get a fresh page cache.
- * Rusqlite WAL truncation invalidates better-sqlite3's internal WAL index,
- * causing SQLITE_CORRUPT on the next read (#715, #736).
- */
-function refreshJsDb(ctx: PipelineContext): void {
-  try {
-    ctx.db.close();
-  } catch (e) {
-    debug(`refreshJsDb close failed: ${toErrorMessage(e)}`);
-  }
-  ctx.db = openDb(ctx.dbPath);
-}
-
-// ── Native orchestrator types ──────────────────────────────────────────
-
-interface NativeOrchestratorResult {
-  phases: Record<string, number>;
-  earlyExit?: boolean;
-  nodeCount?: number;
-  edgeCount?: number;
-  fileCount?: number;
-  changedFiles?: string[];
-  changedCount?: number;
-  removedCount?: number;
-  isFullBuild?: boolean;
-  /** Whether the Rust pipeline handled the structure phase (small-incremental fast path). */
-  structureHandled?: boolean;
-  /** Whether the Rust pipeline wrote AST/complexity/CFG/dataflow to DB. */
-  analysisComplete?: boolean;
-}
-
-// ── Native orchestrator helpers ───────────────────────────────────────
-
-/** Determine whether the native orchestrator should be skipped. Returns a reason string, or null if it should run. */
-function shouldSkipNativeOrchestrator(ctx: PipelineContext): string | null {
-  if (ctx.forceFullRebuild) return 'forceFullRebuild';
-  // v3.9.0 addon had buggy incremental purge (wrong SQL on analysis tables,
-  // scoped removal over-detection). Fixed in v3.9.1 by PR #865. Gate on
-  // < 3.9.1 so v3.9.1+ uses the fast Rust orchestrator path.
-  const orchestratorBuggy = !!ctx.engineVersion && semverCompare(ctx.engineVersion, '3.9.1') < 0;
-  if (orchestratorBuggy) return `buggy addon ${ctx.engineVersion}`;
-  if (ctx.engineName !== 'native') return `engine=${ctx.engineName}`;
-  return null;
-}
-
-/** Checkpoint WAL through rusqlite, close nativeDb, and reopen better-sqlite3.
- *  Returns false if the DB reopen fails (caller should return partial result). */
-function handoffWalAfterNativeBuild(ctx: PipelineContext): boolean {
-  closeNativeDb(ctx, 'post-native-build');
-  try {
-    ctx.db.close();
-  } catch (e) {
-    debug(`handoffWal JS db close failed: ${toErrorMessage(e)}`);
-  }
-  try {
-    ctx.db = openDb(ctx.dbPath);
-    return true;
-  } catch (reopenErr) {
-    warn(`Failed to reopen DB after native build: ${(reopenErr as Error).message}`);
-    return false;
-  }
-}
-
-/**
- * Reconstruct fileSymbols from the DB after a native orchestrator build.
- * When `scopeFiles` is provided, only loads those files (for analysis-only).
- * When omitted, loads all files (needed for structure rebuilds).
- */
-function reconstructFileSymbolsFromDb(
-  ctx: PipelineContext,
-  scopeFiles?: string[],
-): Map<string, ExtractorOutput> {
-  let query =
-    'SELECT file, name, kind, line, end_line as endLine FROM nodes WHERE file IS NOT NULL';
-  const params: string[] = [];
-  if (scopeFiles && scopeFiles.length > 0) {
-    const placeholders = scopeFiles.map(() => '?').join(',');
-    query += ` AND file IN (${placeholders})`;
-    params.push(...scopeFiles);
-  }
-  query += ' ORDER BY file, line';
-
-  const rows = ctx.db.prepare(query).all(...params) as {
-    file: string;
-    name: string;
-    kind: string;
-    line: number;
-    endLine: number | null;
-  }[];
-
-  const fileSymbols = new Map<string, ExtractorOutput>();
-  for (const row of rows) {
-    let entry = fileSymbols.get(row.file);
-    if (!entry) {
-      entry = {
-        definitions: [],
-        calls: [],
-        imports: [],
-        classes: [],
-        exports: [],
-        typeMap: new Map(),
-      };
-      fileSymbols.set(row.file, entry);
-    }
-    entry.definitions.push({
-      name: row.name,
-      kind: row.kind as Definition['kind'],
-      line: row.line,
-      endLine: row.endLine ?? undefined,
-    });
-  }
-
-  // Populate import/export counts from DB edges so buildStructure
-  // computes correct import_count/export_count in node_metrics.
-  // The extractor arrays aren't persisted to the DB, so we derive
-  // counts from edge data instead (#804).
-  const importCountRows = ctx.db
-    .prepare(
-      `SELECT n.file, COUNT(*) AS cnt
-       FROM edges e JOIN nodes n ON e.source_id = n.id
-       WHERE e.kind IN ('imports', 'imports-type', 'dynamic-imports')
-         AND n.file IS NOT NULL
-       GROUP BY n.file`,
-    )
-    .all() as { file: string; cnt: number }[];
-  for (const row of importCountRows) {
-    const entry = fileSymbols.get(row.file);
-    if (entry) entry.imports = new Array(row.cnt) as ExtractorOutput['imports'];
-  }
-
-  const exportCountRows = ctx.db
-    .prepare(
-      `SELECT n_tgt.file, COUNT(DISTINCT n_tgt.id) AS cnt
-       FROM edges e
-       JOIN nodes n_tgt ON e.target_id = n_tgt.id
-       JOIN nodes n_src ON e.source_id = n_src.id
-       WHERE e.kind IN ('imports', 'imports-type', 'reexports')
-         AND n_tgt.file IS NOT NULL
-         AND n_src.file != n_tgt.file
-       GROUP BY n_tgt.file`,
-    )
-    .all() as { file: string; cnt: number }[];
-  for (const row of exportCountRows) {
-    const entry = fileSymbols.get(row.file);
-    if (entry) entry.exports = new Array(row.cnt) as ExtractorOutput['exports'];
-  }
-
-  return fileSymbols;
-}
-
-/**
- * Run JS buildStructure() after native orchestrator to fill directory nodes + contains edges.
- * For full builds, passes changedFiles=null (full rebuild).
- * For incremental builds, passes the changed file list to scope the update.
- */
-async function runPostNativeStructure(
-  ctx: PipelineContext,
-  allFileSymbols: Map<string, ExtractorOutput>,
-  isFullBuild: boolean,
-  changedFiles: string[] | undefined,
-): Promise<number> {
-  const structureStart = performance.now();
-  try {
-    const directories = new Set<string>();
-    for (const relPath of allFileSymbols.keys()) {
-      const parts = relPath.split('/');
-      for (let i = 1; i < parts.length; i++) {
-        directories.add(parts.slice(0, i).join('/'));
-      }
-    }
-
-    const lineCountMap = new Map<string, number>();
-    const cachedLineCounts = ctx.db
-      .prepare(
-        `SELECT n.name AS file, m.line_count
-         FROM node_metrics m JOIN nodes n ON m.node_id = n.id
-         WHERE n.kind = 'file'`,
-      )
-      .all() as Array<{ file: string; line_count: number }>;
-    for (const row of cachedLineCounts) {
-      lineCountMap.set(row.file, row.line_count);
-    }
-
-    // Full builds need null (rebuild everything). Incremental builds pass the
-    // changed file list so buildStructure only updates those files' metrics
-    // and contains edges — matching the JS pipeline's medium-incremental path.
-    const changedFilePaths = isFullBuild || !changedFiles?.length ? null : changedFiles;
-    const { buildStructure: buildStructureFn } = (await import(
-      '../../../features/structure.js'
-    )) as {
-      buildStructure: (
-        db: typeof ctx.db,
-        fileSymbols: Map<string, ExtractorOutput>,
-        rootDir: string,
-        lineCountMap: Map<string, number>,
-        directories: Set<string>,
-        changedFiles: string[] | null,
-      ) => void;
-    };
-    buildStructureFn(
-      ctx.db,
-      allFileSymbols,
-      ctx.rootDir,
-      lineCountMap,
-      directories,
-      changedFilePaths,
-    );
-    debug(
-      `Structure phase completed after native orchestrator${changedFilePaths ? ` (${changedFilePaths.length} files)` : ' (full)'}`,
-    );
-  } catch (err) {
-    warn(`Structure phase failed after native build: ${toErrorMessage(err)}`);
-  }
-  return performance.now() - structureStart;
-}
-
-/**
- * JS fallback for AST/complexity/CFG/dataflow analysis after native orchestrator.
- * Used when the Rust addon doesn't include analysis persistence (older addon
- * version) or when analysis failed on the Rust side.
- */
-async function runPostNativeAnalysis(
-  ctx: PipelineContext,
-  allFileSymbols: Map<string, ExtractorOutput>,
-  changedFiles: string[] | undefined,
-): Promise<{ astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }> {
-  const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 };
-
-  // Scope analysis fileSymbols to changed files only
-  let analysisFileSymbols: Map<string, ExtractorOutput>;
-  if (changedFiles && changedFiles.length > 0) {
-    analysisFileSymbols = new Map();
-    for (const f of changedFiles) {
-      const entry = allFileSymbols.get(f);
-      if (entry) analysisFileSymbols.set(f, entry);
-    }
-  } else {
-    analysisFileSymbols = allFileSymbols;
-  }
-
-  // Reopen nativeDb for analysis features (suspend/resume WAL pattern).
-  const native = loadNative();
-  if (native?.NativeDatabase) {
-    try {
-      ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
-      if (ctx.engineOpts) ctx.engineOpts.nativeDb = ctx.nativeDb;
-    } catch {
-      ctx.nativeDb = undefined;
-      if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined;
-    }
-  }
-
-  // Flush JS WAL pages once so Rust can see them, then no-op callbacks.
-  // Previously each feature called wal_checkpoint(TRUNCATE) individually
-  // (~68ms each × 3-4 features). One FULL checkpoint suffices.
-  if (ctx.nativeDb && ctx.engineOpts) {
-    ctx.db.pragma('wal_checkpoint(FULL)');
-    ctx.engineOpts.suspendJsDb = () => {};
-    ctx.engineOpts.resumeJsDb = () => {};
-  }
-
-  try {
-    const { runAnalyses: runAnalysesFn } = (await import('../../../ast-analysis/engine.js')) as {
-      runAnalyses: (
-        db: BetterSqlite3Database,
-        fileSymbols: Map<string, ExtractorOutput>,
-        rootDir: string,
-        opts: Record<string, unknown>,
-        engineOpts?: Record<string, unknown>,
-      ) => Promise<{ astMs?: number; complexityMs?: number; cfgMs?: number; dataflowMs?: number }>;
-    };
-    const result = await runAnalysesFn(
-      ctx.db,
-      analysisFileSymbols,
-      ctx.rootDir,
-      ctx.opts as Record<string, unknown>,
-      ctx.engineOpts as unknown as Record<string, unknown> | undefined,
-    );
-    timing.astMs = result.astMs ?? 0;
-    timing.complexityMs = result.complexityMs ?? 0;
-    timing.cfgMs = result.cfgMs ?? 0;
-    timing.dataflowMs = result.dataflowMs ?? 0;
-  } catch (err) {
-    warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`);
-  }
-
-  // Close nativeDb after analyses — TRUNCATE checkpoint flushes all Rust
-  // WAL writes so JS and external readers can see them. Runs once after
-  // all analysis features complete (not per-feature).
-  if (ctx.nativeDb) {
-    try {
-      ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
-    } catch {
-      /* ignore checkpoint errors */
-    }
-    try {
-      ctx.nativeDb.close();
-    } catch {
-      /* ignore close errors */
-    }
-    ctx.nativeDb = undefined;
-    if (ctx.engineOpts) {
-      ctx.engineOpts.nativeDb = undefined;
-      ctx.engineOpts.suspendJsDb = undefined;
-      ctx.engineOpts.resumeJsDb = undefined;
-    }
-  }
-
-  return timing;
-}
-
-/** Format timing result from native orchestrator phases + JS post-processing. */
-function formatNativeTimingResult(
-  p: Record<string, number>,
-  structurePatchMs: number,
-  analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number },
-): BuildResult {
-  return {
-    phases: {
-      setupMs: +(p.setupMs ?? 0).toFixed(1),
-      collectMs: +(p.collectMs ?? 0).toFixed(1),
-      detectMs: +(p.detectMs ?? 0).toFixed(1),
-      parseMs: +(p.parseMs ?? 0).toFixed(1),
-      insertMs: +(p.insertMs ?? 0).toFixed(1),
-      resolveMs: +(p.resolveMs ?? 0).toFixed(1),
-      edgesMs: +(p.edgesMs ?? 0).toFixed(1),
-      structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1),
-      rolesMs: +(p.rolesMs ?? 0).toFixed(1),
-      astMs: +(analysisTiming.astMs ?? 0).toFixed(1),
-      complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1),
-      cfgMs: +(analysisTiming.cfgMs ?? 0).toFixed(1),
-      dataflowMs: +(analysisTiming.dataflowMs ?? 0).toFixed(1),
-      finalizeMs: +(p.finalizeMs ?? 0).toFixed(1),
-    },
-  };
-}
-
-/** Try the native build orchestrator. Returns a BuildResult on success, undefined to fall through to JS pipeline. */
-async function tryNativeOrchestrator(
-  ctx: PipelineContext,
-): Promise<BuildResult | undefined | 'early-exit'> {
-  const skipReason = shouldSkipNativeOrchestrator(ctx);
-  if (skipReason) {
-    debug(`Skipping native orchestrator: ${skipReason}`);
-    return undefined;
-  }
-
-  // Open NativeDatabase on demand — deferred from setupPipeline to skip the
-  // ~60ms cost on no-op/early-exit builds. Close the better-sqlite3 connection
-  // first to avoid dual-connection WAL corruption.
-  if (!ctx.nativeDb && ctx.nativeAvailable) {
-    const native = loadNative();
-    if (native?.NativeDatabase) {
-      try {
-        // Close better-sqlite3 before opening rusqlite to avoid WAL conflicts.
-        // Uses raw close() instead of closeDb() intentionally — the advisory lock
-        // is kept and transferred to the NativeDbProxy below, not released here.
-        ctx.db.close();
-        acquireAdvisoryLock(ctx.dbPath);
-        ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
-        ctx.nativeDb.initSchema();
-        // Replace ctx.db with a NativeDbProxy so post-native JS fallback
-        // (structure, analysis) can use it without reopening better-sqlite3.
-        const proxy = new NativeDbProxy(ctx.nativeDb);
-        proxy.__lockPath = `${ctx.dbPath}.lock`;
-        ctx.db = proxy as unknown as typeof ctx.db;
-        ctx.nativeFirstProxy = true;
-      } catch (err) {
-        warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
-        try {
-          ctx.nativeDb?.close();
-        } catch (e) {
-          debug(`tryNativeOrchestrator: close failed during fallback: ${toErrorMessage(e)}`);
-        }
-        ctx.nativeDb = undefined;
-        ctx.nativeFirstProxy = false; // defensive: reset in case future refactors move the assignment above throwing lines
-        releaseAdvisoryLock(`${ctx.dbPath}.lock`);
-        // Reopen better-sqlite3 for JS pipeline fallback
-        ctx.db = openDb(ctx.dbPath);
-      }
-    }
-  }
-
-  if (!ctx.nativeDb?.buildGraph) return undefined;
-
-  const resultJson = ctx.nativeDb.buildGraph(
-    ctx.rootDir,
-    JSON.stringify(ctx.config),
-    JSON.stringify(ctx.aliases),
-    JSON.stringify(ctx.opts),
-  );
-  const result = JSON.parse(resultJson) as NativeOrchestratorResult;
-
-  if (result.earlyExit) {
-    info('No changes detected');
-    // Even on no-op rebuilds, dropped-language files added since the last
-    // full build are still missing from `nodes`/`file_hashes` (#1083), and
-    // WASM-only files deleted from disk leave stale rows behind (#1073).
-    // The orchestrator's file_collector skipped them, so its earlyExit
-    // doesn't imply DB consistency. Run the gap repair before returning.
-    const gap = detectDroppedLanguageGap(ctx);
-    if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) {
-      await backfillNativeDroppedFiles(ctx, gap);
-    }
-    closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
-    return 'early-exit';
-  }
-
-  // Log incremental status to match JS pipeline output
-  const changed = result.changedCount ?? 0;
-  const removed = result.removedCount ?? 0;
-  if (!result.isFullBuild && (changed > 0 || removed > 0)) {
-    info(`Incremental: ${changed} changed, ${removed} removed`);
-  }
-
-  const p = result.phases;
-
-  // Sync build_meta so JS-side version/engine checks work on next build.
-  // Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
-  // platform package.json version (ctx.engineVersion). The Rust side's
-  // check_version_mismatch compares against CARGO_PKG_VERSION; writing
-  // the package.json value would create a permanent mismatch whenever
-  // the binary and platform package.json diverge — e.g., CI hot-swap
-  // via ci-install-native.mjs (#1066) — forcing every subsequent build
-  // to be a full rebuild.
-  //
-  // When the native addon doesn't expose engineVersion() (older addon),
-  // fall back to CODEGRAPH_VERSION — same fallback used by both
-  // checkEngineSchemaMismatch (read path) and persistBuildMetadata
-  // (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
-  // here would re-introduce the asymmetry this PR fixes for that case.
-  const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
-  setBuildMeta(ctx.db, {
-    engine: ctx.engineName,
-    engine_version: nativeVersionForMeta,
-    codegraph_version: nativeVersionForMeta,
-    schema_version: String(ctx.schemaVersion),
-    built_at: new Date().toISOString(),
-  });
-
-  info(
-    `Native build orchestrator completed: ${result.nodeCount ?? 0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`,
-  );
-
-  // ── Post-native structure + analysis ──────────────────────────────
-  let analysisTiming = {
-    astMs: +(p.astMs ?? 0),
-    complexityMs: +(p.complexityMs ?? 0),
-    cfgMs: +(p.cfgMs ?? 0),
-    dataflowMs: +(p.dataflowMs ?? 0),
-  };
-  let structurePatchMs = 0;
-  // Skip JS structure when the Rust pipeline's small-incremental fast path
-  // already handled it. For full builds and large incrementals where Rust
-  // skipped structure, we must run the JS fallback.
-  const needsStructure = !result.structureHandled;
-  // When the Rust addon doesn't include analysis persistence (older addon
-  // version or analysis failed), fall back to JS-side analysis.
-  const needsAnalysisFallback =
-    !result.analysisComplete &&
-    (ctx.opts.ast !== false ||
-      ctx.opts.complexity !== false ||
-      ctx.opts.cfg !== false ||
-      ctx.opts.dataflow !== false);
-
-  if (needsStructure || needsAnalysisFallback) {
-    // When analysis fallback is needed, handoff to better-sqlite3 — the
-    // analysis engine uses the suspend/resume WAL pattern that requires a
-    // real better-sqlite3 connection, not the NativeDbProxy.
-    if (needsAnalysisFallback && ctx.nativeFirstProxy) {
-      closeNativeDb(ctx, 'pre-analysis-fallback');
-      ctx.db = openDb(ctx.dbPath);
-      ctx.nativeFirstProxy = false;
-    } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) {
-      // DB reopen failed — return partial result
-      return formatNativeTimingResult(p, 0, analysisTiming);
-    }
-
-    const fileSymbols = reconstructFileSymbolsFromDb(ctx);
-
-    if (needsStructure) {
-      structurePatchMs = await runPostNativeStructure(
-        ctx,
-        fileSymbols,
-        !!result.isFullBuild,
-        result.changedFiles,
-      );
-    }
-
-    if (needsAnalysisFallback) {
-      analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles);
-    }
-  }
-
-  // Engine parity: the native orchestrator silently drops files whose
-  // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
-  // stale native binaries). WASM handles those — backfill via WASM so both
-  // engines process the same file set (#967).
-  //
-  // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
-  // both gating and the backfill itself. On dirty incrementals/full builds
-  // the orchestrator signals trigger backfill, so the walk happens once
-  // (instead of redundantly inside backfill). On quiet incrementals we
-  // still pay the walk so we can detect brand-new files in dropped-language
-  // extensions — a gap that the orchestrator's `detect_removed_files`
-  // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
-  // because the expensive part (WASM re-parse of the missing set) is
-  // gated below.
-  const removedCount = result.removedCount ?? 0;
-  const changedCount = result.changedCount ?? 0;
-  const gap = detectDroppedLanguageGap(ctx);
-  if (
-    result.isFullBuild ||
-    removedCount > 0 ||
-    changedCount > 0 ||
-    gap.missingAbs.length > 0 ||
-    gap.staleRel.length > 0
-  ) {
-    await backfillNativeDroppedFiles(ctx, gap);
-  }
-
-  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
-  return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
-}
-
-/** Files the native orchestrator silently dropped — the working set for backfill. */
-interface DroppedLanguageGap {
-  /** Relative paths (normalized) of files missing from `nodes` or `file_hashes`. */
-  missingRel: string[];
-  /** Absolute paths, aligned by index with `missingRel`. */
-  missingAbs: string[];
-  /**
-   * Relative paths of WASM-only files present in DB but absent from disk (#1073).
-   * Rust's `detect_removed_files` filter (#1070) skips these, so the JS-side
-   * backfill must purge them. Always disjoint from `missingRel`.
-   */
-  staleRel: string[];
-}
-
-/**
- * Inputs to {@link computeWasmOnlyStaleFiles}. Sets are passed in so the helper
- * is pure and unit-testable independently of `getInstalledWasmExtensions` and
- * the `NATIVE_SUPPORTED_EXTENSIONS` global state.
- */
-export interface WasmOnlyStaleFilesInput {
-  /** Distinct `file` values from the `nodes` table. */
-  existingNodes: ReadonlySet<string>;
-  /** Distinct `file` values from the `file_hashes` table. */
-  existingHashes: ReadonlySet<string>;
-  /** Relative paths currently on disk (from `collectFilesUtil`). */
-  expected: ReadonlySet<string>;
-  /** Lowercased extensions whose WASM grammar is installed. */
-  installedExts: ReadonlySet<string>;
-  /** Extensions covered by the Rust addon — Rust owns deletion for these. */
-  nativeSupported: ReadonlySet<string>;
-}
-
-/**
- * Compute the WASM-only files present in the DB but missing from disk (#1073).
- *
- * Returns relative paths that:
- *   - appear in `existingNodes` or `existingHashes` (in DB),
- *   - are absent from `expected` (not on disk),
- *   - have an extension installed for WASM, AND
- *   - have an extension NOT covered by `nativeSupported` — Rust's
- *     `purge_changed_files` handles deletion for natively-supported extensions
- *     via its own `detect_removed_files`, so the caller must not double-purge.
- *
- * Extensions are lowercased before lookup to match the registry and Rust's
- * `LanguageKind::from_extension` (which normalises case for the languages
- * where both cases are conventional, e.g. R's `.r` / `.R`).
- *
- * DB paths are forced to forward slashes before comparison with `expected`
- * (which is always normalised). The on-disk invariant is that DB rows are
- * written with forward slashes, but a stale row written by older code on
- * Windows could carry back-slashes — normalising here makes the comparison
- * platform-safe and prevents false-positive purges of live rows. We replace
- * `\\` explicitly (rather than calling `normalizePath`, which only touches
- * `path.sep`) so the defence works when running on POSIX against a DB that
- * was migrated from Windows.
- *
- * Exported for unit testing.
- */
-export function computeWasmOnlyStaleFiles(input: WasmOnlyStaleFilesInput): string[] {
-  const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input;
-  const stale: string[] = [];
-  const seen = new Set<string>();
-  const consider = (rawRel: string): void => {
-    const rel = rawRel.replace(/\\/g, '/');
-    if (expected.has(rel) || seen.has(rel)) return;
-    const ext = path.extname(rel).toLowerCase();
-    if (nativeSupported.has(ext)) return;
-    if (!installedExts.has(ext)) return;
-    seen.add(rel);
-    // Push the ORIGINAL raw path (not the normalised form) so the eventual
-    // `DELETE FROM nodes WHERE file = ?` predicate in `purgeFilesData`
-    // matches the actual stored row. The dedup `seen` set keeps the
-    // normalised form so a file written once with `\` and once with `/`
-    // is still treated as one entry — but the value the SQL sees has to
-    // be byte-identical to what's on disk in the DB.
-    stale.push(rawRel);
-  };
-  for (const rel of existingNodes) consider(rel);
-  for (const rel of existingHashes) consider(rel);
-  return stale;
-}
-
-/**
- * Group relative paths by their lowercased extension. Shape matches the bucket
- * type that `formatDropExtensionSummary` consumes, so callers can render a
- * log-friendly per-extension summary without going through `classifyNativeDrops`
- * when the reason is already known (e.g. the stale-purge path where every path
- * is guaranteed `unsupported-by-native`).
- */
-function groupByExtension(relPaths: Iterable<string>): Map<string, string[]> {
-  const buckets = new Map<string, string[]>();
-  for (const rel of relPaths) {
-    const ext = path.extname(rel).toLowerCase();
-    let list = buckets.get(ext);
-    if (!list) {
-      list = [];
-      buckets.set(ext, list);
-    }
-    list.push(rel);
-  }
-  return buckets;
-}
-
-/**
- * Detect files the native orchestrator silently dropped.
- *
- * Walks the filesystem and compares against `nodes` + `file_hashes`. A file
- * is "missing" if it's absent from EITHER table — both must be present for
- * the fast-skip pre-flight (#1054) to work, and the two can diverge (e.g.
- * legacy DBs where `nodes` was populated but `file_hashes` was not).
- *
- * Restricted to files with an installed WASM grammar; extensions in
- * `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
- * installs) can't be parsed by either engine, so they're not a native
- * regression — excluding them keeps the warn count in
- * `backfillNativeDroppedFiles` meaningful.
- *
- * Also detects WASM-only files deleted from disk (#1073). Rust's
- * `detect_removed_files` filter (#1070) skips files outside its supported
- * extensions, so deletions of WASM-only languages don't reach the native
- * purge path; the rest of the backfill only inserts rows, so without this
- * step stale `nodes`/`file_hashes` rows would linger across incremental
- * rebuilds until the next full rebuild.
- *
- * Cheap (no DB handoff, no parsing): used both to gate the backfill call
- * and as its working set. NativeDbProxy supports `.prepare().all()`, so
- * this works whether `ctx.db` is a proxy or a real better-sqlite3
- * connection — letting us skip the close-native / reopen-better-sqlite3
- * cost when there's nothing to backfill.
- */
-function detectDroppedLanguageGap(ctx: PipelineContext): DroppedLanguageGap {
-  const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
-  const expected = new Set(
-    collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
-  );
-
-  const existingNodeRows = ctx.db
-    .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
-    .all() as Array<{ file: string }>;
-  const existingNodes = new Set(existingNodeRows.map((r) => r.file));
-
-  let existingHashes = new Set<string>();
-  try {
-    const existingHashRows = ctx.db
-      .prepare('SELECT DISTINCT file FROM file_hashes')
-      .all() as Array<{ file: string }>;
-    existingHashes = new Set(existingHashRows.map((r) => r.file));
-  } catch (e) {
-    // file_hashes table may not exist on legacy DBs; treat as fully missing
-    // so the backfill writes rows on the upsert path below.
-    debug(
-      `detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
-    );
-  }
-
-  const installedExts = getInstalledWasmExtensions();
-  const missingRel: string[] = [];
-  const missingAbs: string[] = [];
-  for (const rel of expected) {
-    if (existingNodes.has(rel) && existingHashes.has(rel)) continue;
-    const ext = path.extname(rel).toLowerCase();
-    if (!installedExts.has(ext)) continue;
-    missingRel.push(rel);
-    missingAbs.push(path.join(ctx.rootDir, rel));
-  }
-
-  const staleRel = computeWasmOnlyStaleFiles({
-    existingNodes,
-    existingHashes,
-    expected,
-    installedExts,
-    nativeSupported: NATIVE_SUPPORTED_EXTENSIONS,
-  });
-
-  return { missingRel, missingAbs, staleRel };
-}
-
-/**
- * Backfill files that the native orchestrator silently dropped during parse.
- * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
- *
- * Also purges stale rows for WASM-only files deleted from disk (#1073), which
- * Rust's `detect_removed_files` filter (#1070) skips.
- *
- * Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
- * can use the same scan for both gating and the actual backfill — avoiding
- * a redundant fs walk when the orchestrator's signals already triggered.
- */
-async function backfillNativeDroppedFiles(
-  ctx: PipelineContext,
-  gap: DroppedLanguageGap,
-): Promise<void> {
-  const { missingRel, missingAbs, staleRel } = gap;
-  if (missingAbs.length === 0 && staleRel.length === 0) return;
-
-  // Now that we know there's work to do, hand off to better-sqlite3 (needed
-  // for the INSERT path below).
-  if (ctx.nativeFirstProxy) {
-    closeNativeDb(ctx, 'pre-parity-backfill');
-    ctx.db = openDb(ctx.dbPath);
-    ctx.nativeFirstProxy = false;
-  }
-
-  const dbConn = ctx.db as unknown as BetterSqlite3Database;
-
-  // Purge WASM-only files that were deleted from disk (#1073). Rust's
-  // detect_removed_files skips them and the insert path below never visits
-  // them, so without this their rows would persist across rebuilds until the
-  // next full rebuild reset the DB.
-  if (staleRel.length > 0) {
-    // `computeWasmOnlyStaleFiles` guarantees every path here has an extension
-    // outside NATIVE_SUPPORTED_EXTENSIONS, so `classifyNativeDrops` would
-    // always bucket 100% into `unsupported-by-native`. Build the extension
-    // summary directly to avoid a redundant classification pass.
-    const staleByExt = groupByExtension(staleRel);
-    info(
-      `Detected ${staleRel.length} deleted WASM-only file(s) the native orchestrator skipped; purging stale rows: ${formatDropExtensionSummary(staleByExt)}`,
-    );
-    purgeFilesData(dbConn, staleRel);
-  }
-
-  if (missingAbs.length === 0) return;
-
-  // Classify drops so users see per-extension reasons instead of just a count
-  // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
-  // extractor); `native-extractor-failure` indicates a real native bug since
-  // the language IS supported by the addon yet the file was dropped anyway.
-  const { byReason, totals } = classifyNativeDrops(missingRel);
-  if (totals['unsupported-by-native'] > 0) {
-    info(
-      `Native orchestrator skipped ${totals['unsupported-by-native']} file(s) in languages without a Rust extractor; backfilling via WASM: ${formatDropExtensionSummary(byReason['unsupported-by-native'])}`,
-    );
-  }
-  if (totals['native-extractor-failure'] > 0) {
-    warn(
-      `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`,
-    );
-  }
-  const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
-
-  const rows: unknown[][] = [];
-  const exportKeys: unknown[][] = [];
-  for (const [relPath, symbols] of wasmResults) {
-    // File row — mirrors insertDefinitionsAndExports: qualified_name is null.
-    rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
-    for (const def of symbols.definitions ?? []) {
-      // Populate qualified_name/scope the same way the JS fallback does so
-      // downstream queries (cross-file references, "go to definition") find
-      // these symbols.
-      const dotIdx = def.name.lastIndexOf('.');
-      const scope = dotIdx !== -1 ? def.name.slice(0, dotIdx) : null;
-      rows.push([
-        def.name,
-        def.kind,
-        relPath,
-        def.line,
-        def.endLine ?? null,
-        null,
-        def.name,
-        scope,
-        def.visibility ?? null,
-      ]);
-    }
-    // Exports: insert the row (INSERT OR IGNORE — a matching definition row
-    // is a no-op) and queue a key for the second-pass exported=1 update, so
-    // queries filtering on exported=1 find backfilled symbols (#970).
-    for (const exp of symbols.exports ?? []) {
-      rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
-      exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
-    }
-  }
-  const db = dbConn;
-  batchInsertNodes(db, rows);
-
-  // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
-  if (exportKeys.length > 0) {
-    const EXPORT_CHUNK = 500;
-    const exportStmtCache = new Map<number, SqliteStatement>();
-    for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
-      const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
-      const chunkSize = end - i;
-      let updateStmt = exportStmtCache.get(chunkSize);
-      if (!updateStmt) {
-        const conditions = Array.from(
-          { length: chunkSize },
-          () => '(name = ? AND kind = ? AND file = ? AND line = ?)',
-        ).join(' OR ');
-        updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
-        exportStmtCache.set(chunkSize, updateStmt);
-      }
-      const vals: unknown[] = [];
-      for (let j = i; j < end; j++) {
-        const k = exportKeys[j] as unknown[];
-        vals.push(k[0], k[1], k[2], k[3]);
-      }
-      updateStmt.run(...vals);
-    }
-  }
-
-  // Persist file_hashes rows for every backfilled file. The Rust orchestrator
-  // only hashes files it parsed itself, so without this step files in
-  // optional-language extensions (e.g. .clj when no Rust extractor exists)
-  // would be missing from `file_hashes` — permanently breaking the JS-side
-  // fast-skip pre-flight (#1054), which rejects on `collected file missing
-  // from file_hashes` and forces every no-op rebuild back through the full
-  // ~2s native pipeline (#1068).
-  //
-  // Iterates `missingRel` (every collected file the Rust orchestrator
-  // dropped), not `wasmResults`, so files that produced zero symbols still
-  // get a row.
-  try {
-    const upsertHash = db.prepare(
-      'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
-    );
-    const writeHashes = db.transaction(() => {
-      for (let i = 0; i < missingRel.length; i++) {
-        const relPath = missingRel[i];
-        const absPath = missingAbs[i];
-        if (!relPath || !absPath) continue;
-        let code: string | null;
-        try {
-          code = readFileSafe(absPath);
-        } catch (e) {
-          debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
-          continue;
-        }
-        if (code === null) continue;
-        const stat = fileStat(absPath);
-        const mtime = stat ? stat.mtime : 0;
-        const size = stat ? stat.size : 0;
-        upsertHash.run(relPath, fileHash(code), mtime, size);
-      }
-    });
-    writeHashes();
-  } catch (e) {
-    debug(
-      `backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`,
-    );
-  }
-
-  // Free WASM parse trees from the inline backfill path (#1058).
-  // `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
-  // backed by WASM linear memory) on every result, but these symbols are
-  // consumed locally for DB row construction and never added to
-  // `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
-  // sees them. Without this, trees leak WASM memory until process exit —
-  // bounded per run but cumulative across in-process integration tests.
-  // Mirrors the cleanup discipline established for #931.
-  for (const [, symbols] of wasmResults) {
-    const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
-    if (tree && typeof tree.delete === 'function') {
-      try {
-        tree.delete();
-      } catch {
-        /* ignore cleanup errors */
-      }
-    }
-    (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
-    (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
-  }
-}
+// Native db lifecycle and orchestrator helpers live in dedicated stage
+// modules — see `./stages/native-db-lifecycle.ts` and `./stages/native-orchestrator.ts`.
 
 // ── Pipeline stages execution ───────────────────────────────────────────
 
diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts
index fc08160b3..9a531ed5c 100644
--- a/src/domain/graph/builder/stages/build-edges.ts
+++ b/src/domain/graph/builder/stages/build-edges.ts
@@ -89,12 +89,74 @@ function setupNodeLookups(ctx: PipelineContext, allNodes: QueryNodeRow[]): void
 
 // ── Import edges ────────────────────────────────────────────────────────
 
+/** Pick the edge kind for an import; precedence: reexport > type-only > dynamic > plain. */
+function importEdgeKind(imp: Import): string {
+  if (imp.reexport) return 'reexports';
+  if (imp.typeOnly) return 'imports-type';
+  if (imp.dynamicImport) return 'dynamic-imports';
+  return 'imports';
+}
+
+/**
+ * For an `import type` statement, emit symbol-level `imports-type` edges so
+ * the target symbols get fan-in credit and aren't classified as dead code.
+ */
+function emitTypeOnlySymbolEdges(
+  ctx: PipelineContext,
+  imp: Import,
+  resolvedPath: string,
+  fileNodeId: number,
+  allEdgeRows: EdgeRowTuple[],
+): void {
+  if (!ctx.nodesByNameAndFile) return;
+  for (const name of imp.names) {
+    const cleanName = name.replace(/^\*\s+as\s+/, '');
+    let targetFile = resolvedPath;
+    if (isBarrelFile(ctx, resolvedPath)) {
+      const actual = resolveBarrelExport(ctx, resolvedPath, cleanName);
+      if (actual) targetFile = actual;
+    }
+    const candidates = ctx.nodesByNameAndFile.get(`${cleanName}|${targetFile}`);
+    if (candidates && candidates.length > 0) {
+      allEdgeRows.push([fileNodeId, candidates[0]!.id, 'imports-type', 1.0, 0]);
+    }
+  }
+}
+
+/**
+ * Process a single import statement and emit all resulting edges (file→file,
+ * type-only symbol-level, and barrel re-export targets). No-op if no target file node exists.
+ */
+function emitEdgesForImport(
+  ctx: PipelineContext,
+  imp: Import,
+  fileNodeId: number,
+  relPath: string,
+  getNodeIdStmt: NodeIdStmt,
+  allEdgeRows: EdgeRowTuple[],
+): void {
+  const resolvedPath = getResolved(ctx, path.join(ctx.rootDir, relPath), imp.source);
+  const targetRow = getNodeIdStmt.get(resolvedPath, 'file', resolvedPath, 0);
+  if (!targetRow) return;
+
+  const edgeKind = importEdgeKind(imp);
+  allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]);
+
+  if (imp.typeOnly) {
+    emitTypeOnlySymbolEdges(ctx, imp, resolvedPath, fileNodeId, allEdgeRows);
+  }
+
+  if (!imp.reexport && isBarrelFile(ctx, resolvedPath)) {
+    buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, allEdgeRows);
+  }
+}
+
 function buildImportEdges(
   ctx: PipelineContext,
   getNodeIdStmt: NodeIdStmt,
   allEdgeRows: EdgeRowTuple[],
 ): void {
-  const { fileSymbols, barrelOnlyFiles, rootDir } = ctx;
+  const { fileSymbols, barrelOnlyFiles } = ctx;
 
   for (const [relPath, symbols] of fileSymbols) {
     const isBarrelOnly = barrelOnlyFiles.has(relPath);
@@ -105,40 +167,7 @@ function buildImportEdges(
     for (const imp of symbols.imports) {
       // Barrel-only files: only emit reexport edges, skip regular imports
       if (isBarrelOnly && !imp.reexport) continue;
-
-      const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source);
-      const targetRow = getNodeIdStmt.get(resolvedPath, 'file', resolvedPath, 0);
-      if (!targetRow) continue;
-
-      const edgeKind = imp.reexport
-        ? 'reexports'
-        : imp.typeOnly
-          ? 'imports-type'
-          : imp.dynamicImport
-            ? 'dynamic-imports'
-            : 'imports';
-      allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]);
-
-      // Type-only imports: create symbol-level edges so the target symbols
-      // get fan-in credit and aren't falsely classified as dead code.
-      if (imp.typeOnly && ctx.nodesByNameAndFile) {
-        for (const name of imp.names) {
-          const cleanName = name.replace(/^\*\s+as\s+/, '');
-          let targetFile = resolvedPath;
-          if (isBarrelFile(ctx, resolvedPath)) {
-            const actual = resolveBarrelExport(ctx, resolvedPath, cleanName);
-            if (actual) targetFile = actual;
-          }
-          const candidates = ctx.nodesByNameAndFile.get(`${cleanName}|${targetFile}`);
-          if (candidates && candidates.length > 0) {
-            allEdgeRows.push([fileNodeId, candidates[0]!.id, 'imports-type', 1.0, 0]);
-          }
-        }
-      }
-
-      if (!imp.reexport && isBarrelFile(ctx, resolvedPath)) {
-        buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, allEdgeRows);
-      }
+      emitEdgesForImport(ctx, imp, fileNodeId, relPath, getNodeIdStmt, allEdgeRows);
     }
   }
 }
@@ -174,83 +203,98 @@ function buildBarrelEdges(
 
 // ── Import edges (native engine) ────────────────────────────────────────
 
-function buildImportEdgesNative(
-  ctx: PipelineContext,
-  getNodeIdStmt: NodeIdStmt,
-  allEdgeRows: EdgeRowTuple[],
-  native: NativeAddon,
-): void {
-  const { fileSymbols, barrelOnlyFiles, rootDir } = ctx;
+/** Native FFI input shape for a single import statement. */
+interface NativeImportInfo {
+  source: string;
+  names: string[];
+  reexport: boolean;
+  typeOnly: boolean;
+  dynamicImport: boolean;
+  wildcardReexport: boolean;
+}
 
-  // 1. Build per-file input data
-  const files: Array<{
-    file: string;
-    fileNodeId: number;
-    isBarrelOnly: boolean;
-    imports: Array<{
-      source: string;
-      names: string[];
-      reexport: boolean;
-      typeOnly: boolean;
-      dynamicImport: boolean;
-      wildcardReexport: boolean;
-    }>;
-    definitionNames: string[];
-  }> = [];
-
-  // Collect all file node IDs we'll need (sources + targets)
-  const fileNodeIds: Array<{ file: string; nodeId: number }> = [];
-  const seenNodeFiles = new Set<string>();
-
-  const addFileNodeId = (relPath: string): { id: number } | undefined => {
-    if (seenNodeFiles.has(relPath)) return fileNodeRowCache.get(relPath);
-    const row = getNodeIdStmt.get(relPath, 'file', relPath, 0);
-    if (row) {
-      seenNodeFiles.add(relPath);
-      fileNodeIds.push({ file: relPath, nodeId: row.id });
-      fileNodeRowCache.set(relPath, row);
-    }
-    return row;
+/** Native FFI input shape for a single file. */
+interface NativeFileInput {
+  file: string;
+  fileNodeId: number;
+  isBarrelOnly: boolean;
+  imports: NativeImportInfo[];
+  definitionNames: string[];
+}
+
+/** Native FFI input shape for re-exports of a single file. */
+interface NativeReexportInput {
+  file: string;
+  reexports: Array<{ source: string; names: string[]; wildcardReexport: boolean }>;
+}
+
+/** Lazily-resolving cache of file-node rows for the native input arrays. */
+interface FileNodeIdRegistry {
+  ids: Array<{ file: string; nodeId: number }>;
+  add(relPath: string): { id: number } | undefined;
+}
+
+function createFileNodeIdRegistry(getNodeIdStmt: NodeIdStmt): FileNodeIdRegistry {
+  const ids: Array<{ file: string; nodeId: number }> = [];
+  const seen = new Set<string>();
+  const cache = new Map<string, { id: number }>();
+  return {
+    ids,
+    add(relPath: string) {
+      if (seen.has(relPath)) return cache.get(relPath);
+      const row = getNodeIdStmt.get(relPath, 'file', relPath, 0);
+      if (row) {
+        seen.add(relPath);
+        ids.push({ file: relPath, nodeId: row.id });
+        cache.set(relPath, row);
+      }
+      return row;
+    },
   };
-  const fileNodeRowCache = new Map<string, { id: number }>();
+}
 
-  // 2. Pre-resolve all imports and build resolved imports array.
-  // Keys use forward-slash-normalized rootDir + "/" + relPath to match the Rust
-  // lookup format (format!("{}/{}", root_dir.replace('\\', "/"), file)).
-  // On Windows, rootDir has backslashes but Rust normalizes them — the JS side
-  // must do the same or every resolve key lookup misses (#750).
-  const resolvedImports: Array<{ key: string; resolvedPath: string }> = [];
+function toNativeImportInfo(imp: Import): NativeImportInfo {
+  return {
+    source: imp.source,
+    names: imp.names,
+    reexport: !!imp.reexport,
+    typeOnly: !!imp.typeOnly,
+    dynamicImport: !!imp.dynamicImport,
+    wildcardReexport: !!imp.wildcardReexport,
+  };
+}
+
+/**
+ * Pre-resolve every import for the given files, registering each resolved
+ * target with the registry so the native side has full node-id coverage.
+ *
+ * Resolved-import keys use forward-slash-normalized rootDir + "/" + relPath to
+ * match the Rust lookup format. On Windows, rootDir has backslashes but Rust
+ * normalizes them — the JS side must do the same or every key lookup misses
+ * (#750).
+ */
+function buildNativeFileInputs(
+  ctx: PipelineContext,
+  registry: FileNodeIdRegistry,
+): {
+  files: NativeFileInput[];
+  resolvedImports: Array<{ key: string; resolvedPath: string }>;
+} {
+  const { fileSymbols, barrelOnlyFiles, rootDir } = ctx;
   const fwdRootDir = rootDir.replace(/\\/g, '/');
+  const files: NativeFileInput[] = [];
+  const resolvedImports: Array<{ key: string; resolvedPath: string }> = [];
 
   for (const [relPath, symbols] of fileSymbols) {
-    const fileNodeRow = addFileNodeId(relPath);
+    const fileNodeRow = registry.add(relPath);
     if (!fileNodeRow) continue;
 
-    const importInfos: Array<{
-      source: string;
-      names: string[];
-      reexport: boolean;
-      typeOnly: boolean;
-      dynamicImport: boolean;
-      wildcardReexport: boolean;
-    }> = [];
-
+    const importInfos: NativeImportInfo[] = [];
     for (const imp of symbols.imports) {
-      // Pre-resolve and register target file node
       const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source);
-      addFileNodeId(resolvedPath);
-
-      // Key matches Rust's format!("{}/{}", root_dir.replace('\\', "/"), file_input.file)
+      registry.add(resolvedPath);
       resolvedImports.push({ key: `${fwdRootDir}/${relPath}|${imp.source}`, resolvedPath });
-
-      importInfos.push({
-        source: imp.source,
-        names: imp.names,
-        reexport: !!imp.reexport,
-        typeOnly: !!imp.typeOnly,
-        dynamicImport: !!imp.dynamicImport,
-        wildcardReexport: !!imp.wildcardReexport,
-      });
+      importInfos.push(toNativeImportInfo(imp));
     }
 
     files.push({
@@ -261,61 +305,75 @@ function buildImportEdgesNative(
       definitionNames: symbols.definitions.map((d) => d.name),
     });
   }
+  return { files, resolvedImports };
+}
 
-  // 4. Flatten reexportMap
-  const fileReexports: Array<{
-    file: string;
-    reexports: Array<{
-      source: string;
-      names: string[];
-      wildcardReexport: boolean;
-    }>;
-  }> = [];
-  if (ctx.reexportMap) {
-    for (const [file, entries] of ctx.reexportMap) {
-      const reexports = (
-        entries as Array<{ source: string; names: string[]; wildcardReexport: boolean }>
-      ).map((re) => ({
-        source: re.source,
-        names: re.names,
-        wildcardReexport: !!re.wildcardReexport,
-      }));
-      fileReexports.push({ file, reexports });
+/** Flatten `ctx.reexportMap` into the array shape the native side expects. */
+function buildNativeReexports(
+  ctx: PipelineContext,
+  registry: FileNodeIdRegistry,
+): NativeReexportInput[] {
+  const fileReexports: NativeReexportInput[] = [];
+  if (!ctx.reexportMap) return fileReexports;
+
+  for (const [file, entries] of ctx.reexportMap) {
+    const reexports = (
+      entries as Array<{ source: string; names: string[]; wildcardReexport: boolean }>
+    ).map((re) => ({
+      source: re.source,
+      names: re.names,
+      wildcardReexport: !!re.wildcardReexport,
+    }));
+    fileReexports.push({ file, reexports });
 
-      // Register reexport target files for node ID lookup
-      for (const re of reexports) {
-        addFileNodeId(re.source);
-      }
+    for (const re of reexports) {
+      registry.add(re.source);
     }
   }
+  return fileReexports;
+}
 
-  // 5. Compute barrel file list
+function collectBarrelFiles(ctx: PipelineContext): string[] {
   const barrelFiles: string[] = [];
-  for (const [relPath] of fileSymbols) {
-    if (isBarrelFile(ctx, relPath)) {
-      barrelFiles.push(relPath);
-    }
+  for (const [relPath] of ctx.fileSymbols) {
+    if (isBarrelFile(ctx, relPath)) barrelFiles.push(relPath);
   }
+  return barrelFiles;
+}
 
-  // 6. Build symbol node entries for type-only import resolution
+function collectSymbolNodes(
+  ctx: PipelineContext,
+): Array<{ name: string; file: string; nodeId: number }> {
   const symbolNodes: Array<{ name: string; file: string; nodeId: number }> = [];
-  if (ctx.nodesByNameAndFile) {
-    for (const [key, nodes] of ctx.nodesByNameAndFile) {
-      if (nodes.length > 0) {
-        const [name, file] = key.split('|');
-        symbolNodes.push({ name: name!, file: file!, nodeId: nodes[0]!.id });
-      }
-    }
+  if (!ctx.nodesByNameAndFile) return symbolNodes;
+  for (const [key, nodes] of ctx.nodesByNameAndFile) {
+    if (nodes.length === 0) continue;
+    const [name, file] = key.split('|');
+    symbolNodes.push({ name: name!, file: file!, nodeId: nodes[0]!.id });
   }
+  return symbolNodes;
+}
+
+function buildImportEdgesNative(
+  ctx: PipelineContext,
+  getNodeIdStmt: NodeIdStmt,
+  allEdgeRows: EdgeRowTuple[],
+  native: NativeAddon,
+): void {
+  const registry = createFileNodeIdRegistry(getNodeIdStmt);
+
+  const { files, resolvedImports } = buildNativeFileInputs(ctx, registry);
+  const fileReexports = buildNativeReexports(ctx, registry);
+  const barrelFiles = collectBarrelFiles(ctx);
+  const symbolNodes = collectSymbolNodes(ctx);
 
-  // 7. Call native
   const nativeEdges = native.buildImportEdges!(
     files,
     resolvedImports,
     fileReexports,
-    fileNodeIds,
+    registry.ids,
     barrelFiles,
-    rootDir,
+    ctx.rootDir,
     symbolNodes,
   ) as NativeEdge[];
 
diff --git a/src/domain/graph/builder/stages/build-structure.ts b/src/domain/graph/builder/stages/build-structure.ts
index 1a59353be..144537dfe 100644
--- a/src/domain/graph/builder/stages/build-structure.ts
+++ b/src/domain/graph/builder/stages/build-structure.ts
@@ -11,87 +11,104 @@ import type { ExtractorOutput } from '../../../../types.js';
 import type { PipelineContext } from '../context.js';
 import { readFileSafe } from '../helpers.js';
 
-export async function buildStructure(ctx: PipelineContext): Promise<void> {
-  const { db, fileSymbols, rootDir, discoveredDirs, allSymbols, isFullBuild } = ctx;
-
-  // Build line count map (prefer cached _lineCount from parser)
+/** Populate `ctx.lineCountMap` from cached parser results, falling back to disk. */
+function populateLineCountMap(ctx: PipelineContext): void {
+  const { fileSymbols, rootDir } = ctx;
   ctx.lineCountMap = new Map();
   for (const [relPath, symbols] of fileSymbols) {
     const lineCount =
       (symbols as ExtractorOutput & { lineCount?: number }).lineCount ?? symbols._lineCount;
     if (lineCount) {
       ctx.lineCountMap.set(relPath, lineCount);
-    } else {
-      const absPath = path.join(rootDir, relPath);
-      try {
-        const content = readFileSafe(absPath);
-        ctx.lineCountMap.set(relPath, content.split('\n').length);
-      } catch {
-        ctx.lineCountMap.set(relPath, 0);
-      }
+      continue;
+    }
+    const absPath = path.join(rootDir, relPath);
+    try {
+      const content = readFileSafe(absPath);
+      ctx.lineCountMap.set(relPath, content.split('\n').length);
+    } catch {
+      ctx.lineCountMap.set(relPath, 0);
     }
   }
+}
 
-  const changedFileList = isFullBuild ? null : [...allSymbols.keys()];
-
-  // For small incremental builds on large codebases, use a fast path that
-  // updates only the changed files' metrics via targeted SQL instead of
-  // loading ALL definitions from DB (~8ms) and recomputing ALL metrics (~15ms).
-  // Gate: ≤smallFilesThreshold changed files AND significantly more existing files (>20) to
-  // avoid triggering on small test fixtures where directory metrics matter.
+/** Count file-kind nodes already in the DB, preferring the native connection. */
+function countExistingFiles(ctx: PipelineContext): number {
   const useNativeReads = ctx.engineName === 'native' && !!ctx.nativeDb;
-  const existingFileCount = !isFullBuild
-    ? (
-        (useNativeReads
-          ? ctx.nativeDb!.queryGet("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'", [])
-          : db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get()) as {
-          c: number;
-        }
-      ).c
-    : 0;
-  const useSmallIncrementalFastPath =
-    !isFullBuild &&
-    changedFileList != null &&
-    changedFileList.length <= ctx.config.build.smallFilesThreshold &&
-    existingFileCount > 20;
-
-  if (!isFullBuild && !useSmallIncrementalFastPath) {
-    // Medium/large incremental: load unchanged files from DB for complete structure
-    loadUnchangedFilesFromDb(ctx);
-  }
+  const row = (
+    useNativeReads
+      ? ctx.nativeDb!.queryGet("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'", [])
+      : ctx.db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get()
+  ) as { c: number };
+  return row.c;
+}
 
-  // Build directory structure
-  const t0 = performance.now();
+/**
+ * Build directory structure + metrics. Dispatches on the caller-computed
+ * fast-path flag: the fast incremental path (a handful of files changed on a
+ * large codebase) or the full path (delegated to `features/structure`).
+ */
+async function buildDirectoryStructure(
+  ctx: PipelineContext,
+  changedFileList: string[] | null,
+  useSmallIncrementalFastPath: boolean,
+): Promise<void> {
   if (useSmallIncrementalFastPath) {
     updateChangedFileMetrics(ctx, changedFileList!);
-  } else {
-    const relDirs = new Set<string>();
-    for (const absDir of discoveredDirs) {
-      relDirs.add(normalizePath(path.relative(rootDir, absDir)));
-    }
-    try {
-      const { buildStructure: buildStructureFn } = (await import(
-        '../../../../features/structure.js'
-      )) as {
-        buildStructure: (
-          db: PipelineContext['db'],
-          fileSymbols: Map<string, ExtractorOutput>,
-          rootDir: string,
-          lineCountMap: Map<string, number>,
-          directories: Set<string>,
-          changedFiles: string[] | null,
-        ) => void;
-      };
-      const changedFilePaths = isFullBuild ? null : [...allSymbols.keys()];
-      buildStructureFn(db, fileSymbols, rootDir, ctx.lineCountMap, relDirs, changedFilePaths);
-    } catch (err) {
-      debug(`Structure analysis failed: ${(err as Error).message}`);
-    }
+    return;
   }
-  ctx.timing.structureMs = performance.now() - t0;
 
-  // Classify node roles (incremental: only reclassify changed files' nodes)
-  const t1 = performance.now();
+  const { db, fileSymbols, rootDir, discoveredDirs, allSymbols, isFullBuild } = ctx;
+  const relDirs = new Set<string>();
+  for (const absDir of discoveredDirs) {
+    relDirs.add(normalizePath(path.relative(rootDir, absDir)));
+  }
+  try {
+    const { buildStructure: buildStructureFn } = (await import(
+      '../../../../features/structure.js'
+    )) as {
+      buildStructure: (
+        db: PipelineContext['db'],
+        fileSymbols: Map<string, ExtractorOutput>,
+        rootDir: string,
+        lineCountMap: Map<string, number>,
+        directories: Set<string>,
+        changedFiles: string[] | null,
+      ) => void;
+    };
+    const changedFilePaths = isFullBuild ? null : [...allSymbols.keys()];
+    buildStructureFn(db, fileSymbols, rootDir, ctx.lineCountMap, relDirs, changedFilePaths);
+  } catch (err) {
+    debug(`Structure analysis failed: ${(err as Error).message}`);
+  }
+}
+
+/** Convert a `NativeDatabase.classifyRoles*` result into the JS summary shape. */
+function nativeRoleSummaryToRecord(
+  nativeResult: NonNullable<
+    ReturnType<NonNullable<PipelineContext['nativeDb']>['classifyRolesFull']>
+  >,
+): Record<string, number> {
+  return {
+    entry: nativeResult.entry,
+    core: nativeResult.core,
+    utility: nativeResult.utility,
+    adapter: nativeResult.adapter,
+    dead: nativeResult.dead,
+    'dead-leaf': nativeResult.deadLeaf,
+    'dead-entry': nativeResult.deadEntry,
+    'dead-ffi': nativeResult.deadFfi,
+    'dead-unresolved': nativeResult.deadUnresolved,
+    'test-only': nativeResult.testOnly,
+    leaf: nativeResult.leaf,
+  };
+}
+
+async function classifyRoles(
+  ctx: PipelineContext,
+  changedFileList: string[] | null,
+): Promise<void> {
+  const useNativeReads = ctx.engineName === 'native' && !!ctx.nativeDb;
   try {
     let roleSummary: Record<string, number> | null = null;
 
@@ -103,24 +120,9 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
         changedFileList && changedFileList.length > 0
           ? ctx.nativeDb.classifyRolesIncremental(changedFileList)
           : ctx.nativeDb.classifyRolesFull();
-      if (nativeResult) {
-        roleSummary = {
-          entry: nativeResult.entry,
-          core: nativeResult.core,
-          utility: nativeResult.utility,
-          adapter: nativeResult.adapter,
-          dead: nativeResult.dead,
-          'dead-leaf': nativeResult.deadLeaf,
-          'dead-entry': nativeResult.deadEntry,
-          'dead-ffi': nativeResult.deadFfi,
-          'dead-unresolved': nativeResult.deadUnresolved,
-          'test-only': nativeResult.testOnly,
-          leaf: nativeResult.leaf,
-        };
-      }
+      if (nativeResult) roleSummary = nativeRoleSummaryToRecord(nativeResult);
     }
 
-    // Fall back to JS path
     if (!roleSummary) {
       const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
         classifyNodeRoles: (
@@ -141,6 +143,37 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
   } catch (err) {
     debug(`Role classification failed: ${(err as Error).message}`);
   }
+}
+
+export async function buildStructure(ctx: PipelineContext): Promise<void> {
+  const { allSymbols, isFullBuild } = ctx;
+
+  populateLineCountMap(ctx);
+
+  const changedFileList = isFullBuild ? null : [...allSymbols.keys()];
+
+  // For small incremental builds on large codebases, use a fast path that
+  // updates only the changed files' metrics via targeted SQL instead of
+  // loading ALL definitions from DB (~8ms) and recomputing ALL metrics (~15ms).
+  // Gate: ≤smallFilesThreshold changed files AND significantly more existing files (>20) to
+  // avoid triggering on small test fixtures where directory metrics matter.
+  const existingFileCount = !isFullBuild ? countExistingFiles(ctx) : 0;
+  const useSmallIncrementalFastPath =
+    !isFullBuild &&
+    changedFileList != null &&
+    changedFileList.length <= ctx.config.build.smallFilesThreshold &&
+    existingFileCount > 20;
+
+  if (!isFullBuild && !useSmallIncrementalFastPath) {
+    loadUnchangedFilesFromDb(ctx);
+  }
+
+  const t0 = performance.now();
+  await buildDirectoryStructure(ctx, changedFileList, useSmallIncrementalFastPath);
+  ctx.timing.structureMs = performance.now() - t0;
+
+  const t1 = performance.now();
+  await classifyRoles(ctx, changedFileList);
   ctx.timing.rolesMs = performance.now() - t1;
 }
 
diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts
index cc51155dc..222d92e42 100644
--- a/src/domain/graph/builder/stages/detect-changes.ts
+++ b/src/domain/graph/builder/stages/detect-changes.ts
@@ -162,14 +162,14 @@ function tryJournalTier(
   return { changed, removed: [...removedSet], isFullBuild: false };
 }
 
-function mtimeAndHashTiers(
+/** Tier 1: mtime+size triage. Returns files that still need hashing plus the skipped count. */
+function tierMtimeSize(
   existing: Map<string, FileHashRow>,
   allFiles: string[],
   rootDir: string,
-  removed: string[],
-): ChangeResult {
+): { needsHash: NeedsHashItem[]; skipped: number } {
   const needsHash: NeedsHashItem[] = [];
-  const skipped: string[] = [];
+  let skipped = 0;
 
   for (const file of allFiles) {
     const relPath = normalizePath(path.relative(rootDir, file));
@@ -183,16 +183,17 @@ function mtimeAndHashTiers(
     const storedMtime = record.mtime || 0;
     const storedSize = record.size || 0;
     if (storedSize > 0 && stat.mtime === storedMtime && stat.size === storedSize) {
-      skipped.push(relPath);
+      skipped++;
       continue;
     }
     needsHash.push({ file, relPath, stat });
   }
 
-  if (needsHash.length > 0) {
-    debug(`Tier 1: ${skipped.length} skipped by mtime+size, ${needsHash.length} need hash check`);
-  }
+  return { needsHash, skipped };
+}
 
+/** Tier 2: hash candidates from tier 1, classifying changed vs metadata-only. */
+function tierHash(existing: Map<string, FileHashRow>, needsHash: NeedsHashItem[]): ChangedFile[] {
   const changed: ChangedFile[] = [];
   for (const item of needsHash) {
     let content: string | undefined;
@@ -217,11 +218,26 @@ function mtimeAndHashTiers(
       });
     }
   }
+  return changed;
+}
+
+function mtimeAndHashTiers(
+  existing: Map<string, FileHashRow>,
+  allFiles: string[],
+  rootDir: string,
+  removed: string[],
+): ChangeResult {
+  const { needsHash, skipped } = tierMtimeSize(existing, allFiles, rootDir);
+  if (needsHash.length > 0) {
+    debug(`Tier 1: ${skipped} skipped by mtime+size, ${needsHash.length} need hash check`);
+  }
+
+  const changed = tierHash(existing, needsHash);
 
-  const parseChanged = changed.filter((c) => !c.metadataOnly);
   if (needsHash.length > 0) {
+    const parseChangedLen = changed.filter((c) => !c.metadataOnly).length;
     debug(
-      `Tier 2: ${parseChanged.length} actually changed, ${changed.length - parseChanged.length} metadata-only`,
+      `Tier 2: ${parseChangedLen} actually changed, ${changed.length - parseChangedLen} metadata-only`,
     );
   }
 
@@ -512,61 +528,43 @@ function handleIncrementalBuild(ctx: PipelineContext): void {
   purgeAndAddReverseDeps(ctx, changePaths, reverseDeps);
 }
 
-/**
- * Read-only pre-flight check for the native orchestrator.
- *
- * Returns true iff every collected source file has matching mtime+size in
- * `file_hashes` and no DB-tracked file has been removed. When true, the
- * caller can short-circuit before invoking the native orchestrator —
- * matching WASM's ~20 ms early-exit path and avoiding the ~2s flat
- * per-call native rebuild overhead seen in CI (#1054).
- *
- * Intentionally Tier-0/Tier-1 only (journal + mtime/size). Tier-2 content
- * hashing is left to the native side: when this returns false the caller
- * falls through to the orchestrator, which performs its own complete
- * detection and is the source of truth.
- *
- * Conservatively returns false when CFG or dataflow analysis is enabled
- * but the corresponding tables are empty — otherwise the fast-skip would
- * silently suppress the pending-analysis pass that the JS path runs via
- * `runPendingAnalysis`, and CFG/dataflow data would never populate on
- * repos where source files don't change between builds.
- *
- * Pure read of `db` and the filesystem — never mutates either.
- */
-export function detectNoChanges(
-  db: BetterSqlite3Database,
-  allFiles: string[],
-  rootDir: string,
-  opts?: Record<string, unknown>,
-): boolean {
-  // Diagnostic logging gated by env var — used by the bench gate to surface
-  // why the fast-skip is not firing on CI runners (#1066). Off by default to
-  // avoid noise on every regular incremental build.
+/** Diagnostic logger for `detectNoChanges`; emits only when CODEGRAPH_FAST_SKIP_DIAG=1. */
+function makeFastSkipLogger(): (reason: string) => void {
   const diag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1';
-  const log = (reason: string): void => {
+  return (reason: string): void => {
     if (diag) info(`[fast-skip] ${reason}`);
   };
+}
 
-  let hasTable = false;
+/**
+ * Load the `file_hashes` table for the no-change pre-flight. Returns null
+ * if the table is missing or empty; in either case the caller must fall through.
+ */
+function loadFileHashesForPreflight(
+  db: BetterSqlite3Database,
+  log: (reason: string) => void,
+): Map<string, FileHashRow> | null {
   try {
     db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
-    hasTable = true;
   } catch {
-    /* table missing — first build */
-  }
-  if (!hasTable) {
     log('false: file_hashes table missing');
-    return false;
+    return null;
   }
-
   const rows = db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[];
   if (rows.length === 0) {
     log('false: file_hashes table empty');
-    return false;
+    return null;
   }
-  const existing = new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));
+  return new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));
+}
 
+/** Returns true iff every file in `allFiles` matches a stored mtime+size record. */
+function allFilesMatchStoredStat(
+  existing: Map<string, FileHashRow>,
+  allFiles: string[],
+  rootDir: string,
+  log: (reason: string) => void,
+): boolean {
   const currentFiles = new Set<string>();
   for (const file of allFiles) {
     currentFiles.add(normalizePath(path.relative(rootDir, file)));
@@ -603,21 +601,66 @@ export function detectNoChanges(
       return false;
     }
   }
+  return true;
+}
 
-  // Pending-analysis guard: if CFG/dataflow is enabled but the corresponding
-  // table is empty (analysis newly enabled, or tables wiped between builds),
-  // fall through so the orchestrator / JS pipeline can run runPendingAnalysis.
-  // Mirrors the check at the top of runPendingAnalysis (see line ~244).
-  if (opts) {
-    if (opts.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) {
-      log('false: pending-analysis guard — cfg_blocks is empty');
-      return false;
-    }
-    if (opts.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) {
-      log('false: pending-analysis guard — dataflow is empty');
-      return false;
-    }
+/**
+ * Pending-analysis guard: if CFG/dataflow is enabled but the corresponding
+ * table is empty (analysis newly enabled, or tables wiped between builds),
+ * fall through so the orchestrator / JS pipeline can run runPendingAnalysis.
+ * Mirrors the check at the top of runPendingAnalysis.
+ */
+function passesPendingAnalysisGuard(
+  db: BetterSqlite3Database,
+  opts: Record<string, unknown> | undefined,
+  log: (reason: string) => void,
+): boolean {
+  if (!opts) return true;
+  if (opts.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) {
+    log('false: pending-analysis guard — cfg_blocks is empty');
+    return false;
   }
+  if (opts.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) {
+    log('false: pending-analysis guard — dataflow is empty');
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Read-only pre-flight check for the native orchestrator.
+ *
+ * Returns true iff every collected source file has matching mtime+size in
+ * `file_hashes` and no DB-tracked file has been removed. When true, the
+ * caller can short-circuit before invoking the native orchestrator —
+ * matching WASM's ~20 ms early-exit path and avoiding the ~2s flat
+ * per-call native rebuild overhead seen in CI (#1054).
+ *
+ * Intentionally Tier-0/Tier-1 only (journal + mtime/size). Tier-2 content
+ * hashing is left to the native side: when this returns false the caller
+ * falls through to the orchestrator, which performs its own complete
+ * detection and is the source of truth.
+ *
+ * Conservatively returns false when CFG or dataflow analysis is enabled
+ * but the corresponding tables are empty — otherwise the fast-skip would
+ * silently suppress the pending-analysis pass that the JS path runs via
+ * `runPendingAnalysis`, and CFG/dataflow data would never populate on
+ * repos where source files don't change between builds.
+ *
+ * Pure read of `db` and the filesystem — never mutates either.
+ */
+export function detectNoChanges(
+  db: BetterSqlite3Database,
+  allFiles: string[],
+  rootDir: string,
+  opts?: Record<string, unknown>,
+): boolean {
+  const log = makeFastSkipLogger();
+  const existing = loadFileHashesForPreflight(db, log);
+  if (!existing) return false;
+
+  if (!allFilesMatchStoredStat(existing, allFiles, rootDir, log)) return false;
+  if (!passesPendingAnalysisGuard(db, opts, log)) return false;
 
   log(`true: all checks passed (${allFiles.length} files)`);
   return true;
diff --git a/src/domain/graph/builder/stages/finalize.ts b/src/domain/graph/builder/stages/finalize.ts
index d59fe016a..ab2e1d429 100644
--- a/src/domain/graph/builder/stages/finalize.ts
+++ b/src/domain/graph/builder/stages/finalize.ts
@@ -136,82 +136,72 @@ function persistBuildMetadata(
   }
 }
 
-/**
- * Run advisory checks on full builds: orphaned embeddings, stale embeddings,
- * and unused exports. Informational only — does not affect correctness.
- */
-function runAdvisoryChecks(ctx: PipelineContext, hasEmbeddings: boolean, buildNow: Date): void {
-  // Batched native path: single napi call for all 3 advisory checks
-  if (ctx.engineName === 'native' && ctx.nativeDb?.runAdvisoryChecks) {
-    const result = ctx.nativeDb.runAdvisoryChecks(hasEmbeddings);
-    if (result.orphanedEmbeddings > 0) {
-      warn(
-        `${result.orphanedEmbeddings} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
-      );
-    }
-    if (result.embedBuiltAt) {
-      const embedTime = new Date(result.embedBuiltAt).getTime();
-      if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
-        warn(
-          'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.',
-        );
-      }
-    }
-    if (result.unusedExports > 0) {
-      warn(
-        `${result.unusedExports} exported symbol${result.unusedExports > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports <file> --unused" to inspect.`,
-      );
+/** Format the "X exports have zero consumers" warning, with correct plural agreement. */
+function unusedExportsMessage(count: number): string {
+  return `${count} exported symbol${count > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports <file> --unused" to inspect.`;
+}
+
+/** Run all three advisory checks via the batched native FFI. */
+function runAdvisoryChecksNative(
+  ctx: PipelineContext,
+  hasEmbeddings: boolean,
+  buildNow: Date,
+): void {
+  const result = ctx.nativeDb!.runAdvisoryChecks!(hasEmbeddings);
+  if (result.orphanedEmbeddings > 0) {
+    warn(
+      `${result.orphanedEmbeddings} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
+    );
+  }
+  if (result.embedBuiltAt) {
+    const embedTime = new Date(result.embedBuiltAt).getTime();
+    if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
+      warn('Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.');
     }
-    return;
   }
+  if (result.unusedExports > 0) {
+    warn(unusedExportsMessage(result.unusedExports));
+  }
+}
 
-  const { db } = ctx;
-
-  // Orphaned embeddings warning
-  if (hasEmbeddings) {
-    try {
-      const orphaned = (
-        db
-          .prepare(
-            'SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)',
-          )
-          .get() as { c: number }
-      ).c;
-      if (orphaned > 0) {
-        warn(
-          `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
-        );
-      }
-    } catch {
-      /* ignore - embeddings table may have been dropped */
+function checkOrphanedEmbeddings(ctx: PipelineContext): void {
+  try {
+    const orphaned = (
+      ctx.db
+        .prepare('SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)')
+        .get() as { c: number }
+    ).c;
+    if (orphaned > 0) {
+      warn(
+        `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
+      );
     }
+  } catch {
+    /* ignore - embeddings table may have been dropped */
   }
+}
 
-  // Stale embeddings warning (built before current graph rebuild)
-  if (hasEmbeddings) {
-    try {
-      const embedBuiltAt = (
-        db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as
-          | { value: string }
-          | undefined
-      )?.value;
-      if (embedBuiltAt) {
-        const embedTime = new Date(embedBuiltAt).getTime();
-        if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
-          warn(
-            'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.',
-          );
-        }
-      }
-    } catch {
-      /* ignore - embedding_meta table may not exist */
+function checkStaleEmbeddings(ctx: PipelineContext, buildNow: Date): void {
+  try {
+    const embedBuiltAt = (
+      ctx.db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as
+        | { value: string }
+        | undefined
+    )?.value;
+    if (!embedBuiltAt) return;
+    const embedTime = new Date(embedBuiltAt).getTime();
+    if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
+      warn('Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.');
     }
+  } catch {
+    /* ignore - embedding_meta table may not exist */
   }
+}
 
-  // Unused exports warning
+function checkUnusedExports(ctx: PipelineContext): void {
   try {
     const unusedCount = (
-      db
+      ctx.db
         .prepare(
           `SELECT COUNT(*) as c FROM nodes
          WHERE exported = 1 AND kind != 'file'
@@ -224,16 +214,28 @@ function runAdvisoryChecks(ctx: PipelineContext, hasEmbeddings: boolean, buildNo
         )
         .get() as { c: number }
     ).c;
-    if (unusedCount > 0) {
-      warn(
-        `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports <file> --unused" to inspect.`,
-      );
-    }
+    if (unusedCount > 0) warn(unusedExportsMessage(unusedCount));
   } catch {
     /* exported column may not exist on older DBs */
   }
 }
 
+/**
+ * Emit the advisory warnings (orphaned embeddings, stale embeddings, unused
+ * exports), delegating to the native implementation when the engine offers one.
+ */
+function runAdvisoryChecks(ctx: PipelineContext, hasEmbeddings: boolean, buildNow: Date): void {
+  if (ctx.engineName !== 'native' || !ctx.nativeDb?.runAdvisoryChecks) {
+    if (hasEmbeddings) {
+      checkOrphanedEmbeddings(ctx);
+      checkStaleEmbeddings(ctx, buildNow);
+    }
+    checkUnusedExports(ctx);
+  } else {
+    runAdvisoryChecksNative(ctx, hasEmbeddings, buildNow);
+  }
+}
+
 export async function finalize(ctx: PipelineContext): Promise<void> {
   const { allSymbols, rootDir, isFullBuild, hasEmbeddings, opts } = ctx;
 
diff --git a/src/domain/graph/builder/stages/insert-nodes.ts b/src/domain/graph/builder/stages/insert-nodes.ts
index 88e403ec9..09aad25d8 100644
--- a/src/domain/graph/builder/stages/insert-nodes.ts
+++ b/src/domain/graph/builder/stages/insert-nodes.ts
@@ -92,23 +92,69 @@ function marshalSymbolBatches(allSymbols: Map<string, ExtractorOutput>): InsertN
   return batches;
 }
 
+/** A single file_hashes row, as produced by the hash-resolution helpers in this module. */
+interface FileHashRecord {
+  file: string; // the relPath the row is keyed by
+  hash: string; // precomputed / metadata-supplied hash, or fileHash() of the file's contents
+  mtime: number; // taken from the file's stat; 0 when no stat is available
+  size: number; // taken from the file's stat; 0 when no stat is available
+}
+
+/**
+ * Produce the `file_hashes` row for `relPath`. A precomputed hash is reused when
+ * present (stat-ing the file only if no stat was cached); otherwise the file is
+ * read and hashed here. Returns null when the file cannot be read.
+ */
+function resolveHashFromPrecomputed(
+  relPath: string,
+  precomputed: PrecomputedFileData,
+  rootDir: string,
+  caller: string,
+): FileHashRecord | null {
+  const knownHash = precomputed.hash;
+  if (knownHash) {
+    // Prefer the cached stat; fall back to a fresh stat, then to zeros.
+    const cachedOrFresh = precomputed.stat || fileStat(path.join(rootDir, relPath));
+    return {
+      file: relPath,
+      hash: knownHash,
+      mtime: cachedOrFresh ? cachedOrFresh.mtime : 0,
+      size: cachedOrFresh ? cachedOrFresh.size : 0,
+    };
+  }
+
+  // No precomputed hash: read the file and hash its contents.
+  const target = path.join(rootDir, relPath);
+  let contents: string | null = null;
+  try {
+    contents = readFileSafe(target);
+  } catch (e) {
+    debug(`${caller}: readFileSafe failed for ${relPath}: ${toErrorMessage(e)}`);
+  }
+  if (contents === null) return null;
+
+  const onDisk = fileStat(target);
+  const mtime = onDisk ? onDisk.mtime : 0;
+  const size = onDisk ? onDisk.size : 0;
+  return { file: relPath, hash: fileHash(contents), mtime, size };
+}
+
 /**
- * Build file hash entries for every collected file, including those that
- * produced zero symbols (empty files, parsers that silently no-op'd, or
- * optional-language extensions whose grammar wasn't installed). Iterating the
- * symbol map instead would skip such files and leave them missing from
- * `file_hashes`, which permanently breaks the JS-side fast-skip pre-flight on
- * any subsequent no-op rebuild (#1068).
+ * Walk every collected file once and yield a `FileHashRecord` for it, plus one
+ * record per metadata-only update.  Shared by `buildFileHashes` (native path)
+ * and `updateFileHashes` (JS fallback) so the iteration and hash-resolution
+ * logic stays in one place.
  *
- * Exported for unit testing.
+ * Files marked `_reverseDepOnly` are skipped — their hashes are already
+ * correct in the DB.
  */
-export function buildFileHashes(
+function* iterFileHashRecords(
   filesToParse: FileToParse[],
   precomputedData: Map<string, PrecomputedFileData>,
   metadataUpdates: MetadataUpdate[],
   rootDir: string,
-): Array<{ file: string; hash: string; mtime: number; size: number }> {
-  const fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }> = [];
+  caller: string,
+): Generator<FileHashRecord> {
   const seen = new Set<string>();
 
   for (const item of filesToParse) {
@@ -117,47 +163,53 @@ export function buildFileHashes(
     seen.add(relPath);
 
     const precomputed = precomputedData.get(relPath);
-    if (precomputed?._reverseDepOnly) {
-      continue; // file unchanged, hash already correct
-    }
-    if (precomputed?.hash) {
-      let mtime: number;
-      let size: number;
-      if (precomputed.stat) {
-        mtime = precomputed.stat.mtime;
-        size = precomputed.stat.size;
-      } else {
-        const rawStat = fileStat(path.join(rootDir, relPath));
-        mtime = rawStat ? rawStat.mtime : 0;
-        size = rawStat ? rawStat.size : 0;
-      }
-      fileHashes.push({ file: relPath, hash: precomputed.hash, mtime, size });
-    } else {
-      const absPath = path.join(rootDir, relPath);
-      let code: string | null;
-      try {
-        code = readFileSafe(absPath);
-      } catch (e) {
-        debug(`buildFileHashes: readFileSafe failed for ${relPath}: ${toErrorMessage(e)}`);
-        code = null;
-      }
-      if (code !== null) {
-        const stat = fileStat(absPath);
-        const mtime = stat ? stat.mtime : 0;
-        const size = stat ? stat.size : 0;
-        fileHashes.push({ file: relPath, hash: fileHash(code), mtime, size });
-      }
-    }
+    if (precomputed?._reverseDepOnly) continue;
+
+    const record = resolveHashFromPrecomputed(
+      relPath,
+      precomputed ?? ({} as PrecomputedFileData),
+      rootDir,
+      caller,
+    );
+    if (record) yield record;
   }
 
-  // Also include metadata-only updates (self-heal mtime/size without re-parse)
+  // Metadata-only updates (self-heal mtime/size without re-parse)
   for (const item of metadataUpdates) {
-    const mtime = item.stat ? item.stat.mtime : 0;
-    const size = item.stat ? item.stat.size : 0;
-    fileHashes.push({ file: item.relPath, hash: item.hash, mtime, size });
+    yield {
+      file: item.relPath,
+      hash: item.hash,
+      mtime: item.stat ? item.stat.mtime : 0,
+      size: item.stat ? item.stat.size : 0,
+    };
   }
+}
 
-  return fileHashes;
+/**
+ * Produce one hash entry per collected file — deliberately including files that
+ * yielded no symbols (empty files, parsers that silently no-op'd, or
+ * optional-language extensions whose grammar wasn't installed). Driving this
+ * from the symbol map would omit those files from `file_hashes`, which
+ * permanently breaks the JS-side fast-skip pre-flight on any subsequent no-op
+ * rebuild (#1068).
+ *
+ * Exported for unit testing.
+ */
+export function buildFileHashes(
+  filesToParse: FileToParse[],
+  precomputedData: Map<string, PrecomputedFileData>,
+  metadataUpdates: MetadataUpdate[],
+  rootDir: string,
+): FileHashRecord[] {
+  // Drain the shared record generator into a plain array.
+  const records = iterFileHashRecords(
+    filesToParse,
+    precomputedData,
+    metadataUpdates,
+    rootDir,
+    'buildFileHashes',
+  );
+  return Array.from(records);
 }
 
 // ── Native fast-path ─────────────────────────────────────────────────
@@ -260,36 +312,38 @@ function insertDefinitionsAndExports(
 
 // ── JS fallback: Phase 2+3 ──────────────────────────────────────────
 
-function insertChildrenAndEdges(
+/** Index one file's nodes by their `name|kind|line` key, mapping each key to the node id. */
+function loadFileNodeIdMap(db: BetterSqlite3Database, relPath: string): Map<string, number> {
+  const keyed = Array.from(
+    bulkNodeIdsByFile(db, relPath),
+    (node): [string, number] => [`${node.name}|${node.kind}|${node.line}`, node.id],
+  );
+  return new Map(keyed);
+}
+
+/**
+ * First pass: for every file, emit file→def containment edges and collect
+ * the child-node insertion rows.
+ */
+function collectChildRowsAndFileEdges(
   db: BetterSqlite3Database,
   allSymbols: Map<string, ExtractorOutput>,
+  childRows: unknown[][],
+  edgeRows: unknown[][],
 ): void {
-  const childRows: unknown[][] = [];
-  const edgeRows: unknown[][] = [];
-
   for (const [relPath, symbols] of allSymbols) {
-    // First pass: collect file→def edges and child rows
-    const nodeIdMap = new Map<string, number>();
-    for (const row of bulkNodeIdsByFile(db, relPath)) {
-      nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
-    }
-
+    const nodeIdMap = loadFileNodeIdMap(db, relPath);
     const fileId = nodeIdMap.get(`${relPath}|file|0`);
 
     for (const def of symbols.definitions) {
       const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
 
-      // Containment edge: file -> definition
       if (fileId && defId) {
         edgeRows.push([fileId, defId, 'contains', 1.0, 0]);
       }
-
-      if (!def.children?.length) continue;
-      if (!defId) continue;
+      if (!def.children?.length || !defId) continue;
 
       for (const child of def.children) {
-        // Child node
-        const qualifiedName = `${def.name}.${child.name}`;
         childRows.push([
           child.name,
           child.kind,
@@ -297,39 +351,55 @@ function insertChildrenAndEdges(
           child.line,
           child.endLine || null,
           defId,
-          qualifiedName,
+          `${def.name}.${child.name}`,
           def.name,
           child.visibility || null,
         ]);
       }
     }
   }
+}
 
-  // Insert children first (so they exist for edge lookup)
-  batchInsertNodes(db, childRows);
-
-  // Now re-fetch IDs to include newly-inserted children, then add child edges
+/**
+ * Second pass (after child nodes have been inserted): emit def→child
+ * containment edges and child→def `parameter_of` edges.
+ */
+function collectChildEdges(
+  db: BetterSqlite3Database,
+  allSymbols: Map<string, ExtractorOutput>,
+  edgeRows: unknown[][],
+): void {
   for (const [relPath, symbols] of allSymbols) {
-    const nodeIdMap = new Map<string, number>();
-    for (const row of bulkNodeIdsByFile(db, relPath)) {
-      nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
-    }
+    const nodeIdMap = loadFileNodeIdMap(db, relPath);
     for (const def of symbols.definitions) {
       if (!def.children?.length) continue;
       const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
       if (!defId) continue;
       for (const child of def.children) {
         const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
-        if (childId) {
-          edgeRows.push([defId, childId, 'contains', 1.0, 0]);
-          if (child.kind === 'parameter') {
-            edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
-          }
+        if (!childId) continue;
+        edgeRows.push([defId, childId, 'contains', 1.0, 0]);
+        if (child.kind === 'parameter') {
+          edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
         }
       }
     }
   }
+}
+
+function insertChildrenAndEdges( // Two-pass JS fallback: insert child nodes, then all containment / parameter_of edges.
+  db: BetterSqlite3Database,
+  allSymbols: Map<string, ExtractorOutput>,
+): void {
+  const childRows: unknown[][] = []; // child-node insertion rows, filled by the first pass
+  const edgeRows: unknown[][] = []; // edge rows accumulated by both passes
 
+  collectChildRowsAndFileEdges(db, allSymbols, childRows, edgeRows); // pass 1: file→def edges + child rows
+
+  // Insert children first (so they exist for edge lookup)
+  batchInsertNodes(db, childRows);
+
+  collectChildEdges(db, allSymbols, edgeRows); // pass 2: def→child and parameter_of edges (needs the child ids)
   batchInsertEdges(db, edgeRows);
 }
 
@@ -348,50 +418,14 @@ function updateFileHashes(
   // Iterate every collected file (#1068): files that produced zero symbols
   // (empty, parser no-op, or grammar-missing optional language) still need a
   // hash row, otherwise the next no-op rebuild's fast-skip pre-flight rejects.
-  const seen = new Set<string>();
-  for (const item of filesToParse) {
-    const relPath = item.relPath ?? normalizePath(path.relative(rootDir, item.file));
-    if (seen.has(relPath)) continue;
-    seen.add(relPath);
-
-    const precomputed = precomputedData.get(relPath);
-    if (precomputed?._reverseDepOnly) {
-      // no-op: file unchanged, hash already correct
-    } else if (precomputed?.hash) {
-      let mtime: number;
-      let size: number;
-      if (precomputed.stat) {
-        mtime = precomputed.stat.mtime;
-        size = precomputed.stat.size;
-      } else {
-        const rawStat = fileStat(path.join(rootDir, relPath));
-        mtime = rawStat ? rawStat.mtime : 0;
-        size = rawStat ? rawStat.size : 0;
-      }
-      upsertHash.run(relPath, precomputed.hash, mtime, size);
-    } else {
-      const absPath = path.join(rootDir, relPath);
-      let code: string | null;
-      try {
-        code = readFileSafe(absPath);
-      } catch (e) {
-        debug(`updateFileHashes: readFileSafe failed for ${relPath}: ${toErrorMessage(e)}`);
-        code = null;
-      }
-      if (code !== null) {
-        const stat = fileStat(absPath);
-        const mtime = stat ? stat.mtime : 0;
-        const size = stat ? stat.size : 0;
-        upsertHash.run(relPath, fileHash(code), mtime, size);
-      }
-    }
-  }
-
-  // Also update metadata-only entries (self-heal mtime/size without re-parse)
-  for (const item of metadataUpdates) {
-    const mtime = item.stat ? item.stat.mtime : 0;
-    const size = item.stat ? item.stat.size : 0;
-    upsertHash.run(item.relPath, item.hash, mtime, size);
+  for (const record of iterFileHashRecords(
+    filesToParse,
+    precomputedData,
+    metadataUpdates,
+    rootDir,
+    'updateFileHashes',
+  )) {
+    upsertHash.run(record.file, record.hash, record.mtime, record.size);
   }
 }
 
diff --git a/src/domain/graph/builder/stages/native-db-lifecycle.ts b/src/domain/graph/builder/stages/native-db-lifecycle.ts
new file mode 100644
index 000000000..ac9e2568f
--- /dev/null
+++ b/src/domain/graph/builder/stages/native-db-lifecycle.ts
@@ -0,0 +1,74 @@
+/**
+ * NativeDatabase connection lifecycle helpers.
+ *
+ * The Rust orchestrator and the JS pipeline stages both juggle the same
+ * `nativeDb` handle (rusqlite) alongside `ctx.db` (better-sqlite3). These
+ * helpers centralise the open/close/reopen sequence so both call sites
+ * preserve the same WAL-safety invariants:
+ *
+ *   - Always checkpoint WAL before closing rusqlite — otherwise better-sqlite3's
+ *     internal WAL index can drift and surface as SQLITE_CORRUPT on the next
+ *     read (#715, #736).
+ *   - Always reopen better-sqlite3 after rusqlite writes to drop the stale
+ *     page cache.
+ *
+ * Lives in its own module so `tryNativeOrchestrator` (in `native-orchestrator.ts`)
+ * and the JS pipeline stages driver (in `pipeline.ts`) can share the helpers
+ * without either file importing the other.
+ */
+import { openDb } from '../../../../db/index.js';
+import { debug } from '../../../../infrastructure/logger.js';
+import { loadNative } from '../../../../infrastructure/native.js';
+import { toErrorMessage } from '../../../../shared/errors.js';
+import type { PipelineContext } from '../context.js';
+
+/** Checkpoint WAL (TRUNCATE) through rusqlite, close the native connection, and clear `ctx.nativeDb`; `label` prefixes the debug logs. */
+export function closeNativeDb(ctx: PipelineContext, label: string): void {
+  if (!ctx.nativeDb) return; // no native handle open: nothing to do
+  try {
+    ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
+  } catch (e) {
+    debug(`${label} WAL checkpoint failed: ${toErrorMessage(e)}`); // best-effort: the close below is still attempted
+  }
+  try {
+    ctx.nativeDb.close();
+  } catch (e) {
+    debug(`${label} nativeDb close failed: ${toErrorMessage(e)}`); // logged only; never rethrown
+  }
+  ctx.nativeDb = undefined; // cleared even when the checkpoint or close failed
+}
+
+/** Try to reopen the native connection for a given pipeline phase; `label` names that phase in the failure log. Never throws on open failure. */
+export function reopenNativeDb(ctx: PipelineContext, label: string): void {
+  if ((ctx.opts.engine ?? 'auto') === 'wasm') return; // engine explicitly set to wasm: leave ctx.nativeDb untouched
+  const native = loadNative();
+  if (!native?.NativeDatabase) return; // addon missing or without NativeDatabase: leave ctx.nativeDb untouched
+  try {
+    ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
+  } catch (e) {
+    debug(`reopen nativeDb for ${label} failed: ${toErrorMessage(e)}`);
+    ctx.nativeDb = undefined; // NOTE(review): ctx.engineOpts.nativeDb is not updated here — presumably the caller re-links it; confirm
+  }
+}
+
+/** Shut the native connection down and forget the copy of the handle kept on engineOpts. */
+export function suspendNativeDb(ctx: PipelineContext, label: string): void {
+  closeNativeDb(ctx, label);
+  // engineOpts may still reference the handle that was just closed.
+  const engineOpts = ctx.engineOpts;
+  if (engineOpts?.nativeDb) engineOpts.nativeDb = undefined;
+}
+
+/**
+ * After native writes, close and reopen the JS db connection (`ctx.db`) to get
+ * a fresh page cache. Rusqlite WAL truncation invalidates better-sqlite3's
+ * internal WAL index, causing SQLITE_CORRUPT on the next read (#715, #736).
+ */
+export function refreshJsDb(ctx: PipelineContext): void {
+  try {
+    ctx.db.close();
+  } catch (e) {
+    debug(`refreshJsDb close failed: ${toErrorMessage(e)}`); // best-effort: the reopen below still runs
+  }
+  ctx.db = openDb(ctx.dbPath); // not guarded: an openDb failure propagates and ctx.db keeps the old handle
+}
diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts
new file mode 100644
index 000000000..934dd8d05
--- /dev/null
+++ b/src/domain/graph/builder/stages/native-orchestrator.ts
@@ -0,0 +1,942 @@
+/**
+ * Native build orchestrator stage — runs the full Rust pipeline when available,
+ * with WASM fallback for files the native engine drops.
+ *
+ * Extracted from `pipeline.ts` to break the name-collision cycle between
+ * `buildGraph()` (this module's caller) and `ctx.nativeDb.buildGraph()` (the
+ * Rust orchestrator entry point invoked here). Codegraph's name-based call
+ * resolver previously conflated the two and reported a false-positive
+ * function-level cycle (`buildGraph ↔ tryNativeOrchestrator`).
+ *
+ * The orchestrator-selection strategy lives here so `pipeline.ts` stays a thin
+ * top-level controller: detect changes, try native, fall back to JS stages.
+ */
+import path from 'node:path';
+import { performance } from 'node:perf_hooks';
+import {
+  acquireAdvisoryLock,
+  closeDbPair,
+  openDb,
+  purgeFilesData,
+  releaseAdvisoryLock,
+  setBuildMeta,
+} from '../../../../db/index.js';
+import { debug, info, warn } from '../../../../infrastructure/logger.js';
+import { loadNative } from '../../../../infrastructure/native.js';
+import { semverCompare } from '../../../../infrastructure/update-check.js';
+import { normalizePath } from '../../../../shared/constants.js';
+import { toErrorMessage } from '../../../../shared/errors.js';
+import { CODEGRAPH_VERSION } from '../../../../shared/version.js';
+import type {
+  BetterSqlite3Database,
+  BuildResult,
+  Definition,
+  ExtractorOutput,
+  SqliteStatement,
+} from '../../../../types.js';
+import {
+  classifyNativeDrops,
+  formatDropExtensionSummary,
+  getInstalledWasmExtensions,
+  NATIVE_SUPPORTED_EXTENSIONS,
+  parseFilesWasmForBackfill,
+} from '../../../parser.js';
+import type { PipelineContext } from '../context.js';
+import {
+  batchInsertNodes,
+  collectFiles as collectFilesUtil,
+  fileHash,
+  fileStat,
+  readFileSafe,
+} from '../helpers.js';
+import { NativeDbProxy } from '../native-db-proxy.js';
+import { closeNativeDb } from './native-db-lifecycle.js';
+
+// ── Native orchestrator types ──────────────────────────────────────────
+
+interface NativeOrchestratorResult { // presumably the result shape reported by the Rust orchestrator — TODO confirm against the addon
+  phases: Record<string, number>; // per-phase timings keyed by name; presumably ms (cf. the `*Ms` keys in formatNativeTimingResult) — confirm
+  earlyExit?: boolean;
+  nodeCount?: number;
+  edgeCount?: number;
+  fileCount?: number;
+  changedFiles?: string[]; // presumably paths relative to rootDir; verify against the orchestrator
+  changedCount?: number;
+  removedCount?: number;
+  isFullBuild?: boolean;
+  /** Whether the Rust pipeline handled the structure phase (small-incremental fast path). */
+  structureHandled?: boolean;
+  /** Whether the Rust pipeline wrote AST/complexity/CFG/dataflow to DB. */
+  analysisComplete?: boolean;
+}
+
+/** Backfill working set: files the native orchestrator silently dropped, plus WASM-only DB rows whose files are gone. */
+interface DroppedLanguageGap {
+  /** Relative paths (normalized) of files missing from `nodes` or `file_hashes`. */
+  missingRel: string[];
+  /** Absolute paths, aligned by index with `missingRel` (same length, same order). */
+  missingAbs: string[];
+  /**
+   * Relative paths of WASM-only files present in DB but absent from disk (#1073).
+   * Rust's `detect_removed_files` filter (#1070) skips these, so the JS-side
+   * backfill must purge them. Always disjoint from `missingRel`.
+   */
+  staleRel: string[];
+}
+
+/**
+ * Inputs to {@link computeWasmOnlyStaleFiles}. Sets are passed in so the helper
+ * is pure and unit-testable independently of `getInstalledWasmExtensions` and
+ * the `NATIVE_SUPPORTED_EXTENSIONS` global state.
+ */
+export interface WasmOnlyStaleFilesInput {
+  /** Distinct `file` values from the `nodes` table (back-slashes are normalised by the helper before comparison). */
+  existingNodes: ReadonlySet<string>;
+  /** Distinct `file` values from the `file_hashes` table (normalised the same way). */
+  existingHashes: ReadonlySet<string>;
+  /** Relative paths currently on disk (from `collectFilesUtil`), always forward-slash normalised. */
+  expected: ReadonlySet<string>;
+  /** Lowercased extensions whose WASM grammar is installed; looked up with `path.extname(...)` output. */
+  installedExts: ReadonlySet<string>;
+  /** Extensions covered by the Rust addon — Rust owns deletion for these, so they are never reported stale. */
+  nativeSupported: ReadonlySet<string>;
+}
+
+// ── Native orchestrator helpers ───────────────────────────────────────
+
+/** Decide whether to bypass the native orchestrator: yields the reason as a string, or null when it should run. */
+function shouldSkipNativeOrchestrator(ctx: PipelineContext): string | null {
+  if (ctx.forceFullRebuild) return 'forceFullRebuild';
+  // The v3.9.0 addon shipped a buggy incremental purge (wrong SQL on analysis
+  // tables, scoped removal over-detection) that PR #865 fixed in v3.9.1.
+  // Only versions below 3.9.1 are gated, so v3.9.1+ keeps the fast Rust
+  // orchestrator path. A missing/empty version is not treated as buggy.
+  const version = ctx.engineVersion;
+  if (version && semverCompare(version, '3.9.1') < 0) return `buggy addon ${version}`;
+  return ctx.engineName === 'native' ? null : `engine=${ctx.engineName}`;
+}
+
+/** Checkpoint WAL through rusqlite, close nativeDb, and reopen better-sqlite3 on `ctx.db`.
+ *  Returns false (after a warning) if the DB reopen fails — caller should return a partial result. */
+function handoffWalAfterNativeBuild(ctx: PipelineContext): boolean {
+  closeNativeDb(ctx, 'post-native-build');
+  try {
+    ctx.db.close();
+  } catch (e) {
+    debug(`handoffWal JS db close failed: ${toErrorMessage(e)}`); // best-effort: still attempt the reopen
+  }
+  try {
+    ctx.db = openDb(ctx.dbPath);
+    return true;
+  } catch (reopenErr) {
+    warn(`Failed to reopen DB after native build: ${toErrorMessage(reopenErr)}`); // toErrorMessage also copes with non-Error throws
+    return false;
+  }
+}
+
+/**
+ * Reconstruct fileSymbols from the DB after a native orchestrator build.
+ * When `scopeFiles` is provided, only loads those files (for analysis-only).
+ * When omitted (or empty), loads all files (needed for structure rebuilds).
+ */
+function reconstructFileSymbolsFromDb(
+  ctx: PipelineContext,
+  scopeFiles?: string[],
+): Map<string, ExtractorOutput> {
+  let query =
+    'SELECT file, name, kind, line, end_line as endLine FROM nodes WHERE file IS NOT NULL';
+  const params: string[] = [];
+  if (scopeFiles && scopeFiles.length > 0) {
+    const placeholders = scopeFiles.map(() => '?').join(','); // NOTE(review): one bound parameter per file — a very large scope could hit SQLite's parameter limit; confirm callers keep it small
+    query += ` AND file IN (${placeholders})`;
+    params.push(...scopeFiles);
+  }
+  query += ' ORDER BY file, line';
+
+  const rows = ctx.db.prepare(query).all(...params) as {
+    file: string;
+    name: string;
+    kind: string;
+    line: number;
+    endLine: number | null;
+  }[];
+
+  const fileSymbols = new Map<string, ExtractorOutput>();
+  for (const row of rows) {
+    let entry = fileSymbols.get(row.file);
+    if (!entry) {
+      entry = { // first row seen for this file: start from an empty extractor output
+        definitions: [],
+        calls: [],
+        imports: [],
+        classes: [],
+        exports: [],
+        typeMap: new Map(),
+      };
+      fileSymbols.set(row.file, entry);
+    }
+    entry.definitions.push({
+      name: row.name,
+      kind: row.kind as Definition['kind'],
+      line: row.line,
+      endLine: row.endLine ?? undefined, // SQL NULL becomes an absent endLine
+    });
+  }
+
+  // Populate import/export counts from DB edges so buildStructure
+  // computes correct import_count/export_count in node_metrics.
+  // The extractor arrays aren't persisted to the DB, so we derive
+  // counts from edge data instead (#804).
+  const importCountRows = ctx.db
+    .prepare(
+      `SELECT n.file, COUNT(*) AS cnt
+       FROM edges e JOIN nodes n ON e.source_id = n.id
+       WHERE e.kind IN ('imports', 'imports-type', 'dynamic-imports')
+         AND n.file IS NOT NULL
+       GROUP BY n.file`,
+    )
+    .all() as { file: string; cnt: number }[]; // not restricted to scopeFiles; unknown files are skipped below
+  for (const row of importCountRows) {
+    const entry = fileSymbols.get(row.file);
+    if (entry) entry.imports = new Array(row.cnt) as ExtractorOutput['imports']; // empty-slot placeholder: only its length carries information
+  }
+
+  const exportCountRows = ctx.db
+    .prepare(
+      `SELECT n_tgt.file, COUNT(DISTINCT n_tgt.id) AS cnt
+       FROM edges e
+       JOIN nodes n_tgt ON e.target_id = n_tgt.id
+       JOIN nodes n_src ON e.source_id = n_src.id
+       WHERE e.kind IN ('imports', 'imports-type', 'reexports')
+         AND n_tgt.file IS NOT NULL
+         AND n_src.file != n_tgt.file
+       GROUP BY n_tgt.file`,
+    )
+    .all() as { file: string; cnt: number }[]; // distinct target nodes referenced from a different file
+  for (const row of exportCountRows) {
+    const entry = fileSymbols.get(row.file);
+    if (entry) entry.exports = new Array(row.cnt) as ExtractorOutput['exports']; // empty-slot placeholder, as for imports
+  }
+
+  return fileSymbols;
+}
+
+/**
+ * Run JS buildStructure() after native orchestrator to fill directory nodes + contains edges.
+ * For full builds, passes changedFiles=null (full rebuild).
+ * For incremental builds, passes the changed file list to scope the update.
+ */
+async function runPostNativeStructure(
+  ctx: PipelineContext,
+  allFileSymbols: Map<string, ExtractorOutput>,
+  isFullBuild: boolean,
+  changedFiles: string[] | undefined,
+): Promise<number> {
+  const structureStart = performance.now();
+  try {
+    const directories = new Set<string>();
+    for (const relPath of allFileSymbols.keys()) {
+      const parts = relPath.split('/'); // assumes forward-slash separated keys — TODO confirm for all producers
+      for (let i = 1; i < parts.length; i++) {
+        directories.add(parts.slice(0, i).join('/')); // every ancestor directory prefix; the file name itself is excluded
+      }
+    }
+
+    const lineCountMap = new Map<string, number>();
+    const cachedLineCounts = ctx.db
+      .prepare(
+        `SELECT n.name AS file, m.line_count
+         FROM node_metrics m JOIN nodes n ON m.node_id = n.id
+         WHERE n.kind = 'file'`,
+      )
+      .all() as Array<{ file: string; line_count: number }>;
+    for (const row of cachedLineCounts) {
+      lineCountMap.set(row.file, row.line_count); // keyed by the file node's name
+    }
+
+    // Full builds need null (rebuild everything). Incremental builds pass the
+    // changed file list so buildStructure only updates those files' metrics
+    // and contains edges — matching the JS pipeline's medium-incremental path.
+    const changedFilePaths = isFullBuild || !changedFiles?.length ? null : changedFiles; // an empty/undefined list also means "full"
+    const { buildStructure: buildStructureFn } = (await import(
+      '../../../../features/structure.js'
+    )) as {
+      buildStructure: (
+        db: typeof ctx.db,
+        fileSymbols: Map<string, ExtractorOutput>,
+        rootDir: string,
+        lineCountMap: Map<string, number>,
+        directories: Set<string>,
+        changedFiles: string[] | null,
+      ) => void;
+    };
+    buildStructureFn(
+      ctx.db,
+      allFileSymbols,
+      ctx.rootDir,
+      lineCountMap,
+      directories,
+      changedFilePaths,
+    );
+    debug(
+      `Structure phase completed after native orchestrator${changedFilePaths ? ` (${changedFilePaths.length} files)` : ' (full)'}`,
+    );
+  } catch (err) {
+    warn(`Structure phase failed after native build: ${toErrorMessage(err)}`); // non-fatal: logged, not rethrown
+  }
+  return performance.now() - structureStart; // elapsed time, returned even when the phase failed
+}
+
+/**
+ * JS fallback for AST/complexity/CFG/dataflow analysis after native orchestrator
+ * (older addon without analysis persistence, or analysis failed on the Rust side).
+ * Temporarily reopens nativeDb for the analyses and always closes it again;
+ * analysis failures are logged as warnings and leave the returned timings at 0.
+ */
+async function runPostNativeAnalysis(
+  ctx: PipelineContext,
+  allFileSymbols: Map<string, ExtractorOutput>,
+  changedFiles: string[] | undefined,
+): Promise<{ astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }> {
+  const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 };
+
+  // Scope analysis fileSymbols to changed files only
+  let analysisFileSymbols: Map<string, ExtractorOutput>;
+  if (changedFiles && changedFiles.length > 0) {
+    analysisFileSymbols = new Map();
+    for (const f of changedFiles) {
+      const entry = allFileSymbols.get(f);
+      if (entry) analysisFileSymbols.set(f, entry);
+    }
+  } else {
+    analysisFileSymbols = allFileSymbols;
+  }
+
+  // Reopen nativeDb for analysis features (suspend/resume WAL pattern).
+  const native = loadNative();
+  if (native?.NativeDatabase) {
+    try {
+      ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
+      if (ctx.engineOpts) ctx.engineOpts.nativeDb = ctx.nativeDb;
+    } catch (e) {
+      debug(`reopen nativeDb for post-native analysis failed: ${toErrorMessage(e)}`);
+      ctx.nativeDb = undefined;
+      if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined;
+    }
+  }
+
+  try {
+    // Flush JS WAL pages once so Rust can see them, then no-op callbacks.
+    // Previously each feature called wal_checkpoint(TRUNCATE) individually
+    // (~68ms each × 3-4 features). One FULL checkpoint suffices.
+    if (ctx.nativeDb && ctx.engineOpts) {
+      ctx.db.pragma('wal_checkpoint(FULL)');
+      ctx.engineOpts.suspendJsDb = () => {};
+      ctx.engineOpts.resumeJsDb = () => {};
+    }
+
+    try {
+      const { runAnalyses: runAnalysesFn } = (await import(
+        '../../../../ast-analysis/engine.js'
+      )) as {
+        runAnalyses: (
+          db: BetterSqlite3Database,
+          fileSymbols: Map<string, ExtractorOutput>,
+          rootDir: string,
+          opts: Record<string, unknown>,
+          engineOpts?: Record<string, unknown>,
+        ) => Promise<{
+          astMs?: number;
+          complexityMs?: number;
+          cfgMs?: number;
+          dataflowMs?: number;
+        }>;
+      };
+      const result = await runAnalysesFn(
+        ctx.db,
+        analysisFileSymbols,
+        ctx.rootDir,
+        ctx.opts as Record<string, unknown>,
+        ctx.engineOpts as unknown as Record<string, unknown> | undefined,
+      );
+      timing.astMs = result.astMs ?? 0;
+      timing.complexityMs = result.complexityMs ?? 0;
+      timing.cfgMs = result.cfgMs ?? 0;
+      timing.dataflowMs = result.dataflowMs ?? 0;
+    } catch (err) {
+      warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`);
+    }
+  } finally {
+    // Close nativeDb once after all analyses (not per-feature); the TRUNCATE
+    // checkpoint in closeNativeDb flushes Rust WAL writes for JS and external
+    // readers. In `finally` so a failed FULL checkpoint cannot leak the handle.
+    if (ctx.nativeDb) {
+      closeNativeDb(ctx, 'post-native-analysis');
+      if (ctx.engineOpts) {
+        ctx.engineOpts.nativeDb = undefined;
+        ctx.engineOpts.suspendJsDb = undefined;
+        ctx.engineOpts.resumeJsDb = undefined;
+      }
+    }
+  }
+
+  return timing;
+}
+
+/** Combine the native orchestrator's phase timings with the JS post-processing timings, each rounded to one decimal. */
+function formatNativeTimingResult(
+  p: Record<string, number>,
+  structurePatchMs: number,
+  analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number },
+): BuildResult {
+  const round1 = (ms: number | undefined): number => +(ms ?? 0).toFixed(1);
+  const phases = {
+    setupMs: round1(p.setupMs),
+    collectMs: round1(p.collectMs),
+    detectMs: round1(p.detectMs),
+    parseMs: round1(p.parseMs),
+    insertMs: round1(p.insertMs),
+    resolveMs: round1(p.resolveMs),
+    edgesMs: round1(p.edgesMs),
+    structureMs: round1((p.structureMs ?? 0) + structurePatchMs),
+    rolesMs: round1(p.rolesMs),
+    astMs: round1(analysisTiming.astMs),
+    complexityMs: round1(analysisTiming.complexityMs),
+    cfgMs: round1(analysisTiming.cfgMs),
+    dataflowMs: round1(analysisTiming.dataflowMs),
+    finalizeMs: round1(p.finalizeMs),
+  };
+  return { phases };
+}
+
+/**
+ * Compute the WASM-only files present in the DB but missing from disk (#1073).
+ *
+ * Returns relative paths that:
+ *   - appear in `existingNodes` or `existingHashes` (in DB),
+ *   - are absent from `expected` (not on disk),
+ *   - have an extension installed for WASM, AND
+ *   - have an extension NOT covered by `nativeSupported` — Rust's
+ *     `purge_changed_files` handles deletion for natively-supported extensions
+ *     via its own `detect_removed_files`, so the caller must not double-purge.
+ *
+ * Extensions are lowercased before lookup to match the registry and Rust's
+ * `LanguageKind::from_extension` (which normalises case for the languages
+ * where both cases are conventional, e.g. R's `.r` / `.R`).
+ *
+ * DB paths are forced to forward slashes before comparison with `expected`
+ * (which is always normalised). The on-disk invariant is that DB rows are
+ * written with forward slashes, but a stale row written by older code on
+ * Windows could carry back-slashes — normalising here makes the comparison
+ * platform-safe and prevents false-positive purges of live rows. We replace
+ * `\\` explicitly (rather than calling `normalizePath`, which only touches
+ * `path.sep`) so the defence works when running on POSIX against a DB that
+ * was migrated from Windows.
+ *
+ * @returns raw DB paths (as stored), in first-seen order. Exported for unit testing.
+ */
+export function computeWasmOnlyStaleFiles(input: WasmOnlyStaleFilesInput): string[] {
+  const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input;
+  const stale: string[] = [];
+  const seen = new Set<string>(); // normalised paths already queued in `stale`
+  const consider = (rawRel: string): void => {
+    const rel = rawRel.replace(/\\/g, '/');
+    if (expected.has(rel) || seen.has(rel)) return; // still on disk, or already queued
+    const ext = path.extname(rel).toLowerCase();
+    if (nativeSupported.has(ext)) return; // natively-supported: left to the Rust purge
+    if (!installedExts.has(ext)) return; // extension not installed for WASM
+    seen.add(rel);
+    // Push the ORIGINAL raw path (not the normalised form) so the eventual
+    // `DELETE FROM nodes WHERE file = ?` predicate in `purgeFilesData`
+    // matches the stored row byte-for-byte. `seen` is keyed on the normalised
+    // form, so a file stored once with `\` and once with `/` is one entry.
+    // NOTE(review): only the first-seen spelling is pushed, so rows stored
+    // under the other spelling look like they outlive this purge — confirm.
+    stale.push(rawRel);
+  };
+  for (const rel of existingNodes) consider(rel);
+  for (const rel of existingHashes) consider(rel);
+  return stale;
+}
+
+/**
+ * Group relative paths by their lowercased extension. Shape matches the bucket
+ * type that `formatDropExtensionSummary` consumes, so callers can render a
+ * log-friendly per-extension summary without going through `classifyNativeDrops`
+ * when the reason is already known (e.g. the stale-purge path where every path
+ * is guaranteed `unsupported-by-native`).
+ */
+function groupByExtension(relPaths: Iterable<string>): Map<string, string[]> {
+  const buckets = new Map<string, string[]>(); // lowercased extension → paths in input order
+  for (const rel of relPaths) {
+    const ext = path.extname(rel).toLowerCase(); // '' for extensionless paths
+    let list = buckets.get(ext);
+    if (!list) {
+      list = [];
+      buckets.set(ext, list);
+    }
+    list.push(rel); // the path itself is stored unchanged; only the key is lowercased
+  }
+  return buckets;
+}
+
+/**
+ * Detect files the native orchestrator silently dropped.
+ *
+ * Walks the filesystem and compares against `nodes` + `file_hashes`. A file
+ * is "missing" if it's absent from EITHER table — both must be present for
+ * the fast-skip pre-flight (#1054) to work, and the two can diverge (e.g.
+ * legacy DBs where `nodes` was populated but `file_hashes` was not).
+ *
+ * Restricted to files with an installed WASM grammar; extensions in
+ * `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
+ * installs) can't be parsed by either engine, so they're not a native
+ * regression — excluding them keeps the warn count in
+ * `backfillNativeDroppedFiles` meaningful.
+ *
+ * Also detects WASM-only files deleted from disk (#1073). Rust's
+ * `detect_removed_files` filter (#1070) skips files outside its supported
+ * extensions, so deletions of WASM-only languages don't reach the native
+ * purge path; the rest of the backfill only inserts rows, so without this
+ * step stale `nodes`/`file_hashes` rows would linger across incremental
+ * rebuilds until the next full rebuild.
+ *
+ * Cheap (no DB handoff, no parsing): used both to gate the backfill call
+ * and as its working set. NativeDbProxy supports `.prepare().all()`, so
+ * this works whether `ctx.db` is a proxy or a real better-sqlite3
+ * connection — letting us skip the close-native / reopen-better-sqlite3
+ * cost when there's nothing to backfill.
+ */
+function detectDroppedLanguageGap(ctx: PipelineContext): DroppedLanguageGap {
+  const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
+  const expected = new Set(
+    collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
+  );
+
+  const existingNodeRows = ctx.db
+    .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
+    .all() as Array<{ file: string }>;
+  const existingNodes = new Set(existingNodeRows.map((r) => r.file));
+
+  let existingHashes = new Set<string>();
+  try {
+    const existingHashRows = ctx.db
+      .prepare('SELECT DISTINCT file FROM file_hashes')
+      .all() as Array<{ file: string }>;
+    existingHashes = new Set(existingHashRows.map((r) => r.file));
+  } catch (e) {
+    // file_hashes table may not exist on legacy DBs; keep the set empty so
+    // every candidate counts as un-hashed and the backfill's hash upsert runs.
+    debug(
+      `detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
+    );
+  }
+
+  const installedExts = getInstalledWasmExtensions();
+  const missingRel: string[] = [];
+  const missingAbs: string[] = [];
+  for (const rel of expected) {
+    if (existingNodes.has(rel) && existingHashes.has(rel)) continue; // present in both tables
+    const ext = path.extname(rel).toLowerCase();
+    if (!installedExts.has(ext)) continue; // extension not in the installed WASM set
+    missingRel.push(rel);
+    missingAbs.push(path.join(ctx.rootDir, rel));
+  }
+
+  const staleRel = computeWasmOnlyStaleFiles({
+    existingNodes,
+    existingHashes,
+    expected,
+    installedExts,
+    nativeSupported: NATIVE_SUPPORTED_EXTENSIONS,
+  });
+
+  return { missingRel, missingAbs, staleRel }; // missingRel[i] / missingAbs[i] are index-aligned
+}
+
+/**
+ * Backfill files that the native orchestrator silently dropped during parse.
+ * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
+ *
+ * Also purges stale rows for WASM-only files deleted from disk (#1073), which
+ * Rust's `detect_removed_files` filter (#1070) skips.
+ *
+ * Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
+ * can use the same scan for both gating and the actual backfill — avoiding
+ * a redundant fs walk when the orchestrator's signals already triggered.
+ */
+async function backfillNativeDroppedFiles(
+  ctx: PipelineContext,
+  gap: DroppedLanguageGap,
+): Promise<void> {
+  const { missingRel, missingAbs, staleRel } = gap;
+  if (missingAbs.length === 0 && staleRel.length === 0) return; // nothing to insert or purge
+
+  // Now that we know there's work to do, hand off to better-sqlite3 (needed
+  // for the INSERT path below).
+  if (ctx.nativeFirstProxy) {
+    closeNativeDb(ctx, 'pre-parity-backfill');
+    ctx.db = openDb(ctx.dbPath);
+    ctx.nativeFirstProxy = false;
+  }
+
+  const dbConn = ctx.db as unknown as BetterSqlite3Database;
+
+  // Purge WASM-only files that were deleted from disk (#1073). Rust's
+  // detect_removed_files skips them and the insert path below never visits
+  // them, so without this their rows would persist across rebuilds until the
+  // next full rebuild reset the DB.
+  if (staleRel.length > 0) {
+    // `computeWasmOnlyStaleFiles` guarantees every path here has an extension
+    // outside NATIVE_SUPPORTED_EXTENSIONS, so `classifyNativeDrops` would
+    // always bucket 100% into `unsupported-by-native`. Build the extension
+    // summary directly to avoid a redundant classification pass.
+    const staleByExt = groupByExtension(staleRel);
+    info(
+      `Detected ${staleRel.length} deleted WASM-only file(s) across ${staleByExt.size} extension(s) the native orchestrator skipped; purging stale rows:${formatDropExtensionSummary(staleByExt)}`,
+    );
+    purgeFilesData(dbConn, staleRel);
+  }
+
+  if (missingAbs.length === 0) return; // purge-only run: nothing to parse or insert
+
+  // Classify drops so users see per-extension reasons instead of just a count
+  // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
+  // extractor); `native-extractor-failure` indicates a real native bug since
+  // the language IS supported by the addon yet the file was dropped anyway.
+  const { byReason, totals } = classifyNativeDrops(missingRel);
+  if (totals['unsupported-by-native'] > 0) {
+    const buckets = byReason['unsupported-by-native'];
+    info(
+      `Native orchestrator skipped ${totals['unsupported-by-native']} file(s) across ${buckets.size} extension(s) in languages without a Rust extractor; backfilling via WASM:${formatDropExtensionSummary(buckets)}`,
+    );
+  }
+  if (totals['native-extractor-failure'] > 0) {
+    const buckets = byReason['native-extractor-failure'];
+    warn(
+      `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) across ${buckets.size} extension(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM:${formatDropExtensionSummary(buckets)}`,
+    );
+  }
+  const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
+
+  const rows: unknown[][] = [];
+  const exportKeys: unknown[][] = [];
+  for (const [relPath, symbols] of wasmResults) {
+    // File row — mirrors insertDefinitionsAndExports: qualified_name is null.
+    rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
+    for (const def of symbols.definitions ?? []) {
+      // Populate qualified_name/scope the same way the JS fallback does so
+      // downstream queries (cross-file references, "go to definition") find
+      // these symbols.
+      const dotIdx = def.name.lastIndexOf('.');
+      const scope = dotIdx !== -1 ? def.name.slice(0, dotIdx) : null;
+      rows.push([
+        def.name,
+        def.kind,
+        relPath,
+        def.line,
+        def.endLine ?? null,
+        null,
+        def.name,
+        scope,
+        def.visibility ?? null,
+      ]);
+    }
+    // Exports: insert the row (INSERT OR IGNORE — a matching definition row
+    // is a no-op) and queue a key for the second-pass exported=1 update, so
+    // queries filtering on exported=1 find backfilled symbols (#970).
+    for (const exp of symbols.exports ?? []) {
+      rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
+      exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
+    }
+  }
+  const db = dbConn; // same better-sqlite3 handle as `dbConn`
+  batchInsertNodes(db, rows);
+
+  // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
+  if (exportKeys.length > 0) {
+    const EXPORT_CHUNK = 500; // 4 bound values per key → at most 2000 per statement
+    const exportStmtCache = new Map<number, SqliteStatement>(); // keyed by chunk size
+    for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
+      const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
+      const chunkSize = end - i;
+      let updateStmt = exportStmtCache.get(chunkSize);
+      if (!updateStmt) {
+        const conditions = Array.from(
+          { length: chunkSize },
+          () => '(name = ? AND kind = ? AND file = ? AND line = ?)',
+        ).join(' OR ');
+        updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
+        exportStmtCache.set(chunkSize, updateStmt);
+      }
+      const vals: unknown[] = [];
+      for (let j = i; j < end; j++) {
+        const k = exportKeys[j] as unknown[];
+        vals.push(k[0], k[1], k[2], k[3]);
+      }
+      updateStmt.run(...vals);
+    }
+  }
+
+  // Persist file_hashes rows for every backfilled file. The Rust orchestrator
+  // only hashes files it parsed itself, so without this step files in
+  // optional-language extensions (e.g. .clj when no Rust extractor exists)
+  // would be missing from `file_hashes` — permanently breaking the JS-side
+  // fast-skip pre-flight (#1054), which rejects on `collected file missing
+  // from file_hashes` and forces every no-op rebuild back through the full
+  // ~2s native pipeline (#1068).
+  //
+  // Iterates `missingRel` (every collected file the Rust orchestrator
+  // dropped), not `wasmResults`, so files that produced zero symbols still
+  // get a row.
+  try {
+    const upsertHash = db.prepare(
+      'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
+    );
+    const writeHashes = db.transaction(() => {
+      for (let i = 0; i < missingRel.length; i++) {
+        const relPath = missingRel[i];
+        const absPath = missingAbs[i];
+        if (!relPath || !absPath) continue;
+        let code: string | null;
+        try {
+          code = readFileSafe(absPath);
+        } catch (e) {
+          debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
+          continue;
+        }
+        if (code === null) continue; // no content to hash — skip this row
+        const stat = fileStat(absPath);
+        const mtime = stat ? stat.mtime : 0; // 0 when no stat is available
+        const size = stat ? stat.size : 0;
+        upsertHash.run(relPath, fileHash(code), mtime, size);
+      }
+    });
+    writeHashes();
+  } catch (e) {
+    debug(
+      `backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`,
+    );
+  }
+
+  // Free WASM parse trees from the inline backfill path (#1058).
+  // `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
+  // backed by WASM linear memory) on every result, but these symbols are
+  // consumed locally for DB row construction and never added to
+  // `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
+  // sees them. Without this, trees leak WASM memory until process exit —
+  // bounded per run but cumulative across in-process integration tests.
+  // Mirrors the cleanup discipline established for #931.
+  for (const [, symbols] of wasmResults) { // NOTE(review): no `finally` — skipped if an insert throws
+    const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
+    if (tree && typeof tree.delete === 'function') {
+      try {
+        tree.delete();
+      } catch {
+        /* ignore cleanup errors */
+      }
+    }
+    (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
+    (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
+  }
+}
+
+/**
+ * Try the native build orchestrator.
+ *
+ * Returns:
+ *   - `BuildResult` on success (caller should return it directly).
+ *   - `'early-exit'` when the orchestrator detected no changes (caller should return undefined).
+ *   - `undefined` when native is unavailable or skipped (caller should fall through to the JS pipeline).
+ *
+ * Encapsulates the orchestrator-selection strategy: open `NativeDatabase`,
+ * invoke `nativeDb.buildGraph()` (the Rust pipeline), and run post-native
+ * structure + analysis fallbacks. Lives in its own file to keep the Rust
+ * orchestrator entry point separated from the JS-side `buildGraph()` driver
+ * in `pipeline.ts`.
+ */
+export async function tryNativeOrchestrator(
+  ctx: PipelineContext,
+): Promise<BuildResult | undefined | 'early-exit'> {
+  const skipReason = shouldSkipNativeOrchestrator(ctx);
+  if (skipReason) {
+    debug(`Skipping native orchestrator: ${skipReason}`);
+    return undefined;
+  }
+
+  // Open NativeDatabase on demand — deferred from setupPipeline to skip the
+  // ~60ms cost on no-op/early-exit builds. Close the better-sqlite3 connection
+  // first to avoid dual-connection WAL corruption.
+  if (!ctx.nativeDb && ctx.nativeAvailable) {
+    const native = loadNative();
+    if (native?.NativeDatabase) {
+      try {
+        // Close better-sqlite3 before opening rusqlite to avoid WAL conflicts.
+        // Uses raw close() instead of closeDb() intentionally — the advisory lock
+        // is kept and transferred to the NativeDbProxy below, not released here.
+        ctx.db.close();
+        acquireAdvisoryLock(ctx.dbPath);
+        ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
+        ctx.nativeDb.initSchema();
+        // Replace ctx.db with a NativeDbProxy so post-native JS fallback
+        // (structure, analysis) can use it without reopening better-sqlite3.
+        const proxy = new NativeDbProxy(ctx.nativeDb);
+        proxy.__lockPath = `${ctx.dbPath}.lock`;
+        ctx.db = proxy as unknown as typeof ctx.db;
+        ctx.nativeFirstProxy = true;
+      } catch (err) {
+        warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
+        try {
+          ctx.nativeDb?.close();
+        } catch (e) {
+          debug(`tryNativeOrchestrator: close failed during fallback: ${toErrorMessage(e)}`);
+        }
+        ctx.nativeDb = undefined;
+        ctx.nativeFirstProxy = false; // defensive: reset in case future refactors move the assignment above throwing lines
+        releaseAdvisoryLock(`${ctx.dbPath}.lock`);
+        // Reopen better-sqlite3 for JS pipeline fallback
+        ctx.db = openDb(ctx.dbPath);
+      }
+    }
+  }
+
+  if (!ctx.nativeDb?.buildGraph) return undefined;
+
+  const resultJson = ctx.nativeDb.buildGraph(
+    ctx.rootDir,
+    JSON.stringify(ctx.config),
+    JSON.stringify(ctx.aliases),
+    JSON.stringify(ctx.opts),
+  );
+  const result = JSON.parse(resultJson) as NativeOrchestratorResult; // unvalidated cast
+
+  if (result.earlyExit) {
+    info('No changes detected');
+    // Even on no-op rebuilds, dropped-language files added since the last
+    // full build are still missing from `nodes`/`file_hashes` (#1083), and
+    // WASM-only files deleted from disk leave stale rows behind (#1073).
+    // The orchestrator's file_collector skipped them, so its earlyExit
+    // doesn't imply DB consistency. Run the gap repair before returning.
+    const gap = detectDroppedLanguageGap(ctx);
+    if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) {
+      await backfillNativeDroppedFiles(ctx, gap);
+    }
+    closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
+    return 'early-exit';
+  }
+
+  // Log incremental status to match JS pipeline output
+  const changed = result.changedCount ?? 0;
+  const removed = result.removedCount ?? 0;
+  if (!result.isFullBuild && (changed > 0 || removed > 0)) {
+    info(`Incremental: ${changed} changed, ${removed} removed`);
+  }
+
+  const p = result.phases;
+
+  // Sync build_meta so JS-side version/engine checks work on next build.
+  // Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
+  // platform package.json version (ctx.engineVersion). The Rust side's
+  // check_version_mismatch compares against CARGO_PKG_VERSION; writing
+  // the package.json value would create a permanent mismatch whenever
+  // the binary and platform package.json diverge — e.g., CI hot-swap
+  // via ci-install-native.mjs (#1066) — forcing every subsequent build
+  // to be a full rebuild.
+  //
+  // When the native addon doesn't expose engineVersion() (older addon),
+  // fall back to CODEGRAPH_VERSION — same fallback used by both
+  // checkEngineSchemaMismatch (read path) and persistBuildMetadata
+  // (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
+  // here would re-introduce a read/write version asymmetry for that case.
+  const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
+  setBuildMeta(ctx.db, {
+    engine: ctx.engineName,
+    engine_version: nativeVersionForMeta,
+    codegraph_version: nativeVersionForMeta,
+    schema_version: String(ctx.schemaVersion),
+    built_at: new Date().toISOString(),
+  });
+
+  info(
+    `Native build orchestrator completed: ${result.nodeCount ?? 0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`,
+  );
+
+  // ── Post-native structure + analysis ──────────────────────────────
+  let analysisTiming = {
+    astMs: +(p.astMs ?? 0),
+    complexityMs: +(p.complexityMs ?? 0),
+    cfgMs: +(p.cfgMs ?? 0),
+    dataflowMs: +(p.dataflowMs ?? 0),
+  };
+  let structurePatchMs = 0;
+  // Skip JS structure when the Rust pipeline's small-incremental fast path
+  // already handled it. For full builds and large incrementals where Rust
+  // skipped structure, we must run the JS fallback.
+  const needsStructure = !result.structureHandled;
+  // When the Rust addon doesn't include analysis persistence (older addon
+  // version or analysis failed), fall back to JS-side analysis.
+  const needsAnalysisFallback =
+    !result.analysisComplete &&
+    (ctx.opts.ast !== false ||
+      ctx.opts.complexity !== false ||
+      ctx.opts.cfg !== false ||
+      ctx.opts.dataflow !== false);
+
+  if (needsStructure || needsAnalysisFallback) {
+    // When analysis fallback is needed, handoff to better-sqlite3 — the
+    // analysis engine uses the suspend/resume WAL pattern that requires a
+    // real better-sqlite3 connection, not the NativeDbProxy.
+    if (needsAnalysisFallback && ctx.nativeFirstProxy) {
+      closeNativeDb(ctx, 'pre-analysis-fallback');
+      ctx.db = openDb(ctx.dbPath);
+      ctx.nativeFirstProxy = false;
+    } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) {
+      // DB reopen failed — return partial result. NOTE(review): skips closeDbPair; confirm.
+      return formatNativeTimingResult(p, 0, analysisTiming);
+    }
+
+    const fileSymbols = reconstructFileSymbolsFromDb(ctx);
+
+    if (needsStructure) {
+      structurePatchMs = await runPostNativeStructure(
+        ctx,
+        fileSymbols,
+        !!result.isFullBuild,
+        result.changedFiles,
+      );
+    }
+
+    if (needsAnalysisFallback) {
+      analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles);
+    }
+  }
+
+  // Engine parity: the native orchestrator silently drops files whose
+  // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
+  // stale native binaries). WASM handles those — backfill via WASM so both
+  // engines process the same file set (#967).
+  //
+  // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
+  // both gating and the backfill itself. On dirty incrementals/full builds
+  // the orchestrator signals trigger backfill, so the walk happens once
+  // (instead of redundantly inside backfill). On quiet incrementals we
+  // still pay the walk so we can detect brand-new files in dropped-language
+  // extensions — a gap that the orchestrator's `detect_removed_files`
+  // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
+  // because the expensive part (WASM re-parse of the missing set) is
+  // gated below.
+  const removedCount = result.removedCount ?? 0; // same value as `removed` above
+  const changedCount = result.changedCount ?? 0; // same value as `changed` above
+  const gap = detectDroppedLanguageGap(ctx);
+  if (
+    result.isFullBuild ||
+    removedCount > 0 ||
+    changedCount > 0 ||
+    gap.missingAbs.length > 0 ||
+    gap.staleRel.length > 0
+  ) {
+    await backfillNativeDroppedFiles(ctx, gap);
+  }
+
+  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
+  return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
+}
diff --git a/src/domain/graph/cycles.ts b/src/domain/graph/cycles.ts
index 4ccc872f2..bb4d61168 100644
--- a/src/domain/graph/cycles.ts
+++ b/src/domain/graph/cycles.ts
@@ -3,6 +3,45 @@ import { loadNative } from '../../infrastructure/native.js';
 import { isTestFile } from '../../infrastructure/test-filter.js';
 import type { BetterSqlite3Database } from '../../types.js';
 
+type Edge = { source: string; target: string }; // edge between two node labels
+type DbEdge = { source_id: number; target_id: number }; // edge between two DB node ids
+
+/**
+ * Build a label-based edge list from DB rows, filtering to known nodes and
+ * deduplicating. Self-loops are skipped (Tarjan treats them as trivial SCCs).
+ */
+function buildLabelEdges(dbEdges: DbEdge[], idToLabel: Map<number, string>): Edge[] {
+  const edges: Edge[] = [];
+  const seen = new Set<string>(); // dedup keys of edges already emitted
+  for (const e of dbEdges) {
+    if (e.source_id === e.target_id) continue; // NOTE(review): id-level only; equal labels can pass
+    const src = idToLabel.get(e.source_id);
+    const tgt = idToLabel.get(e.target_id);
+    if (src === undefined || tgt === undefined) continue; // endpoint id not in idToLabel
+    const key = `${src}\0${tgt}`; // NUL-separated so the two labels cannot run together
+    if (seen.has(key)) continue;
+    seen.add(key);
+    edges.push({ source: src, target: tgt });
+  }
+  return edges;
+}
+
+function buildFileLevelEdges(db: BetterSqlite3Database, noTests: boolean): Edge[] {
+  let nodes = getFileNodesAll(db);
+  if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); // drop test files on request
+  const idToLabel = new Map<number, string>();
+  for (const n of nodes) idToLabel.set(n.id, n.file); // label = file path
+  return buildLabelEdges(getImportEdges(db), idToLabel); // edges touching dropped nodes are skipped
+}
+
+function buildCallableEdges(db: BetterSqlite3Database, noTests: boolean): Edge[] {
+  let nodes = getCallableNodes(db);
+  if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file)); // drop nodes in test files
+  const idToLabel = new Map<number, string>();
+  for (const n of nodes) idToLabel.set(n.id, `${n.name}|${n.file}`); // label = "name|file"
+  return buildLabelEdges(getCallEdges(db), idToLabel); // edges touching dropped nodes are skipped
+}
+
 /**
  * Find cycles using Tarjan's SCC algorithm.
  *
@@ -16,66 +55,20 @@ export function findCycles(
   const fileLevel = opts.fileLevel !== false;
   const noTests = opts.noTests || false;
 
-  const edges: Array<{ source: string; target: string }> = [];
-  const seen = new Set<string>();
-
-  if (fileLevel) {
-    let nodes = getFileNodesAll(db);
-    if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file));
-    const nodeIds = new Set<number>();
-    const idToFile = new Map<number, string>();
-    for (const n of nodes) {
-      nodeIds.add(n.id);
-      idToFile.set(n.id, n.file);
-    }
-    for (const e of getImportEdges(db)) {
-      if (!nodeIds.has(e.source_id) || !nodeIds.has(e.target_id)) continue;
-      if (e.source_id === e.target_id) continue;
-      const src = idToFile.get(e.source_id)!;
-      const tgt = idToFile.get(e.target_id)!;
-      const key = `${src}\0${tgt}`;
-      if (seen.has(key)) continue;
-      seen.add(key);
-      edges.push({ source: src, target: tgt });
-    }
-  } else {
-    let nodes = getCallableNodes(db);
-    if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file));
-    const nodeIds = new Set<number>();
-    const idToLabel = new Map<number, string>();
-    for (const n of nodes) {
-      nodeIds.add(n.id);
-      idToLabel.set(n.id, `${n.name}|${n.file}`);
-    }
-    for (const e of getCallEdges(db)) {
-      if (!nodeIds.has(e.source_id) || !nodeIds.has(e.target_id)) continue;
-      if (e.source_id === e.target_id) continue;
-      const src = idToLabel.get(e.source_id)!;
-      const tgt = idToLabel.get(e.target_id)!;
-      const key = `${src}\0${tgt}`;
-      if (seen.has(key)) continue;
-      seen.add(key);
-      edges.push({ source: src, target: tgt });
-    }
-  }
+  const edges = fileLevel ? buildFileLevelEdges(db, noTests) : buildCallableEdges(db, noTests);
 
   const native = loadNative();
   if (native) {
     return native.detectCycles(edges) as string[][];
   }
-
   return tarjanFromEdges(edges);
 }
 
-export function findCyclesJS(edges: Array<{ source: string; target: string }>): string[][] {
+export function findCyclesJS(edges: Edge[]): string[][] { // always the JS Tarjan path, no native call
   return tarjanFromEdges(edges);
 }
 
-/**
- * Run Tarjan's SCC on a flat edge list. Returns SCCs with length > 1 (cycles).
- * Uses a simple adjacency-list Map instead of a full CodeGraph.
- */
-function tarjanFromEdges(edges: Array<{ source: string; target: string }>): string[][] {
+function buildAdjacency(edges: Edge[]): { adj: Map<string, string[]>; allNodes: Set<string> } {
   const adj = new Map<string, string[]>();
   const allNodes = new Set<string>();
   for (const { source, target } of edges) {
@@ -88,6 +81,15 @@ function tarjanFromEdges(edges: Array<{ source: string; target: string }>): stri
     }
     list.push(target);
   }
+  return { adj, allNodes };
+}
+
+/**
+ * Run Tarjan's SCC on a flat edge list. Returns SCCs with length > 1 (cycles).
+ * Uses a simple adjacency-list Map instead of a full CodeGraph.
+ */
+function tarjanFromEdges(edges: Edge[]): string[][] {
+  const { adj, allNodes } = buildAdjacency(edges);
 
   let index = 0;
   const stack: string[] = [];
diff --git a/src/domain/graph/journal.ts b/src/domain/graph/journal.ts
index 900e33546..d20c7dab9 100644
--- a/src/domain/graph/journal.ts
+++ b/src/domain/graph/journal.ts
@@ -91,62 +91,69 @@ function trySteal(lockPath: string): AcquiredLock | null {
   return { fd, nonce };
 }
 
-function acquireJournalLock(lockPath: string): AcquiredLock {
-  const start = Date.now();
-  for (;;) {
-    const nonce = `${process.pid}-${crypto.randomBytes(8).toString('hex')}`;
+/**
+ * Try to create the lockfile fresh via `wx`. Returns the lock on success,
+ * `null` on EEXIST or a failed stamp write; other open errors are rethrown.
+ *
+ * If the stamp write fails (ENOSPC, I/O error) we release the empty file —
+ * leaving it would look stale to concurrent waiters and admit double-acquire.
+ */
+function tryFreshAcquire(lockPath: string): AcquiredLock | null {
+  const nonce = `${process.pid}-${crypto.randomBytes(8).toString('hex')}`;
+  let fd: number;
+  try {
+    fd = fs.openSync(lockPath, 'wx');
+  } catch (e) {
+    if ((e as NodeJS.ErrnoException).code === 'EEXIST') return null;
+    throw e;
+  }
+  try {
+    fs.writeSync(fd, `${process.pid}\n${nonce}\n`);
+  } catch {
     try {
-      const fd = fs.openSync(lockPath, 'wx');
-      try {
-        fs.writeSync(fd, `${process.pid}\n${nonce}\n`);
-      } catch {
-        // Stamp write failed (ENOSPC, I/O error). An empty lockfile would
-        // look stale to concurrent waiters (Number('') === 0, isPidAlive(0)
-        // returns false), so they'd steal our live lock. Release and retry.
-        try {
-          fs.closeSync(fd);
-        } catch {
-          /* ignore */
-        }
-        try {
-          fs.unlinkSync(lockPath);
-        } catch {
-          /* ignore */
-        }
-        if (Date.now() - start > LOCK_TIMEOUT_MS) {
-          throw new Error(
-            `Failed to acquire journal lock at ${lockPath} within ${LOCK_TIMEOUT_MS}ms`,
-          );
-        }
-        sleepSync(LOCK_RETRY_MS);
-        continue;
-      }
-      return { fd, nonce };
-    } catch (e) {
-      if ((e as NodeJS.ErrnoException).code !== 'EEXIST') throw e;
+      fs.closeSync(fd);
+    } catch {
+      /* ignore */
     }
-
-    let holderAlive = true;
     try {
-      const pidContent = fs.readFileSync(lockPath, 'utf-8').split('\n')[0]!.trim();
-      holderAlive = isPidAlive(Number(pidContent));
+      fs.unlinkSync(lockPath);
     } catch {
-      /* unreadable — fall through to age check */
+      /* ignore */
     }
+    return null; // stamp write failed; the empty lockfile was released above
+  }
+  return { fd, nonce };
+}
 
-    let shouldSteal = !holderAlive;
-    if (holderAlive) {
-      try {
-        const stat = fs.statSync(lockPath);
-        if (Date.now() - stat.mtimeMs > LOCK_STALE_MS) {
-          shouldSteal = true;
-        }
-      } catch {
-        /* stat failed — keep retrying */
-      }
-    }
+/**
+ * Decide whether the current lock holder is stale and should be stolen.
+ * True if the recorded PID is dead or the lockfile mtime is older than
+ * LOCK_STALE_MS; an unreadable file counts as alive, a failed stat as fresh.
+ */
+function isLockStale(lockPath: string): boolean {
+  let holderAlive = true;
+  try {
+    const pidContent = fs.readFileSync(lockPath, 'utf-8').split('\n')[0]!.trim();
+    holderAlive = isPidAlive(Number(pidContent));
+  } catch {
+    /* unreadable — fall through to age check */
+  }
+  if (!holderAlive) return true;
+  try {
+    const stat = fs.statSync(lockPath);
+    return Date.now() - stat.mtimeMs > LOCK_STALE_MS;
+  } catch {
+    return false; // stat failed — not treated as stale
+  }
+}
 
-    if (shouldSteal) {
+function acquireJournalLock(lockPath: string): AcquiredLock {
+  const start = Date.now();
+  for (;;) {
+    const fresh = tryFreshAcquire(lockPath);
+    if (fresh) return fresh;
+
+    if (isLockStale(lockPath)) {
       const stolen = trySteal(lockPath);
       if (stolen) return stolen;
       // Steal failed or lost the race — fall through to timeout check & retry.
@@ -227,27 +234,20 @@ interface JournalResult {
   removed?: string[];
 }
 
-export function readJournal(rootDir: string): JournalResult {
-  const journalPath = path.join(rootDir, '.codegraph', JOURNAL_FILENAME);
-  let content: string;
-  try {
-    content = fs.readFileSync(journalPath, 'utf-8');
-  } catch {
-    return { valid: false };
-  }
-
-  const lines = content.split('\n');
-  if (lines.length === 0 || !lines[0]!.startsWith(HEADER_PREFIX)) {
+function parseJournalHeader(firstLine: string | undefined): number | null {
+  if (!firstLine || !firstLine.startsWith(HEADER_PREFIX)) {
     debug('Journal has malformed or missing header');
-    return { valid: false };
+    return null;
   }
-
-  const timestamp = Number(lines[0]!.slice(HEADER_PREFIX.length).trim());
+  const timestamp = Number(firstLine.slice(HEADER_PREFIX.length).trim());
   if (!Number.isFinite(timestamp) || timestamp <= 0) {
     debug('Journal has invalid timestamp');
-    return { valid: false };
+    return null;
   }
+  return timestamp;
+}
 
+function parseJournalBody(lines: string[]): { changed: string[]; removed: string[] } {
   const changed: string[] = [];
   const removed: string[] = [];
   const seenChanged = new Set<string>();
@@ -263,14 +263,29 @@ export function readJournal(rootDir: string): JournalResult {
         seenRemoved.add(filePath);
         removed.push(filePath);
       }
-    } else {
-      if (!seenChanged.has(line)) {
-        seenChanged.add(line);
-        changed.push(line);
-      }
+    } else if (!seenChanged.has(line)) {
+      seenChanged.add(line);
+      changed.push(line);
     }
   }
 
+  return { changed, removed };
+}
+
+export function readJournal(rootDir: string): JournalResult {
+  const journalPath = path.join(rootDir, '.codegraph', JOURNAL_FILENAME);
+  let content: string;
+  try {
+    content = fs.readFileSync(journalPath, 'utf-8');
+  } catch {
+    return { valid: false };
+  }
+
+  const lines = content.split('\n');
+  const timestamp = parseJournalHeader(lines[0]);
+  if (timestamp === null) return { valid: false };
+
+  const { changed, removed } = parseJournalBody(lines);
   return { valid: true, timestamp, changed, removed };
 }
 
diff --git a/src/domain/parser.ts b/src/domain/parser.ts
index acf4e9c04..38ebc035a 100644
--- a/src/domain/parser.ts
+++ b/src/domain/parser.ts
@@ -571,25 +571,36 @@ export function classifyNativeDrops(relPaths: Iterable<string>): NativeDropClass
 }
 
 /**
- * Render `{ ext → paths[] }` as `ext (n: sample.ext, ...)` slices for log lines.
- * Caps at 3 sample paths per extension and 6 extensions total to keep warnings
- * readable when many languages are dropped at once. Extensions are sorted by
- * descending file count so the loudest offender shows up first; ties keep
- * insertion order. Pure function — safe to unit-test independently.
+ * Render `{ ext → paths[] }` as a multi-line tabular breakdown for log lines.
+ * Each shown extension gets its own two-space-indented row: the extension
+ * right-padded and the file count left-padded to the widest shown value, then
+ * up to 3 sample paths joined by `, ` plus ` (+N more)` when samples were cut.
+ * At most 6 extensions are shown, sorted by descending file count (ties keep
+ * insertion order); any remainder is summarized in a final
+ * `(+N more extension(s))` row.
+ *
+ * Returns the empty string for empty input, and otherwise a string that
+ * begins with `\n` so callers can append it directly after the header line
+ * (`"Backfilling via WASM:" + formatDropExtensionSummary(...)`).
+ * Pure function — safe to unit-test independently.
  */
 export function formatDropExtensionSummary(buckets: Map<string, string[]>): string {
   const MAX_EXTS = 6;
   const MAX_SAMPLES = 3;
   const entries = Array.from(buckets.entries()).sort((a, b) => b[1].length - a[1].length);
-  const shown = entries.slice(0, MAX_EXTS).map(([ext, paths]) => {
+  if (entries.length === 0) return '';
+  const shown = entries.slice(0, MAX_EXTS);
+  const extWidth = Math.max(...shown.map(([ext]) => ext.length));
+  const countWidth = Math.max(...shown.map(([, paths]) => String(paths.length).length));
+  const lines = shown.map(([ext, paths]) => {
     const sample = paths.slice(0, MAX_SAMPLES).join(', ');
-    const more = paths.length > MAX_SAMPLES ? `, +${paths.length - MAX_SAMPLES} more` : '';
-    return `${ext} (${paths.length}: ${sample}${more})`;
+    const more = paths.length > MAX_SAMPLES ? ` (+${paths.length - MAX_SAMPLES} more)` : '';
+    return `  ${ext.padEnd(extWidth)}  ${String(paths.length).padStart(countWidth)}  ${sample}${more}`;
   });
   if (entries.length > MAX_EXTS) {
-    shown.push(`+${entries.length - MAX_EXTS} more extension(s)`);
+    lines.push(`  (+${entries.length - MAX_EXTS} more extension(s))`);
   }
-  return shown.join('; ');
+  return `\n${lines.join('\n')}`;
 }
 
 // ── Unified API ──────────────────────────────────────────────────────────────
diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts
index 9c380870b..0eb89c854 100644
--- a/tests/parsers/native-drop-classification.test.ts
+++ b/tests/parsers/native-drop-classification.test.ts
@@ -89,25 +89,36 @@ describe('formatDropExtensionSummary', () => {
     expect(formatDropExtensionSummary(new Map())).toBe('');
   });
 
-  it('lists every extension when under the cap', () => {
+  it('renders one indented row per extension prefixed with a leading newline', () => {
     const buckets = new Map<string, string[]>([
       ['.ts', ['a.ts', 'b.ts']],
       ['.py', ['c.py']],
     ]);
-    expect(formatDropExtensionSummary(buckets)).toBe('.ts (2: a.ts, b.ts); .py (1: c.py)');
+    expect(formatDropExtensionSummary(buckets)).toBe('\n  .ts  2  a.ts, b.ts\n  .py  1  c.py');
   });
 
   it('caps samples per extension at 3 and renders +N more', () => {
     const buckets = new Map<string, string[]>([['.ts', ['a.ts', 'b.ts', 'c.ts', 'd.ts', 'e.ts']]]);
-    expect(formatDropExtensionSummary(buckets)).toBe('.ts (5: a.ts, b.ts, c.ts, +2 more)');
+    expect(formatDropExtensionSummary(buckets)).toBe('\n  .ts  5  a.ts, b.ts, c.ts (+2 more)');
   });
 
   it('shows exactly MAX_SAMPLES samples without a +N suffix when count equals the cap', () => {
     const buckets = new Map<string, string[]>([['.ts', ['a.ts', 'b.ts', 'c.ts']]]);
-    expect(formatDropExtensionSummary(buckets)).toBe('.ts (3: a.ts, b.ts, c.ts)');
+    expect(formatDropExtensionSummary(buckets)).toBe('\n  .ts  3  a.ts, b.ts, c.ts');
   });
 
-  it('caps extensions at 6 and renders +N more extension(s)', () => {
+  it('right-pads the extension column and right-aligns the count column for tabular layout', () => {
+    const buckets = new Map<string, string[]>([
+      ['.kt', ['a.kt']],
+      ['.tsx', new Array(100).fill('x.tsx')], // 100 files — sets wider count column
+    ]);
+    const out = formatDropExtensionSummary(buckets);
+    // `.tsx` (4 chars) sets the ext width; `.kt` is padded to 4 chars.
+    // 100 (3 chars) sets the count width; 1 is right-aligned to 3 chars.
+    expect(out).toBe('\n  .tsx  100  x.tsx, x.tsx, x.tsx (+97 more)\n  .kt     1  a.kt');
+  });
+
+  it('caps extensions at 6 and renders +N more extension(s) on its own row', () => {
     // 8 extensions, all with 1 file — sorted by count is a stable tie so insertion
     // order wins, and the first 6 are shown.
     const buckets = new Map<string, string[]>([
@@ -121,12 +132,12 @@ describe('formatDropExtensionSummary', () => {
       ['.h', ['1.h']],
     ]);
     const out = formatDropExtensionSummary(buckets);
-    expect(out.endsWith('; +2 more extension(s)')).toBe(true);
+    expect(out.endsWith('\n  (+2 more extension(s))')).toBe(true);
     // First 6 extensions are present, the last 2 (.g, .h) are not.
-    expect(out).toContain('.a (1: 1.a)');
-    expect(out).toContain('.f (1: 1.f)');
-    expect(out).not.toContain('.g (');
-    expect(out).not.toContain('.h (');
+    expect(out).toContain('\n  .a  1  1.a');
+    expect(out).toContain('\n  .f  1  1.f');
+    expect(out).not.toContain('  .g  ');
+    expect(out).not.toContain('  .h  ');
   });
 
   it('sorts by descending file count so the loudest offender is first', () => {