From 08d254f0392acf7d95fa5790c07679feebbc69ba Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Mon, 25 May 2026 22:59:24 -0600
Subject: [PATCH 01/27] refactor(parity): render orchestrator-drop summary as a
 per-extension table

The native-orchestrator drop warning lived in a single wall-of-text WARN
line that grew unreadable when 30+ extensions were dropped at once
(easy to trigger via journal-vs-fresh-build collisions). Make the
per-extension breakdown scan like a table: header line keeps the count
and now also reports the extension total; each extension occupies its
own indented row with a right-aligned count column.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 src/domain/graph/builder/pipeline.ts          |  8 +++--
 src/domain/parser.ts                          | 31 +++++++++++++------
 .../native-drop-classification.test.ts        | 31 +++++++++++++------
 3 files changed, 47 insertions(+), 23 deletions(-)
diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts
index b18d3c473..4dce8aa3d 100644
--- a/src/domain/graph/builder/pipeline.ts
+++ b/src/domain/graph/builder/pipeline.ts
@@ -1018,7 +1018,7 @@ async function backfillNativeDroppedFiles(
     // summary directly to avoid a redundant classification pass.
     const staleByExt = groupByExtension(staleRel);
     info(
-      `Detected ${staleRel.length} deleted WASM-only file(s) the native orchestrator skipped; purging stale rows: ${formatDropExtensionSummary(staleByExt)}`,
+      `Detected ${staleRel.length} deleted WASM-only file(s) across ${staleByExt.size} extension(s) the native orchestrator skipped; purging stale rows:${formatDropExtensionSummary(staleByExt)}`,
     );
     purgeFilesData(dbConn, staleRel);
   }
@@ -1031,13 +1031,15 @@ async function backfillNativeDroppedFiles(
   // the language IS supported by the addon yet the file was dropped anyway.
   const { byReason, totals } = classifyNativeDrops(missingRel);
   if (totals['unsupported-by-native'] > 0) {
+    const buckets = byReason['unsupported-by-native'];
     info(
-      `Native orchestrator skipped ${totals['unsupported-by-native']} file(s) in languages without a Rust extractor; backfilling via WASM: ${formatDropExtensionSummary(byReason['unsupported-by-native'])}`,
+      `Native orchestrator skipped ${totals['unsupported-by-native']} file(s) across ${buckets.size} extension(s) in languages without a Rust extractor; backfilling via WASM:${formatDropExtensionSummary(buckets)}`,
     );
   }
   if (totals['native-extractor-failure'] > 0) {
+    const buckets = byReason['native-extractor-failure'];
     warn(
-      `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM: ${formatDropExtensionSummary(byReason['native-extractor-failure'])}`,
+      `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) across ${buckets.size} extension(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM:${formatDropExtensionSummary(buckets)}`,
     );
   }
   const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
diff --git a/src/domain/parser.ts b/src/domain/parser.ts
index b4aaa366b..bb53192c9 100644
--- a/src/domain/parser.ts
+++ b/src/domain/parser.ts
@@ -539,25 +539,36 @@ export function classifyNativeDrops(relPaths: Iterable<string>): NativeDropClass
 }
 
 /**
- * Render `{ ext → paths[] }` as `ext (n: sample.ext, ...)` slices for log lines.
- * Caps at 3 sample paths per extension and 6 extensions total to keep warnings
- * readable when many languages are dropped at once. Extensions are sorted by
- * descending file count so the loudest offender shows up first; ties keep
- * insertion order. Pure function — safe to unit-test independently.
+ * Render `{ ext → paths[] }` as a multi-line tabular breakdown for log lines.
+ * Each extension occupies its own line so a long warning scans like a table
+ * instead of a wall of semicolon-separated slices. Caps at 3 sample paths per
+ * extension and 6 extensions total to keep output bounded when many languages
+ * are dropped at once. Extensions are sorted by descending file count so the
+ * loudest offender shows up first; ties keep insertion order.
+ *
+ * Returns the empty string for empty input, and otherwise a string that
+ * begins with `\n` so callers can append it directly after the header line
+ * (`"Backfilling via WASM:" + formatDropExtensionSummary(...)`).
+ *
+ * Pure function — safe to unit-test independently.
  */
 export function formatDropExtensionSummary(buckets: Map<string, string[]>): string {
   const MAX_EXTS = 6;
   const MAX_SAMPLES = 3;
   const entries = Array.from(buckets.entries()).sort((a, b) => b[1].length - a[1].length);
-  const shown = entries.slice(0, MAX_EXTS).map(([ext, paths]) => {
+  if (entries.length === 0) return '';
+  const shown = entries.slice(0, MAX_EXTS);
+  const extWidth = Math.max(...shown.map(([ext]) => ext.length));
+  const countWidth = Math.max(...shown.map(([, paths]) => String(paths.length).length));
+  const lines = shown.map(([ext, paths]) => {
     const sample = paths.slice(0, MAX_SAMPLES).join(', ');
-    const more = paths.length > MAX_SAMPLES ? `, +${paths.length - MAX_SAMPLES} more` : '';
-    return `${ext} (${paths.length}: ${sample}${more})`;
+    const more = paths.length > MAX_SAMPLES ? ` (+${paths.length - MAX_SAMPLES} more)` : '';
+    return `  ${ext.padEnd(extWidth)}  ${String(paths.length).padStart(countWidth)}  ${sample}${more}`;
   });
   if (entries.length > MAX_EXTS) {
-    shown.push(`+${entries.length - MAX_EXTS} more extension(s)`);
+    lines.push(`  (+${entries.length - MAX_EXTS} more extension(s))`);
   }
-  return shown.join('; ');
+  return `\n${lines.join('\n')}`;
 }
 
 // ── Unified API ──────────────────────────────────────────────────────────────
diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts
index 9c380870b..d617d4757 100644
--- a/tests/parsers/native-drop-classification.test.ts
+++ b/tests/parsers/native-drop-classification.test.ts
@@ -89,25 +89,36 @@ describe('formatDropExtensionSummary', () => {
     expect(formatDropExtensionSummary(new Map())).toBe('');
   });
 
-  it('lists every extension when under the cap', () => {
+  it('renders one indented row per extension prefixed with a leading newline', () => {
     const buckets = new Map<string, string[]>([
       ['.ts', ['a.ts', 'b.ts']],
       ['.py', ['c.py']],
     ]);
-    expect(formatDropExtensionSummary(buckets)).toBe('.ts (2: a.ts, b.ts); .py (1: c.py)');
+    expect(formatDropExtensionSummary(buckets)).toBe('\n  .ts  2  a.ts, b.ts\n  .py  1  c.py');
   });
 
   it('caps samples per extension at 3 and renders +N more', () => {
     const buckets = new Map<string, string[]>([['.ts', ['a.ts', 'b.ts', 'c.ts', 'd.ts', 'e.ts']]]);
-    expect(formatDropExtensionSummary(buckets)).toBe('.ts (5: a.ts, b.ts, c.ts, +2 more)');
+    expect(formatDropExtensionSummary(buckets)).toBe('\n  .ts  5  a.ts, b.ts, c.ts (+2 more)');
   });
 
   it('shows exactly MAX_SAMPLES samples without a +N suffix when count equals the cap', () => {
     const buckets = new Map<string, string[]>([['.ts', ['a.ts', 'b.ts', 'c.ts']]]);
-    expect(formatDropExtensionSummary(buckets)).toBe('.ts (3: a.ts, b.ts, c.ts)');
+    expect(formatDropExtensionSummary(buckets)).toBe('\n  .ts  3  a.ts, b.ts, c.ts');
   });
 
-  it('caps extensions at 6 and renders +N more extension(s)', () => {
+  it('right-pads the extension column and right-aligns the count column for tabular layout', () => {
+    const buckets = new Map<string, string[]>([
+      ['.kt', ['a.kt']], // 100 files later — wider count column
+      ['.tsx', new Array(100).fill('x.tsx')],
+    ]);
+    const out = formatDropExtensionSummary(buckets);
+    // `.tsx` (4 chars) sets the ext width; `.kt` is padded to 4 chars.
+    // 100 (3 chars) sets the count width; 1 is right-aligned to 3 chars.
+    expect(out).toBe('\n  .tsx  100  x.tsx, x.tsx, x.tsx (+97 more)\n  .kt     1  a.kt');
+  });
+
+  it('caps extensions at 6 and renders +N more extension(s) on its own row', () => {
     // 8 extensions, all with 1 file — sorted by count is a stable tie so insertion
     // order wins, and the first 6 are shown.
     const buckets = new Map<string, string[]>([
@@ -121,12 +132,12 @@ describe('formatDropExtensionSummary', () => {
       ['.h', ['1.h']],
     ]);
     const out = formatDropExtensionSummary(buckets);
-    expect(out.endsWith('; +2 more extension(s)')).toBe(true);
+    expect(out.endsWith('\n  (+2 more extension(s))')).toBe(true);
     // First 6 extensions are present, the last 2 (.g, .h) are not.
-    expect(out).toContain('.a (1: 1.a)');
-    expect(out).toContain('.f (1: 1.f)');
-    expect(out).not.toContain('.g (');
-    expect(out).not.toContain('.h (');
+    expect(out).toContain('\n  .a  1  1.a');
+    expect(out).toContain('\n  .f  1  1.f');
+    expect(out).not.toContain('  .g  ');
+    expect(out).not.toContain('  .h  ');
   });
 
   it('sorts by descending file count so the loudest offender is first', () => {

From 9c8be552935310520d478a0adeefad801c940bf9 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 00:37:34 -0600
Subject: [PATCH 02/27] refactor(extractors): extend shared helpers for
 identifier and symbol collection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds shared utilities to src/extractors/helpers.ts in preparation for
adoption across language extractors (phase 2):

- nodeStartLine: companion to nodeEndLine for the ~108 hand-rolled
  startPosition.row + 1 literals scattered across extractors
- findFirstChildOfTypes: find first child matching any of N types
  (useful for grammar variants like string vs string_literal)
- iterChildren / PUNCTUATION_TOKENS: generator-based child iteration
  with punctuation skipping, intended for the elixir/gleam destructuring walks
- pushCall / pushImport: centralise Call/Import construction so line
  derivation stays consistent across extractors
- extractSimpleParameters / resolveParamName: uniform parameter
  extraction with optional type-map sink — collapses boilerplate in
  the ~16 per-language extractParams helpers

Phase 1 of the TS extractor refactor plan (sync.json cluster 1).
Additive only — no consumer adoption yet; existing helpers and
extractor behaviour unchanged. Consumers updated in phase 2.

docs check acknowledged: internal refactor, no doc updates needed.
---
 src/extractors/helpers.ts | 206 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 205 insertions(+), 1 deletion(-)

diff --git a/src/extractors/helpers.ts b/src/extractors/helpers.ts
index 6a3e129d4..62edc22ea 100644
--- a/src/extractors/helpers.ts
+++ b/src/extractors/helpers.ts
@@ -1,4 +1,11 @@
-import type { SubDeclaration, TreeSitterNode, TypeMapEntry } from '../types.js';
+import type {
+  Call,
+  ExtractorOutput,
+  Import,
+  SubDeclaration,
+  TreeSitterNode,
+  TypeMapEntry,
+} from '../types.js';
 
 /**
  * Maximum recursion depth for tree-sitter AST walkers.
@@ -6,6 +13,11 @@ import type { SubDeclaration, TreeSitterNode, TypeMapEntry } from '../types.js';
  */
 export const MAX_WALK_DEPTH = 200;
 
+/** Convert a tree-sitter node's start row to a 1-based source line. */
+export function nodeStartLine(node: TreeSitterNode): number {
+  return node.startPosition.row + 1;
+}
+
 export function nodeEndLine(node: TreeSitterNode): number {
   return node.endPosition.row + 1;
 }
@@ -18,6 +30,56 @@ export function findChild(node: TreeSitterNode, type: string): TreeSitterNode |
   return null;
 }
 
+/**
+ * Find the first child whose type is in `types`. Useful when several grammar
+ * variants name the same conceptual node differently (e.g. `string` vs
+ * `string_literal`). Returns the first match in document order, or null.
+ */
+export function findFirstChildOfTypes(
+  node: TreeSitterNode,
+  types: readonly string[],
+): TreeSitterNode | null {
+  for (let i = 0; i < node.childCount; i++) {
+    const child = node.child(i);
+    if (child && types.includes(child.type)) return child;
+  }
+  return null;
+}
+
+/**
+ * Iterate the direct children of `node` in document order, skipping nulls and
+ * tokens whose type appears in `skipTypes`. Mirrors the common
+ * `for (let i = 0; i < node.childCount; i++) { const c = node.child(i); if (...) continue; ... }`
+ * idiom while letting callers filter out grammar punctuation (`,`, `(`, `{`, etc.).
+ */
+export function* iterChildren(
+  node: TreeSitterNode,
+  skipTypes: ReadonlySet<string> = EMPTY_SKIP_SET,
+): Generator<TreeSitterNode> {
+  for (let i = 0; i < node.childCount; i++) {
+    const child = node.child(i);
+    if (!child) continue;
+    if (skipTypes.has(child.type)) continue;
+    yield child;
+  }
+}
+
+const EMPTY_SKIP_SET: ReadonlySet<string> = new Set();
+
+/** Common punctuation tokens — handy as a `skipTypes` set for `iterChildren`. */
+export const PUNCTUATION_TOKENS: ReadonlySet<string> = new Set([
+  ',',
+  ';',
+  '(',
+  ')',
+  '[',
+  ']',
+  '{',
+  '}',
+  ':',
+  '.',
+]);
+
 /**
  * Merge a type-map entry, keeping the higher-confidence one.
  * Shared across all language extractors that build type maps for call resolution.
@@ -197,3 +259,145 @@ export function extractModifierVisibility(
   }
   return undefined;
 }
+
+// ── Output-push helpers ────────────────────────────────────────────────────
+//
+// Most extractors finish with `ctx.calls.push({ name, line: node.startPosition.row + 1 })`
+// or `ctx.imports.push({ source, names, line: node.startPosition.row + 1 })`.
+// Centralising the construction keeps `line` derivation consistent and removes
+// the ~108 hand-rolled `startPosition.row + 1` literals scattered across
+// language extractors.
+
+/**
+ * Append a `Call` to the extractor output. `line` defaults to the start line of
+ * `node`; pass `extra` for `receiver` / `dynamic` flags.
+ */
+export function pushCall(
+  ctx: ExtractorOutput,
+  node: TreeSitterNode,
+  name: string,
+  extra: { receiver?: string; dynamic?: boolean } = {},
+): void {
+  if (!name) return;
+  const call: Call = { name, line: nodeStartLine(node) };
+  if (extra.receiver !== undefined) call.receiver = extra.receiver;
+  if (extra.dynamic !== undefined) call.dynamic = extra.dynamic;
+  ctx.calls.push(call);
+}
+
+/**
+ * Append an `Import` to the extractor output. `line` defaults to the start
+ * line of `node`. If `names` is empty, the source basename (split on `/`) is
+ * used as a single-name fallback — matching the convention in gleam, julia,
+ * and similar module-path imports.
+ */
+export function pushImport(
+  ctx: ExtractorOutput,
+  node: TreeSitterNode,
+  source: string,
+  names: string[],
+  flags: Partial<Omit<Import, 'source' | 'names' | 'line'>> = {},
+): void {
+  if (!source) return;
+  const resolved = names.length > 0 ? names : [lastPathSegment(source, '/') || source];
+  const entry: Import = { source, names: resolved, line: nodeStartLine(node) };
+  Object.assign(entry, flags);
+  ctx.imports.push(entry);
+}
+
+// ── Parameter extraction ───────────────────────────────────────────────────
+
+/**
+ * Options for {@link extractSimpleParameters}.
+ */
+export interface ExtractParametersOptions {
+  /** Tree-sitter types that mark a single parameter node (e.g. `formal_parameter`). */
+  paramTypes: readonly string[];
+  /**
+   * Field name on each parameter that holds the bound identifier. Defaults to
+   * `'name'`. Pass `null` to use the parameter node itself when its type is in
+   * `paramTypes` and it has no `name` field (e.g. R's bare `identifier`).
+   */
+  nameField?: string | null;
+  /**
+   * If true, when `nameField` lookup fails fall back to the first `identifier`
+   * child of the parameter. Useful for Gleam / Solidity-style grammars.
+   */
+  fallbackToIdentifier?: boolean;
+  /**
+   * Optional type-map sink. When provided, the parameter's `type` field text
+   * (if present) is recorded with the given confidence.
+   */
+  typeMap?: Map<string, TypeMapEntry>;
+  /** Confidence used when writing into `typeMap`. Defaults to `0.9`. */
+  typeMapConfidence?: number;
+  /**
+   * Optional callback to derive the type text from the parameter's `type`
+   * field node. Defaults to `node.text`. Use this for languages where the
+   * `type` field is wrapped (e.g. Java `generic_type` → first child).
+   */
+  resolveType?: (typeNode: TreeSitterNode) => string | undefined;
+}
+
+/**
+ * Extract parameters from a parameter-list node using a uniform pattern.
+ *
+ * This collapses the boilerplate in `extract*Params` helpers across
+ * Java/Julia/Gleam/Solidity/R/etc. — each one walks the parameter list,
+ * matches a parameter node type, reads the `name` field, and pushes a
+ * `SubDeclaration` with `kind: 'parameter'`.
+ */
+export function extractSimpleParameters(
+  paramListNode: TreeSitterNode | null,
+  options: ExtractParametersOptions,
+): SubDeclaration[] {
+  const params: SubDeclaration[] = [];
+  if (!paramListNode) return params;
+  const { paramTypes, nameField = 'name', fallbackToIdentifier = false } = options;
+
+  for (let i = 0; i < paramListNode.childCount; i++) {
+    const param = paramListNode.child(i);
+    if (!param || !paramTypes.includes(param.type)) continue;
+    const nameNode = resolveParamName(param, nameField, fallbackToIdentifier);
+    if (!nameNode) continue;
+    params.push({ name: nameNode.text, kind: 'parameter', line: nodeStartLine(param) });
+    recordParamType(param, nameNode.text, options);
+  }
+  return params;
+}
+
+/** Record a parameter's declared type into the type-map sink, if configured. */
+function recordParamType(
+  param: TreeSitterNode,
+  paramName: string,
+  options: ExtractParametersOptions,
+): void {
+  const { typeMap, resolveType, typeMapConfidence = 0.9 } = options;
+  if (!typeMap) return;
+  const typeNode = param.childForFieldName('type');
+  if (!typeNode) return;
+  const typeText = resolveType ? resolveType(typeNode) : typeNode.text;
+  if (!typeText) return;
+  setTypeMapEntry(typeMap, paramName, typeText, typeMapConfidence);
+}
+
+/**
+ * Resolve the identifier node that names a parameter. Used by
+ * {@link extractSimpleParameters}; exposed so language-specific extractors
+ * can reuse the same lookup logic in custom loops.
+ */
+export function resolveParamName(
+  paramNode: TreeSitterNode,
+  nameField: string | null,
+  fallbackToIdentifier: boolean,
+): TreeSitterNode | null {
+  if (nameField === null) {
+    return paramNode;
+  }
+  const named = paramNode.childForFieldName(nameField);
+  if (named) return named;
+  if (fallbackToIdentifier) {
+    return findChild(paramNode, 'identifier');
+  }
+  return null;
+}

From 9c3d016b859de1c5ee8f58b3ed0f3d1badd79b52 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 11:31:52 -0600
Subject: [PATCH 03/27] refactor(extractors): adopt shared helpers across
 language extractors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 2 of the TS extractor refactor plan (sync.json cluster 1). Adopts
the helpers extended in 9c8be55 (nodeStartLine, findFirstChildOfTypes,
pushCall, pushImport, extractSimpleParameters, stripQuotes) across six
language extractors:

- r.ts: drop local stripQuotes; use shared stripQuotes/pushCall/
  pushImport/findFirstChildOfTypes/nodeStartLine
- gleam.ts: use pushCall/pushImport/findFirstChildOfTypes/nodeStartLine;
  extract pushConstructor helper for the dual-branch data-constructor
  walk
- julia.ts: use pushCall/pushImport/nodeStartLine; collapse Julia param
  wrapper-type branches via JULIA_PARAM_WRAPPER_TYPES set
- java.ts: use pushCall/pushImport/nodeStartLine; collapse
  extractJavaParameters via extractSimpleParameters with typeMap sink;
  extract resolveJavaTypeText for the generic_type unwrap pattern
- solidity.ts: extract qualifyWithParent helper to collapse 6
  duplicated "parent ? `${parent}.${name}` : name"
  blocks
- solidity.ts: use pushCall/pushImport/findFirstChildOfTypes/
  nodeStartLine; collapse extractSolParams via extractSimpleParameters
- javascript.ts: bulk-replace 43 inline `XXX.startPosition.row + 1`
  literals with nodeStartLine() calls; replace one stray endPosition
  literal with nodeEndLine

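Net -65 lines. Intended to be behaviour-preserving — only call-site
collapsing onto the shared helpers (semantics verified by careful
inspection of each replacement; pushImport's empty-names fallback matches
the previous ad-hoc defaults in each extractor). Known edge-case
differences: pushCall/pushImport now skip entries whose name/source is
empty, and gleam's handleImport takes the first `module` or `string`
child in document order instead of preferring `module` over `string`.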

docs check acknowledged: internal refactor, no doc updates needed.
---
 src/extractors/gleam.ts      |  70 ++++++++++-----------
 src/extractors/java.ts       |  79 +++++++++++-------------
 src/extractors/javascript.ts |  87 +++++++++++++-------------
 src/extractors/julia.ts      |  63 +++++++++----------
 src/extractors/r.ts          |  94 ++++++++++++-----------------
 src/extractors/solidity.ts   | 114 +++++++++++++----------------------
 6 files changed, 221 insertions(+), 286 deletions(-)

diff --git a/src/extractors/gleam.ts b/src/extractors/gleam.ts
index 45f8bd2b8..244b036d4 100644
--- a/src/extractors/gleam.ts
+++ b/src/extractors/gleam.ts
@@ -1,11 +1,13 @@
-import type {
-  Call,
-  ExtractorOutput,
-  SubDeclaration,
-  TreeSitterNode,
-  TreeSitterTree,
-} from '../types.js';
-import { findChild, nodeEndLine, stripQuotes } from './helpers.js';
+import type { ExtractorOutput, SubDeclaration, TreeSitterNode, TreeSitterTree } from '../types.js';
+import {
+  findChild,
+  findFirstChildOfTypes,
+  nodeEndLine,
+  nodeStartLine,
+  pushCall,
+  pushImport,
+  stripQuotes,
+} from './helpers.js';
 
 /**
  * Extract symbols from Gleam files.
@@ -74,7 +76,7 @@ function handleFunction(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'function',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     visibility,
     children: params.length > 0 ? params : undefined,
@@ -90,7 +92,7 @@ function handleExternalFunction(node: TreeSitterNode, ctx: ExtractorOutput): voi
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'function',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     visibility: isPublic(node) ? 'public' : 'private',
     children: params.length > 0 ? params : undefined,
@@ -107,10 +109,7 @@ function handleTypeDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
     const child = node.child(i);
     if (!child) continue;
     if (child.type === 'data_constructor' || child.type === 'type_constructor') {
-      const ctorName = child.childForFieldName('name') || findChild(child, 'constructor_name');
-      if (ctorName) {
-        children.push({ name: ctorName.text, kind: 'property', line: child.startPosition.row + 1 });
-      }
+      pushConstructor(child, children);
     }
     // Recurse into constructors block
     if (child.type === 'data_constructors' || child.type === 'type_constructors') {
@@ -118,14 +117,7 @@ function handleTypeDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
         const ctor = child.child(j);
         if (!ctor) continue;
         if (ctor.type === 'data_constructor' || ctor.type === 'type_constructor') {
-          const ctorName = ctor.childForFieldName('name') || findChild(ctor, 'constructor_name');
-          if (ctorName) {
-            children.push({
-              name: ctorName.text,
-              kind: 'property',
-              line: ctor.startPosition.row + 1,
-            });
-          }
+          pushConstructor(ctor, children);
         }
       }
     }
@@ -134,13 +126,20 @@ function handleTypeDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'type',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     visibility: isPublic(node) ? 'public' : 'private',
     children: children.length > 0 ? children : undefined,
   });
 }
 
+function pushConstructor(ctorNode: TreeSitterNode, out: SubDeclaration[]): void {
+  const ctorName = ctorNode.childForFieldName('name') || findChild(ctorNode, 'constructor_name');
+  if (ctorName) {
+    out.push({ name: ctorName.text, kind: 'property', line: nodeStartLine(ctorNode) });
+  }
+}
+
 function handleTypeAlias(node: TreeSitterNode, ctx: ExtractorOutput): void {
   const nameNode = node.childForFieldName('name') || findChild(node, 'type_name');
   if (!nameNode) return;
@@ -148,7 +147,7 @@ function handleTypeAlias(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'type',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     visibility: isPublic(node) ? 'public' : 'private',
   });
@@ -161,7 +160,7 @@ function handleConstant(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'variable',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     visibility: isPublic(node) ? 'public' : 'private',
   });
@@ -169,7 +168,7 @@ function handleConstant(node: TreeSitterNode, ctx: ExtractorOutput): void {
 
 function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
   const moduleNode =
-    node.childForFieldName('module') || findChild(node, 'module') || findChild(node, 'string');
+    node.childForFieldName('module') || findFirstChildOfTypes(node, ['module', 'string']);
   if (!moduleNode) return;
 
   const source = stripQuotes(moduleNode.text);
@@ -193,11 +192,9 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
     names.push(alias.text);
   }
 
-  ctx.imports.push({
-    source,
-    names: names.length > 0 ? names : [source.split('/').pop() || source],
-    line: node.startPosition.row + 1,
-  });
+  // `pushImport` falls back to the source basename when `names` is empty,
+  // preserving the previous `source.split('/').pop() || source` default.
+  pushImport(ctx, node, source, names);
 }
 
 function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
@@ -205,16 +202,15 @@ function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
   if (!funcNode) return;
 
   if (funcNode.type === 'identifier' || funcNode.type === 'variable') {
-    ctx.calls.push({ name: funcNode.text, line: node.startPosition.row + 1 });
+    pushCall(ctx, node, funcNode.text);
   } else if (funcNode.type === 'field_access' || funcNode.type === 'module_select') {
     const field = funcNode.childForFieldName('field') || funcNode.childForFieldName('label');
     // Prefer the `record` field; fall back to first named child to skip
     // anonymous punctuation tokens (the `.` between record and field).
     const record = funcNode.childForFieldName('record') || funcNode.namedChild(0);
     if (field) {
-      const call: Call = { name: field.text, line: node.startPosition.row + 1 };
-      if (record && record !== field) call.receiver = record.text;
-      ctx.calls.push(call);
+      const receiver = record && record !== field ? record.text : undefined;
+      pushCall(ctx, node, field.text, receiver !== undefined ? { receiver } : {});
     }
   }
 }
@@ -231,11 +227,11 @@ function extractParams(funcNode: TreeSitterNode): SubDeclaration[] {
     if (param.type === 'function_parameter' || param.type === 'parameter') {
       const nameNode = param.childForFieldName('name') || findChild(param, 'identifier');
       if (nameNode) {
-        params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 });
+        params.push({ name: nameNode.text, kind: 'parameter', line: nodeStartLine(param) });
       }
     }
     if (param.type === 'identifier') {
-      params.push({ name: param.text, kind: 'parameter', line: param.startPosition.row + 1 });
+      params.push({ name: param.text, kind: 'parameter', line: nodeStartLine(param) });
     }
   }
   return params;
diff --git a/src/extractors/java.ts b/src/extractors/java.ts
index 64f03f900..d12163d20 100644
--- a/src/extractors/java.ts
+++ b/src/extractors/java.ts
@@ -1,5 +1,4 @@
 import type {
-  Call,
   ExtractorOutput,
   SubDeclaration,
   TreeSitterNode,
@@ -9,10 +8,14 @@ import type {
 import {
   extractBodyMembers,
   extractModifierVisibility,
+  extractSimpleParameters,
   findChild,
   findParentNode,
   lastPathSegment,
   nodeEndLine,
+  nodeStartLine,
+  pushCall,
+  pushImport,
 } from './helpers.js';
 
 /**
@@ -78,7 +81,7 @@ function handleJavaClassDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'class',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     children: classChildren.length > 0 ? classChildren : undefined,
   });
@@ -87,7 +90,7 @@ function handleJavaClassDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
 
   const interfaces = node.childForFieldName('interfaces');
   if (interfaces) {
-    extractJavaInterfaces(interfaces, nameNode.text, node.startPosition.row + 1, ctx);
+    extractJavaInterfaces(interfaces, nameNode.text, nodeStartLine(node), ctx);
   }
 }
 
@@ -101,7 +104,7 @@ function extractJavaSuperclass(
   if (!superclass) return;
   const superName = findJavaSuperTypeName(superclass);
   if (superName) {
-    ctx.classes.push({ name: className, extends: superName, line: node.startPosition.row + 1 });
+    ctx.classes.push({ name: className, extends: superName, line: nodeStartLine(node) });
   }
 }
 
@@ -163,7 +166,7 @@ function handleJavaInterfaceDecl(node: TreeSitterNode, ctx: ExtractorOutput): vo
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'interface',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
   });
   const body = node.childForFieldName('body');
@@ -184,8 +187,8 @@ function extractJavaInterfaceMethods(
         ctx.definitions.push({
           name: `${ifaceName}.${methName.text}`,
           kind: 'method',
-          line: child.startPosition.row + 1,
-          endLine: child.endPosition.row + 1,
+          line: nodeStartLine(child),
+          endLine: nodeEndLine(child),
         });
       }
     }
@@ -199,7 +202,7 @@ function handleJavaEnumDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'enum',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     children: enumChildren.length > 0 ? enumChildren : undefined,
   });
@@ -216,7 +219,7 @@ function handleJavaMethodDecl(node: TreeSitterNode, ctx: ExtractorOutput): void
   ctx.definitions.push({
     name: fullName,
     kind: 'method',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     children: params.length > 0 ? params : undefined,
     visibility: extractModifierVisibility(node),
@@ -232,7 +235,7 @@ function handleJavaConstructorDecl(node: TreeSitterNode, ctx: ExtractorOutput):
   ctx.definitions.push({
     name: fullName,
     kind: 'method',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     children: params.length > 0 ? params : undefined,
     visibility: extractModifierVisibility(node),
@@ -245,12 +248,7 @@ function handleJavaImportDecl(node: TreeSitterNode, ctx: ExtractorOutput): void
     if (child && (child.type === 'scoped_identifier' || child.type === 'identifier')) {
       const fullPath = child.text;
       const lastName = lastPathSegment(fullPath, '.');
-      ctx.imports.push({
-        source: fullPath,
-        names: [lastName],
-        line: node.startPosition.row + 1,
-        javaImport: true,
-      });
+      pushImport(ctx, node, fullPath, [lastName], { javaImport: true });
     }
     if (child && child.type === 'asterisk') {
       const lastImport = ctx.imports[ctx.imports.length - 1];
@@ -263,15 +261,13 @@ function handleJavaMethodInvocation(node: TreeSitterNode, ctx: ExtractorOutput):
   const nameNode = node.childForFieldName('name');
   if (!nameNode) return;
   const obj = node.childForFieldName('object');
-  const call: Call = { name: nameNode.text, line: node.startPosition.row + 1 };
-  if (obj) call.receiver = obj.text;
-  ctx.calls.push(call);
+  pushCall(ctx, node, nameNode.text, obj ? { receiver: obj.text } : {});
 }
 
 function handleJavaLocalVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
   const typeNode = node.childForFieldName('type');
   if (!typeNode) return;
-  const typeName = typeNode.type === 'generic_type' ? typeNode.child(0)?.text : typeNode.text;
+  const typeName = resolveJavaTypeText(typeNode);
   if (!typeName) return;
   for (let i = 0; i < node.childCount; i++) {
     const child = node.child(i);
@@ -285,8 +281,17 @@ function handleJavaLocalVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): voi
 function handleJavaObjectCreation(node: TreeSitterNode, ctx: ExtractorOutput): void {
   const typeNode = node.childForFieldName('type');
   if (!typeNode) return;
-  const typeName = typeNode.type === 'generic_type' ? typeNode.child(0)?.text : typeNode.text;
-  if (typeName) ctx.calls.push({ name: typeName, line: node.startPosition.row + 1 });
+  const typeName = resolveJavaTypeText(typeNode);
+  if (typeName) pushCall(ctx, node, typeName);
+}
+
+/**
+ * Return the bare type name for a Java type node: a `generic_type` node
+ * yields the text of its first child, every other node yields its own
+ * text. The result is undefined when that first child is missing.
+ */
+function resolveJavaTypeText(typeNode: TreeSitterNode): string | undefined {
+  return typeNode.type !== 'generic_type' ? typeNode.text : typeNode.child(0)?.text;
 }
 
 const JAVA_PARENT_TYPES = [
@@ -300,31 +305,17 @@ function findJavaParentClass(node: TreeSitterNode): string | null {
 
 // ── Child extraction helpers ────────────────────────────────────────────────
 
+const JAVA_PARAM_TYPES = ['formal_parameter', 'spread_parameter'] as const;
+
 function extractJavaParameters(
   paramListNode: TreeSitterNode | null,
   typeMap?: Map<string, TypeMapEntry>,
 ): SubDeclaration[] {
-  const params: SubDeclaration[] = [];
-  if (!paramListNode) return params;
-  for (let i = 0; i < paramListNode.childCount; i++) {
-    const param = paramListNode.child(i);
-    if (!param) continue;
-    if (param.type === 'formal_parameter' || param.type === 'spread_parameter') {
-      const nameNode = param.childForFieldName('name');
-      if (nameNode) {
-        params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 });
-        if (typeMap) {
-          const typeNode = param.childForFieldName('type');
-          if (typeNode) {
-            const typeName =
-              typeNode.type === 'generic_type' ? typeNode.child(0)?.text : typeNode.text;
-            if (typeName) typeMap.set(nameNode.text, { type: typeName, confidence: 0.9 });
-          }
-        }
-      }
-    }
-  }
-  return params;
+  // Java contributes only the parameter node kinds and the type resolver;
+  // the caller's optional typeMap is forwarded unchanged.
+  const resolveType = resolveJavaTypeText;
+  const javaOptions = { paramTypes: JAVA_PARAM_TYPES, typeMap, resolveType };
+  return extractSimpleParameters(paramListNode, javaOptions);
 }
 
 function extractClassFields(classNode: TreeSitterNode): SubDeclaration[] {
@@ -350,7 +341,7 @@ function extractFieldDeclarators(member: TreeSitterNode, fields: SubDeclaration[
       fields.push({
         name: nameNode.text,
         kind: 'property',
-        line: member.startPosition.row + 1,
+        line: nodeStartLine(member),
         visibility: vis,
       });
     }
diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts
index fef61a0c3..0fc9e46f1 100644
--- a/src/extractors/javascript.ts
+++ b/src/extractors/javascript.ts
@@ -17,6 +17,7 @@ import {
   findParentNode,
   MAX_WALK_DEPTH,
   nodeEndLine,
+  nodeStartLine,
   setTypeMapEntry,
 } from './helpers.js';
 
@@ -99,7 +100,7 @@ function handleFnCapture(c: Record<string, TreeSitterNode>, definitions: Definit
   definitions.push({
     name: c.fn_name!.text,
     kind: 'function',
-    line: c.fn_node!.startPosition.row + 1,
+    line: nodeStartLine(c.fn_node!),
     endLine: nodeEndLine(c.fn_node!),
     children: fnChildren.length > 0 ? fnChildren : undefined,
   });
@@ -108,7 +109,7 @@ function handleFnCapture(c: Record<string, TreeSitterNode>, definitions: Definit
 /** Handle variable_declarator with arrow_function / function_expression capture. */
 function handleVarFnCapture(c: Record<string, TreeSitterNode>, definitions: Definition[]): void {
   const declNode = c.varfn_name!.parent?.parent;
-  const line = declNode ? declNode.startPosition.row + 1 : c.varfn_name!.startPosition.row + 1;
+  const line = declNode ? nodeStartLine(declNode) : nodeStartLine(c.varfn_name!);
   const varFnChildren = extractParameters(c.varfn_value!);
   definitions.push({
     name: c.varfn_name!.text,
@@ -126,7 +127,7 @@ function handleClassCapture(
   classes: ClassRelation[],
 ): void {
   const className = c.cls_name!.text;
-  const startLine = c.cls_node!.startPosition.row + 1;
+  const startLine = nodeStartLine(c.cls_node!);
   const clsChildren = extractClassProperties(c.cls_node!);
   definitions.push({
     name: className,
@@ -157,7 +158,7 @@ function handleMethodCapture(c: Record<string, TreeSitterNode>, definitions: Def
   definitions.push({
     name: fullName,
     kind: 'method',
-    line: c.meth_node!.startPosition.row + 1,
+    line: nodeStartLine(c.meth_node!),
     endLine: nodeEndLine(c.meth_node!),
     children: methChildren.length > 0 ? methChildren : undefined,
     visibility: methVis,
@@ -170,7 +171,7 @@ function handleExportCapture(
   exps: Export[],
   imports: Import[],
 ): void {
-  const exportLine = c.exp_node!.startPosition.row + 1;
+  const exportLine = nodeStartLine(c.exp_node!);
   const decl = c.exp_node!.childForFieldName('declaration');
   if (decl) {
     const declType = decl.type;
@@ -211,7 +212,7 @@ function handleInterfaceCapture(
   definitions.push({
     name: ifaceName,
     kind: 'interface',
-    line: ifaceNode.startPosition.row + 1,
+    line: nodeStartLine(ifaceNode),
     endLine: nodeEndLine(ifaceNode),
   });
   const body =
@@ -226,7 +227,7 @@ function handleTypeCapture(c: Record<string, TreeSitterNode>, definitions: Defin
   definitions.push({
     name: c.type_name!.text,
     kind: 'type',
-    line: typeNode.startPosition.row + 1,
+    line: nodeStartLine(typeNode),
     endLine: nodeEndLine(typeNode),
   });
 }
@@ -239,7 +240,7 @@ function handleImportCapture(c: Record<string, TreeSitterNode>, imports: Import[
   imports.push({
     source: modPath,
     names,
-    line: impNode.startPosition.row + 1,
+    line: nodeStartLine(impNode),
     typeOnly: isTypeOnly,
   });
 }
@@ -272,7 +273,7 @@ function dispatchQueryMatch(
   } else if (c.callfn_node) {
     calls.push({
       name: c.callfn_name!.text,
-      line: c.callfn_node.startPosition.row + 1,
+      line: nodeStartLine(c.callfn_node),
     });
     calls.push(...extractCallbackReferenceCalls(c.callfn_node));
   } else if (c.callmem_node) {
@@ -288,7 +289,7 @@ function dispatchQueryMatch(
   } else if (c.newfn_node) {
     calls.push({
       name: c.newfn_name!.text,
-      line: c.newfn_node.startPosition.row + 1,
+      line: nodeStartLine(c.newfn_node),
     });
   } else if (c.newmem_node) {
     const callInfo = extractCallInfo(c.newmem_fn!, c.newmem_node);
@@ -411,7 +412,7 @@ function extractDestructuredBindingsWalk(node: TreeSitterNode, definitions: Defi
         if (nameN && nameN.type === 'object_pattern') {
           extractDestructuredBindings(
             nameN,
-            declNode.startPosition.row + 1,
+            nodeStartLine(declNode),
             nodeEndLine(declNode),
             definitions,
           );
@@ -445,7 +446,7 @@ function extractConstDeclarators(declNode: TreeSitterNode, definitions: Definiti
       definitions.push({
         name: nameN.text,
         kind: 'constant',
-        line: declNode.startPosition.row + 1,
+        line: nodeStartLine(declNode),
         endLine: nodeEndLine(declNode),
       });
     }
@@ -470,12 +471,12 @@ function extractDynamicImportsWalk(node: TreeSitterNode, imports: Import[]): voi
           imports.push({
             source: modPath,
             names,
-            line: node.startPosition.row + 1,
+            line: nodeStartLine(node),
             dynamicImport: true,
           });
         } else {
           debug(
-            `Skipping non-static dynamic import() at line ${node.startPosition.row + 1} (template literal or variable)`,
+            `Skipping non-static dynamic import() at line ${nodeStartLine(node)} (template literal or variable)`,
           );
         }
       }
@@ -497,7 +498,7 @@ function handleCommonJSAssignment(
   const leftText = left.text;
   if (!leftText.startsWith('module.exports') && leftText !== 'exports') return;
 
-  const assignLine = node.startPosition.row + 1;
+  const assignLine = nodeStartLine(node);
 
   // module.exports = require("…") — direct re-export
   if (right.type === 'call_expression') {
@@ -618,7 +619,7 @@ function handleFunctionDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
     ctx.definitions.push({
       name: nameNode.text,
       kind: 'function',
-      line: node.startPosition.row + 1,
+      line: nodeStartLine(node),
       endLine: nodeEndLine(node),
       children: fnChildren.length > 0 ? fnChildren : undefined,
     });
@@ -629,7 +630,7 @@ function handleClassDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
   const nameNode = node.childForFieldName('name');
   if (!nameNode) return;
   const className = nameNode.text;
-  const startLine = node.startPosition.row + 1;
+  const startLine = nodeStartLine(node);
   const clsChildren = extractClassProperties(node);
   ctx.definitions.push({
     name: className,
@@ -661,7 +662,7 @@ function handleMethodDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
     ctx.definitions.push({
       name: fullName,
       kind: 'method',
-      line: node.startPosition.row + 1,
+      line: nodeStartLine(node),
       endLine: nodeEndLine(node),
       children: methChildren.length > 0 ? methChildren : undefined,
       visibility: methVis,
@@ -675,7 +676,7 @@ function handleInterfaceDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'interface',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
   });
   const body =
@@ -693,7 +694,7 @@ function handleTypeAliasDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
     ctx.definitions.push({
       name: nameNode.text,
       kind: 'type',
-      line: node.startPosition.row + 1,
+      line: nodeStartLine(node),
       endLine: nodeEndLine(node),
     });
   }
@@ -751,7 +752,7 @@ function handleVariableDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
           ctx.definitions.push({
             name: nameN.text,
             kind: 'function',
-            line: node.startPosition.row + 1,
+            line: nodeStartLine(node),
             endLine: nodeEndLine(valueN),
             children: varFnChildren.length > 0 ? varFnChildren : undefined,
           });
@@ -759,7 +760,7 @@ function handleVariableDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
           ctx.definitions.push({
             name: nameN.text,
             kind: 'constant',
-            line: node.startPosition.row + 1,
+            line: nodeStartLine(node),
             endLine: nodeEndLine(node),
           });
         } else if (isConst && nameN.type === 'object_pattern' && !hasFunctionScopeAncestor(node)) {
@@ -772,7 +773,7 @@ function handleVariableDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
           // handle_var_decl (Rust path) — skips bindings inside function bodies.
           extractDestructuredBindings(
             nameN,
-            node.startPosition.row + 1,
+            nodeStartLine(node),
             nodeEndLine(node),
             ctx.definitions,
           );
@@ -797,7 +798,7 @@ function handleEnumDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
           enumChildren.push({
             name: mName.text,
             kind: 'constant',
-            line: member.startPosition.row + 1,
+            line: nodeStartLine(member),
           });
         }
       }
@@ -806,7 +807,7 @@ function handleEnumDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'enum',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     children: enumChildren.length > 0 ? enumChildren : undefined,
   });
@@ -832,7 +833,7 @@ function handleNewExpr(node: TreeSitterNode, ctx: ExtractorOutput): void {
   const ctor = node.childForFieldName('constructor') || node.child(1);
   if (!ctor) return;
   if (ctor.type === 'identifier') {
-    ctx.calls.push({ name: ctor.text, line: node.startPosition.row + 1 });
+    ctx.calls.push({ name: ctor.text, line: nodeStartLine(node) });
   } else if (ctor.type === 'member_expression') {
     const callInfo = extractCallInfo(ctor, node);
     if (callInfo) ctx.calls.push(callInfo);
@@ -847,10 +848,10 @@ function handleDynamicImportCall(node: TreeSitterNode, imports: Import[]): void
   if (strArg) {
     const modPath = strArg.text.replace(/['"]/g, '');
     const names = extractDynamicImportNames(node);
-    imports.push({ source: modPath, names, line: node.startPosition.row + 1, dynamicImport: true });
+    imports.push({ source: modPath, names, line: nodeStartLine(node), dynamicImport: true });
   } else {
     debug(
-      `Skipping non-static dynamic import() at line ${node.startPosition.row + 1} (template literal or variable)`,
+      `Skipping non-static dynamic import() at line ${nodeStartLine(node)} (template literal or variable)`,
     );
   }
 }
@@ -864,14 +865,14 @@ function handleImportStmt(node: TreeSitterNode, ctx: ExtractorOutput): void {
     ctx.imports.push({
       source: modPath,
       names,
-      line: node.startPosition.row + 1,
+      line: nodeStartLine(node),
       typeOnly: isTypeOnly,
     });
   }
 }
 
 function handleExportStmt(node: TreeSitterNode, ctx: ExtractorOutput): void {
-  const exportLine = node.startPosition.row + 1;
+  const exportLine = nodeStartLine(node);
   const decl = node.childForFieldName('declaration');
   if (decl) {
     const declType = decl.type;
@@ -923,7 +924,7 @@ function extractParameters(node: TreeSitterNode): SubDeclaration[] {
     if (!child) continue;
     const t = child.type;
     if (t === 'identifier') {
-      params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 });
+      params.push({ name: child.text, kind: 'parameter', line: nodeStartLine(child) });
     } else if (
       t === 'required_parameter' ||
       t === 'optional_parameter' ||
@@ -936,12 +937,12 @@ function extractParameters(node: TreeSitterNode): SubDeclaration[] {
         (nameNode.type === 'identifier' ||
           nameNode.type === 'shorthand_property_identifier_pattern')
       ) {
-        params.push({ name: nameNode.text, kind: 'parameter', line: child.startPosition.row + 1 });
+        params.push({ name: nameNode.text, kind: 'parameter', line: nodeStartLine(child) });
       }
     } else if (t === 'rest_pattern' || t === 'rest_element') {
       const nameNode = child.child(1) || child.childForFieldName('name');
       if (nameNode && nameNode.type === 'identifier') {
-        params.push({ name: nameNode.text, kind: 'parameter', line: child.startPosition.row + 1 });
+        params.push({ name: nameNode.text, kind: 'parameter', line: nodeStartLine(child) });
       }
     }
   }
@@ -975,7 +976,7 @@ function extractClassProperties(classNode: TreeSitterNode): SubDeclaration[] {
         props.push({
           name: nameNode.text,
           kind: 'property',
-          line: child.startPosition.row + 1,
+          line: nodeStartLine(child),
           visibility: vis,
         });
       }
@@ -1044,8 +1045,8 @@ function extractInterfaceMethods(
         definitions.push({
           name: `${interfaceName}.${nameNode.text}`,
           kind: 'method',
-          line: child.startPosition.row + 1,
-          endLine: child.endPosition.row + 1,
+          line: nodeStartLine(child),
+          endLine: nodeEndLine(child),
         });
       }
     }
@@ -1216,7 +1217,7 @@ function extractReceiverName(objNode: TreeSitterNode | null): string | undefined
 function extractCallInfo(fn: TreeSitterNode, callNode: TreeSitterNode): Call | null {
   const fnType = fn.type;
   if (fnType === 'identifier') {
-    return { name: fn.text, line: callNode.startPosition.row + 1 };
+    return { name: fn.text, line: nodeStartLine(callNode) };
   }
   if (fnType === 'member_expression') {
     return extractMemberExprCallInfo(fn, callNode);
@@ -1233,7 +1234,7 @@ function extractMemberExprCallInfo(fn: TreeSitterNode, callNode: TreeSitterNode)
   const prop = fn.childForFieldName('property');
   if (!prop) return null;
 
-  const callLine = callNode.startPosition.row + 1;
+  const callLine = nodeStartLine(callNode);
   const propText = prop.text;
 
   // .call()/.apply()/.bind() — dynamic invocation
@@ -1272,7 +1273,7 @@ function extractSubscriptCallInfo(fn: TreeSitterNode, callNode: TreeSitterNode):
       const receiver = extractReceiverName(obj);
       return {
         name: methodName,
-        line: callNode.startPosition.row + 1,
+        line: nodeStartLine(callNode),
         dynamic: true,
         receiver,
       };
@@ -1435,7 +1436,7 @@ function extractCallbackReferenceCalls(callNode: TreeSitterNode): Call[] {
   }
 
   const result: Call[] = [];
-  const callLine = callNode.startPosition.row + 1;
+  const callLine = nodeStartLine(callNode);
 
   for (let i = 0; i < args.childCount; i++) {
     const child = args.child(i);
@@ -1540,7 +1541,7 @@ function extractCallbackDefinition(
     return {
       name: `command:${firstWord}`,
       kind: 'function',
-      line: cb.startPosition.row + 1,
+      line: nodeStartLine(cb),
       endLine: nodeEndLine(cb),
     };
   }
@@ -1554,7 +1555,7 @@ function extractCallbackDefinition(
     return {
       name: `route:${method.toUpperCase()} ${strArg}`,
       kind: 'function',
-      line: cb.startPosition.row + 1,
+      line: nodeStartLine(cb),
       endLine: nodeEndLine(cb),
     };
   }
@@ -1568,7 +1569,7 @@ function extractCallbackDefinition(
     return {
       name: `event:${eventName}`,
       kind: 'function',
-      line: cb.startPosition.row + 1,
+      line: nodeStartLine(cb),
       endLine: nodeEndLine(cb),
     };
   }
diff --git a/src/extractors/julia.ts b/src/extractors/julia.ts
index 7667ec95d..d412fecda 100644
--- a/src/extractors/julia.ts
+++ b/src/extractors/julia.ts
@@ -1,5 +1,5 @@
 import type { ExtractorOutput, SubDeclaration, TreeSitterNode, TreeSitterTree } from '../types.js';
-import { findChild, nodeEndLine } from './helpers.js';
+import { findChild, nodeEndLine, nodeStartLine, pushCall, pushImport } from './helpers.js';
 
 /**
  * Extract symbols from Julia files.
@@ -76,7 +76,7 @@ function handleModuleDef(node: TreeSitterNode, ctx: ExtractorOutput): string | n
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'module',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
   });
 
@@ -130,7 +130,7 @@ function handleFunctionDef(
       ctx.definitions.push({
         name,
         kind: 'function',
-        line: node.startPosition.row + 1,
+        line: nodeStartLine(node),
         endLine: nodeEndLine(node),
         children: params.length > 0 ? params : undefined,
       });
@@ -145,7 +145,7 @@ function handleFunctionDef(
   ctx.definitions.push({
     name: qualifyName(nameNode.text, currentModule),
     kind: 'function',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
   });
 }
@@ -169,7 +169,7 @@ function handleAssignment(
     ctx.definitions.push({
       name: qualifyName(funcNameNode.text, currentModule),
       kind: 'function',
-      line: node.startPosition.row + 1,
+      line: nodeStartLine(node),
       endLine: nodeEndLine(node),
       children: params.length > 0 ? params : undefined,
     });
@@ -253,14 +253,14 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
         children.push({
           name: fieldName.text,
           kind: 'property',
-          line: child.startPosition.row + 1,
+          line: nodeStartLine(child),
         });
       }
     } else if (child.type === 'identifier') {
       // Plain identifier fields (no type annotation) appear as direct
       // identifier children of struct_definition. The type_head is a
       // separate node so there is nothing to filter out here.
-      children.push({ name: child.text, kind: 'property', line: child.startPosition.row + 1 });
+      children.push({ name: child.text, kind: 'property', line: nodeStartLine(child) });
     }
   }
 
@@ -268,14 +268,14 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
     ctx.classes.push({
       name: structName,
       extends: supertypeNode.text,
-      line: node.startPosition.row + 1,
+      line: nodeStartLine(node),
     });
   }
 
   ctx.definitions.push({
     name: structName,
     kind: 'struct',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     children: children.length > 0 ? children : undefined,
   });
@@ -295,7 +295,7 @@ function handleAbstractDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name: nameNode.text,
     kind: 'type',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
   });
 }
@@ -319,7 +319,7 @@ function handleMacroDef(
   ctx.definitions.push({
     name,
     kind: 'function',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
   });
 }
@@ -363,11 +363,10 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void {
   }
 
   if (source) {
-    ctx.imports.push({
-      source,
-      names: names.length > 0 ? names : [source],
-      line: node.startPosition.row + 1,
-    });
+    // pushImport falls back to source basename for empty `names`. Julia module
+    // sources have no `/` separator, so the basename equals `source` — matching
+    // the previous explicit `[source]` fallback.
+    pushImport(ctx, node, source, names);
   }
 }
 
@@ -388,21 +387,26 @@ function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
   if (!funcNode) return;
 
   if (funcNode.type === 'identifier') {
-    ctx.calls.push({ name: funcNode.text, line: node.startPosition.row + 1 });
+    pushCall(ctx, node, funcNode.text);
   } else if (funcNode.type === 'field_expression' || funcNode.type === 'scoped_identifier') {
     const parts = funcNode.text.split('.');
     if (parts.length >= 2) {
-      ctx.calls.push({
-        name: parts[parts.length - 1]!,
+      pushCall(ctx, node, parts[parts.length - 1]!, {
         receiver: parts.slice(0, -1).join('.'),
-        line: node.startPosition.row + 1,
       });
     } else {
-      ctx.calls.push({ name: funcNode.text, line: node.startPosition.row + 1 });
+      pushCall(ctx, node, funcNode.text);
     }
   }
 }
 
+const JULIA_PARAM_WRAPPER_TYPES = new Set([
+  'typed_parameter',
+  'typed_expression',
+  'optional_parameter',
+  'default_parameter',
+]);
+
 function extractJuliaParams(callExpr: TreeSitterNode): SubDeclaration[] {
   const params: SubDeclaration[] = [];
   const argList = findChild(callExpr, 'argument_list') || findChild(callExpr, 'tuple_expression');
@@ -412,25 +416,14 @@ function extractJuliaParams(callExpr: TreeSitterNode): SubDeclaration[] {
     const child = argList.child(i);
     if (!child) continue;
     if (child.type === 'identifier') {
-      params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 });
-    }
-    if (child.type === 'typed_parameter' || child.type === 'typed_expression') {
-      const nameNode = findChild(child, 'identifier');
-      if (nameNode) {
-        params.push({
-          name: nameNode.text,
-          kind: 'parameter',
-          line: child.startPosition.row + 1,
-        });
-      }
-    }
-    if (child.type === 'optional_parameter' || child.type === 'default_parameter') {
+      params.push({ name: child.text, kind: 'parameter', line: nodeStartLine(child) });
+    } else if (JULIA_PARAM_WRAPPER_TYPES.has(child.type)) {
       const nameNode = findChild(child, 'identifier');
       if (nameNode) {
         params.push({
           name: nameNode.text,
           kind: 'parameter',
-          line: child.startPosition.row + 1,
+          line: nodeStartLine(child),
         });
       }
     }
diff --git a/src/extractors/r.ts b/src/extractors/r.ts
index a6edac135..ef0a863e0 100644
--- a/src/extractors/r.ts
+++ b/src/extractors/r.ts
@@ -1,5 +1,13 @@
 import type { ExtractorOutput, SubDeclaration, TreeSitterNode, TreeSitterTree } from '../types.js';
-import { findChild, nodeEndLine } from './helpers.js';
+import {
+  findChild,
+  findFirstChildOfTypes,
+  nodeEndLine,
+  nodeStartLine,
+  pushCall,
+  pushImport,
+  stripQuotes,
+} from './helpers.js';
 
 /**
  * Extract symbols from R files.
@@ -58,7 +66,7 @@ function handleBinaryOp(node: TreeSitterNode, ctx: ExtractorOutput): void {
     ctx.definitions.push({
       name: lhs.text,
       kind: 'function',
-      line: node.startPosition.row + 1,
+      line: nodeStartLine(node),
       endLine: nodeEndLine(node),
       children: params.length > 0 ? params : undefined,
     });
@@ -68,7 +76,7 @@ function handleBinaryOp(node: TreeSitterNode, ctx: ExtractorOutput): void {
       ctx.definitions.push({
         name: lhs.text,
         kind: 'variable',
-        line: node.startPosition.row + 1,
+        line: nodeStartLine(node),
         endLine: nodeEndLine(node),
       });
     }
@@ -87,14 +95,14 @@ function extractRParams(funcDef: TreeSitterNode): SubDeclaration[] {
       // parameter node has name and possibly default value
       const nameNode = child.childForFieldName('name') || findChild(child, 'identifier');
       if (nameNode) {
-        params.push({ name: nameNode.text, kind: 'parameter', line: child.startPosition.row + 1 });
+        params.push({ name: nameNode.text, kind: 'parameter', line: nodeStartLine(child) });
       } else if (child.text && child.text !== ',' && child.text !== '(' && child.text !== ')') {
         // Some grammars have the param as plain text
-        params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 });
+        params.push({ name: child.text, kind: 'parameter', line: nodeStartLine(child) });
       }
     }
     if (child.type === 'identifier') {
-      params.push({ name: child.text, kind: 'parameter', line: child.startPosition.row + 1 });
+      params.push({ name: child.text, kind: 'parameter', line: nodeStartLine(child) });
     }
   }
   return params;
@@ -137,15 +145,13 @@ function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
 
   // Regular call
   if (funcNode.type === 'identifier') {
-    ctx.calls.push({ name: funcName, line: node.startPosition.row + 1 });
+    pushCall(ctx, node, funcName);
   } else if (funcNode.type === 'namespace_operator') {
     // pkg::func
     const parts = funcName.split('::');
     if (parts.length >= 2) {
-      ctx.calls.push({
-        name: parts[parts.length - 1]!,
+      pushCall(ctx, node, parts[parts.length - 1]!, {
         receiver: parts.slice(0, -1).join('::'),
-        line: node.startPosition.row + 1,
       });
     }
   }
@@ -164,20 +170,12 @@ function handleLibraryCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
         const arg = child.child(j);
         if (!arg) continue;
         if (arg.type === 'identifier') {
-          ctx.imports.push({
-            source: arg.text,
-            names: [arg.text],
-            line: node.startPosition.row + 1,
-          });
+          pushImport(ctx, node, arg.text, [arg.text]);
           return;
         }
         if (arg.type === 'string' || arg.type === 'string_content') {
-          const text = arg.text.replace(/^["']|["']$/g, '');
-          ctx.imports.push({
-            source: text,
-            names: [text],
-            line: node.startPosition.row + 1,
-          });
+          const text = stripQuotes(arg.text);
+          pushImport(ctx, node, text, [text]);
           return;
         }
         // Argument might be wrapped
@@ -202,12 +200,8 @@ function handleLibraryCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
             }
           }
           if (pick) {
-            const text = pick.text.replace(/^["']|["']$/g, '');
-            ctx.imports.push({
-              source: text,
-              names: [text],
-              line: node.startPosition.row + 1,
-            });
+            const text = stripQuotes(pick.text);
+            pushImport(ctx, node, text, [text]);
             return;
           }
         }
@@ -220,11 +214,7 @@ function handleSourceCall(node: TreeSitterNode, ctx: ExtractorOutput): void {
   // source() only accepts string literals — `source(varname)` is not an import.
   const path = firstStringArgument(node);
   if (path === null) return;
-  ctx.imports.push({
-    source: path,
-    names: ['source'],
-    line: node.startPosition.row + 1,
-  });
+  pushImport(ctx, node, path, ['source']);
 }
 
 function handleSetClass(node: TreeSitterNode, ctx: ExtractorOutput): void {
@@ -233,7 +223,7 @@ function handleSetClass(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name,
     kind: 'class',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
   });
 }
@@ -244,7 +234,7 @@ function handleSetGeneric(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name,
     kind: 'function',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
   });
 }
@@ -258,7 +248,7 @@ function handleSetGeneric(node: TreeSitterNode, ctx: ExtractorOutput): void {
 function handleSetMethod(node: TreeSitterNode, ctx: ExtractorOutput): void {
   const name = firstStringArgument(node);
   if (name === null) return;
-  ctx.calls.push({ name, line: node.startPosition.row + 1 });
+  pushCall(ctx, node, name);
 }
 
 // tree-sitter-r wraps each positional argument in an `argument` node that
@@ -266,28 +256,20 @@ function handleSetMethod(node: TreeSitterNode, ctx: ExtractorOutput): void {
 // must be unwrapped — checking `child.type === 'string'` directly misses it.
 // Mirrors `first_argument_value` in the Rust extractor for parity.
 function firstStringArgument(node: TreeSitterNode): string | null {
-  for (let i = 0; i < node.childCount; i++) {
-    const child = node.child(i);
-    if (!child || child.type !== 'arguments') continue;
-    for (let j = 0; j < child.childCount; j++) {
-      const arg = child.child(j);
-      if (!arg) continue;
-      if (arg.type === 'string') {
-        return stripQuotes(arg.text);
-      }
-      if (arg.type === 'argument') {
-        const valueNode = arg.childForFieldName('value');
-        if (valueNode && valueNode.type === 'string') return stripQuotes(valueNode.text);
-        for (let k = 0; k < arg.childCount; k++) {
-          const inner = arg.child(k);
-          if (inner && inner.type === 'string') return stripQuotes(inner.text);
-        }
-      }
+  const args = findFirstChildOfTypes(node, ['arguments']);
+  if (!args) return null;
+  for (let j = 0; j < args.childCount; j++) {
+    const arg = args.child(j);
+    if (!arg) continue;
+    if (arg.type === 'string') {
+      return stripQuotes(arg.text);
+    }
+    if (arg.type === 'argument') {
+      const valueNode = arg.childForFieldName('value');
+      if (valueNode && valueNode.type === 'string') return stripQuotes(valueNode.text);
+      const innerStr = findFirstChildOfTypes(arg, ['string']);
+      if (innerStr) return stripQuotes(innerStr.text);
     }
   }
   return null;
 }
-
-function stripQuotes(text: string): string {
-  return text.replace(/^["']|["']$/g, '');
-}
diff --git a/src/extractors/solidity.ts b/src/extractors/solidity.ts
index c68043966..8626b29e0 100644
--- a/src/extractors/solidity.ts
+++ b/src/extractors/solidity.ts
@@ -1,15 +1,14 @@
-import type {
-  Call,
-  ExtractorOutput,
-  SubDeclaration,
-  TreeSitterNode,
-  TreeSitterTree,
-} from '../types.js';
+import type { ExtractorOutput, SubDeclaration, TreeSitterNode, TreeSitterTree } from '../types.js';
 import {
   extractModifierVisibility,
+  extractSimpleParameters,
   findChild,
+  findFirstChildOfTypes,
   findParentNode,
   nodeEndLine,
+  nodeStartLine,
+  pushCall,
+  pushImport,
   stripQuotes,
 } from './helpers.js';
 
@@ -103,7 +102,7 @@ function handleContractDecl(
   ctx.definitions.push({
     name,
     kind,
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     children: members.length > 0 ? members : undefined,
   });
@@ -125,7 +124,7 @@ function extractContractMembers(body: TreeSitterNode): SubDeclaration[] {
 
 /** Map a single contract body child to a SubDeclaration, or null if not a recognized member. */
 function extractContractMember(child: TreeSitterNode): SubDeclaration | null {
-  const line = child.startPosition.row + 1;
+  const line = nodeStartLine(child);
   switch (child.type) {
     case 'function_definition': {
       const fnName = child.childForFieldName('name');
@@ -172,7 +171,7 @@ function extractInheritance(node: TreeSitterNode, name: string, ctx: ExtractorOu
       const child = inheritance.child(j);
       if (!child) continue;
       if (child.type === 'user_defined_type' || child.type === 'identifier') {
-        ctx.classes.push({ name, extends: child.text, line: node.startPosition.row + 1 });
+        ctx.classes.push({ name, extends: child.text, line: nodeStartLine(node) });
       }
     }
   }
@@ -191,19 +190,16 @@ function handleStructDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
         members.push({
           name: memberName.text,
           kind: 'property',
-          line: child.startPosition.row + 1,
+          line: nodeStartLine(child),
         });
       }
     }
   }
 
-  const parent = findParentNode(node, SOL_PARENT_TYPES);
-  const fullName = parent ? `${parent}.${nameNode.text}` : nameNode.text;
-
   ctx.definitions.push({
-    name: fullName,
+    name: qualifyWithParent(node, nameNode.text),
     kind: 'struct',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     children: members.length > 0 ? members : undefined,
   });
@@ -217,17 +213,14 @@ function handleEnumDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
   for (let i = 0; i < node.childCount; i++) {
     const child = node.child(i);
     if (child && child.type === 'enum_value') {
-      members.push({ name: child.text, kind: 'constant', line: child.startPosition.row + 1 });
+      members.push({ name: child.text, kind: 'constant', line: nodeStartLine(child) });
     }
   }
 
-  const parent = findParentNode(node, SOL_PARENT_TYPES);
-  const fullName = parent ? `${parent}.${nameNode.text}` : nameNode.text;
-
   ctx.definitions.push({
-    name: fullName,
+    name: qualifyWithParent(node, nameNode.text),
     kind: 'enum',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     children: members.length > 0 ? members : undefined,
   });
@@ -244,7 +237,7 @@ function handleFunctionDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
   ctx.definitions.push({
     name: fullName,
     kind,
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     children: params.length > 0 ? params : undefined,
     visibility: extractSolVisibility(node),
@@ -254,13 +247,10 @@ function handleFunctionDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
 function handleModifierDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
   const nameNode = node.childForFieldName('name');
   if (!nameNode) return;
-  const parent = findParentNode(node, SOL_PARENT_TYPES);
-  const fullName = parent ? `${parent}.${nameNode.text}` : nameNode.text;
-
   ctx.definitions.push({
-    name: fullName,
+    name: qualifyWithParent(node, nameNode.text),
     kind: 'function',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     decorators: ['modifier'],
   });
@@ -269,13 +259,10 @@ function handleModifierDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
 function handleEventDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
   const nameNode = node.childForFieldName('name');
   if (!nameNode) return;
-  const parent = findParentNode(node, SOL_PARENT_TYPES);
-  const fullName = parent ? `${parent}.${nameNode.text}` : nameNode.text;
-
   ctx.definitions.push({
-    name: fullName,
+    name: qualifyWithParent(node, nameNode.text),
     kind: 'type',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     decorators: ['event'],
   });
@@ -284,13 +271,10 @@ function handleEventDef(node: TreeSitterNode, ctx: ExtractorOutput): void {
 function handleErrorDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
   const nameNode = node.childForFieldName('name');
   if (!nameNode) return;
-  const parent = findParentNode(node, SOL_PARENT_TYPES);
-  const fullName = parent ? `${parent}.${nameNode.text}` : nameNode.text;
-
   ctx.definitions.push({
-    name: fullName,
+    name: qualifyWithParent(node, nameNode.text),
     kind: 'type',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     decorators: ['error'],
   });
@@ -299,18 +283,21 @@ function handleErrorDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
 function handleStateVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): void {
   const nameNode = node.childForFieldName('name');
   if (!nameNode) return;
-  const parent = findParentNode(node, SOL_PARENT_TYPES);
-  const fullName = parent ? `${parent}.${nameNode.text}` : nameNode.text;
-
   ctx.definitions.push({
-    name: fullName,
+    name: qualifyWithParent(node, nameNode.text),
     kind: 'variable',
-    line: node.startPosition.row + 1,
+    line: nodeStartLine(node),
     endLine: nodeEndLine(node),
     visibility: extractSolVisibility(node),
   });
 }
 
+/** Qualify `name` with the nearest contract/interface/library, if any. */
+function qualifyWithParent(node: TreeSitterNode, name: string): string {
+  const parent = findParentNode(node, SOL_PARENT_TYPES);
+  return parent ? `${parent}.${name}` : name;
+}
+
 function handleImportDirective(node: TreeSitterNode, ctx: ExtractorOutput): void {
   // import "path"; or import { X } from "path"; or import "path" as Alias;
   for (let i = 0; i < node.childCount; i++) {
@@ -328,22 +315,17 @@ function handleImportDirective(node: TreeSitterNode, ctx: ExtractorOutput): void
           if (id) names.push(id.text);
         }
       }
-      ctx.imports.push({
-        source,
-        names: names.length > 0 ? names : ['*'],
-        line: node.startPosition.row + 1,
-      });
+      // Preserve the explicit `['*']` fallback — pushImport's default uses the
+      // source basename, but Solidity's convention here is to mark unqualified
+      // imports as `*`.
+      pushImport(ctx, node, source, names.length > 0 ? names : ['*']);
       return;
     }
     // source_import: handles `import * as X from "path"`
     if (child.type === 'source_import' || child.type === 'import_clause') {
-      const strNode = findChild(child, 'string') || findChild(child, 'string_literal');
+      const strNode = findFirstChildOfTypes(child, ['string', 'string_literal']);
       if (strNode) {
-        ctx.imports.push({
-          source: stripQuotes(strNode.text),
-          names: ['*'],
-          line: node.startPosition.row + 1,
-        });
+        pushImport(ctx, node, stripQuotes(strNode.text), ['*']);
         return;
       }
     }
@@ -354,35 +336,25 @@ function handleCallExpression(node: TreeSitterNode, ctx: ExtractorOutput): void
   const funcNode = node.childForFieldName('function') || node.childForFieldName('callee');
   if (!funcNode) return;
 
-  const call: Call = { name: '', line: node.startPosition.row + 1 };
+  let name = '';
+  let receiver: string | undefined;
   if (funcNode.type === 'member_expression' || funcNode.type === 'member_access') {
     const prop = funcNode.childForFieldName('property') || funcNode.childForFieldName('member');
     const obj = funcNode.childForFieldName('object') || funcNode.childForFieldName('expression');
-    if (prop) call.name = prop.text;
-    if (obj) call.receiver = obj.text;
+    if (prop) name = prop.text;
+    if (obj) receiver = obj.text;
   } else {
-    call.name = funcNode.text;
+    name = funcNode.text;
   }
-  if (call.name) ctx.calls.push(call);
+  if (name) pushCall(ctx, node, name, receiver !== undefined ? { receiver } : {});
 }
 
 // ── Helpers ────────────────────────────────────────────────────────────────
 
 function extractSolParams(funcNode: TreeSitterNode): SubDeclaration[] {
-  const params: SubDeclaration[] = [];
   const paramList =
     funcNode.childForFieldName('parameters') || findChild(funcNode, 'parameter_list');
-  if (!paramList) return params;
-
-  for (let i = 0; i < paramList.childCount; i++) {
-    const param = paramList.child(i);
-    if (!param || param.type !== 'parameter') continue;
-    const nameNode = param.childForFieldName('name');
-    if (nameNode) {
-      params.push({ name: nameNode.text, kind: 'parameter', line: param.startPosition.row + 1 });
-    }
-  }
-  return params;
+  return extractSimpleParameters(paramList, { paramTypes: ['parameter'] });
 }
 
 function extractSolVisibility(

From 5abe6ad55f1d1e1c4da06bec15f0aba8637ff537 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 11:38:46 -0600
Subject: [PATCH 04/27] refactor(extractors): break elixir param/map binding
 cycle

Convert collectElixirParamIdentifiers from mutual-recursion with
collectElixirMapBindings into a single iterative worklist traversal.
Map/list/tuple/binary-operator dispatch is now done via three leaf
helpers that push child nodes onto the worklist instead of calling
back into the main function. This removes the function-level cycle
flagged by codegraph (9 -> 8 cycles) without changing extractor
semantics. docs check acknowledged: internal refactor only.
---
 src/extractors/elixir.ts | 146 ++++++++++++++++++++++-----------------
 1 file changed, 83 insertions(+), 63 deletions(-)

diff --git a/src/extractors/elixir.ts b/src/extractors/elixir.ts
index cb1f8ff05..1b547645c 100644
--- a/src/extractors/elixir.ts
+++ b/src/extractors/elixir.ts
@@ -197,74 +197,94 @@ function extractElixirParams(defCallNode: TreeSitterNode): SubDeclaration[] {
 }
 
 /**
- * Recursively walk a parameter pattern and emit each bound identifier as a
- * `parameter` child. Handles bare identifiers, default-value `a \\ default`,
- * list-cons `[head | tail]`, list `[a, b, c]`, tuple `{x, y}`, and
- * map / struct destructuring (`%{k: v}`, `%Foo{k: v}`).
+ * Walk a parameter pattern and emit each bound identifier as a `parameter`
+ * child. Handles bare identifiers, default-value `a \\ default`, list-cons
+ * `[head | tail]`, list `[a, b, c]`, tuple `{x, y}`, and map / struct
+ * destructuring (`%{k: v}`, `%Foo{k: v}`).
+ *
+ * Implemented as an iterative worklist (rather than mutual recursion) so the
+ * call graph has no function-level cycle: only this function traverses, and
+ * it calls only leaf `pushElixir*` helpers that push onto the worklist.
  */
-function collectElixirParamIdentifiers(node: TreeSitterNode, out: SubDeclaration[]): void {
-  switch (node.type) {
-    case 'identifier':
-      out.push({ name: node.text, kind: 'parameter', line: node.startPosition.row + 1 });
-      return;
-    case 'binary_operator': {
-      // `name \\ default` (default-value) binds the left operand only.
-      // `head | tail` (list-cons, appears inside a `list` pattern) binds both operands.
-      const op = node.child(1);
-      if (!op) return;
-      if (op.type === '\\\\') {
-        const left = node.child(0);
-        if (left) collectElixirParamIdentifiers(left, out);
-        return;
-      }
-      if (op.type === '|') {
-        const left = node.child(0);
-        const right = node.child(2);
-        if (left) collectElixirParamIdentifiers(left, out);
-        if (right) collectElixirParamIdentifiers(right, out);
-        return;
-      }
-      return;
+function collectElixirParamIdentifiers(root: TreeSitterNode, out: SubDeclaration[]): void {
+  const stack: TreeSitterNode[] = [root];
+  while (stack.length > 0) {
+    const node = stack.pop();
+    if (!node) continue;
+    switch (node.type) {
+      case 'identifier':
+        out.push({ name: node.text, kind: 'parameter', line: node.startPosition.row + 1 });
+        break;
+      case 'binary_operator':
+        pushElixirBinaryOperatorOperands(node, stack);
+        break;
+      case 'list':
+      case 'tuple':
+        pushElixirSequenceItems(node, stack);
+        break;
+      case 'map':
+        pushElixirMapValues(node, stack);
+        break;
     }
-    case 'list':
-      // `[a, b, c]` or `[head | tail]` — walk children, skipping punctuation. The
-      // `|` cons case is handled by the `binary_operator` arm when we recurse.
-      for (let i = 0; i < node.childCount; i++) {
-        const c = node.child(i);
-        if (!c || c.type === '[' || c.type === ']' || c.type === ',') continue;
-        collectElixirParamIdentifiers(c, out);
-      }
-      return;
-    case 'tuple':
-      for (let i = 0; i < node.childCount; i++) {
-        const c = node.child(i);
-        if (!c || c.type === '{' || c.type === '}' || c.type === ',') continue;
-        collectElixirParamIdentifiers(c, out);
-      }
-      return;
-    case 'map':
-      // `%{k: v}` or `%Foo{k: v}` — walk map_content > keywords > pair and emit each
-      // pair's value side (the bound name). The struct alias (`Foo`) is a type, not a
-      // bound identifier, so the leading `struct` child is intentionally skipped.
-      for (let i = 0; i < node.childCount; i++) {
-        const c = node.child(i);
-        if (c && c.type === 'map_content') collectElixirMapBindings(c, out);
-      }
-      return;
   }
 }
 
-function collectElixirMapBindings(content: TreeSitterNode, out: SubDeclaration[]): void {
-  for (let i = 0; i < content.childCount; i++) {
-    const kws = content.child(i);
-    if (!kws || kws.type !== 'keywords') continue;
-    for (let j = 0; j < kws.childCount; j++) {
-      const pair = kws.child(j);
-      if (!pair || pair.type !== 'pair') continue;
-      for (let k = 0; k < pair.childCount; k++) {
-        const part = pair.child(k);
-        if (!part || part.type === 'keyword') continue;
-        collectElixirParamIdentifiers(part, out);
+/**
+ * Push the binding-relevant operands of a `binary_operator` parameter onto the
+ * worklist:
+ * - `name \\ default` (default-value) binds the left operand only.
+ * - `head | tail`     (list-cons, appears inside a `list` pattern) binds both.
+ */
+function pushElixirBinaryOperatorOperands(node: TreeSitterNode, stack: TreeSitterNode[]): void {
+  const op = node.child(1);
+  if (!op) return;
+  if (op.type === '\\\\') {
+    const left = node.child(0);
+    if (left) stack.push(left);
+    return;
+  }
+  if (op.type === '|') {
+    const right = node.child(2);
+    const left = node.child(0);
+    if (right) stack.push(right);
+    if (left) stack.push(left);
+  }
+}
+
+/**
+ * Push the elements of a `list` or `tuple` parameter onto the worklist, skipping
+ * punctuation. Pushed last-to-first so the LIFO worklist pops them in source order.
+ */
+function pushElixirSequenceItems(node: TreeSitterNode, stack: TreeSitterNode[]): void {
+  for (let i = node.childCount - 1; i >= 0; i--) {
+    const c = node.child(i);
+    if (!c) continue;
+    const t = c.type;
+    if (t === '[' || t === ']' || t === '{' || t === '}' || t === ',') continue;
+    stack.push(c);
+  }
+}
+
+/**
+ * Push the value side of every pair in a `map` or `%Foo{...}` parameter onto
+ * the worklist, last-to-first so the LIFO worklist pops them in source order. The
+ * struct alias (`Foo`) is a type, not a binding, so the `struct` child is skipped.
+ */
+function pushElixirMapValues(node: TreeSitterNode, stack: TreeSitterNode[]): void {
+  for (let i = node.childCount - 1; i >= 0; i--) {
+    const content = node.child(i);
+    if (!content || content.type !== 'map_content') continue;
+    for (let j = content.childCount - 1; j >= 0; j--) {
+      const kws = content.child(j);
+      if (!kws || kws.type !== 'keywords') continue;
+      for (let k = kws.childCount - 1; k >= 0; k--) {
+        const pair = kws.child(k);
+        if (!pair || pair.type !== 'pair') continue;
+        for (let p = pair.childCount - 1; p >= 0; p--) {
+          const part = pair.child(p);
+          if (!part || part.type === 'keyword') continue;
+          stack.push(part);
+        }
       }
     }
   }

From 0d687c4f0741a040412f3fd18d870bde60d90c0f Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 11:52:28 -0600
Subject: [PATCH 05/27] refactor(extractors-rs): extend shared helpers for
 identifier and symbol collection

---
 .../codegraph-core/src/extractors/helpers.rs  | 289 +++++++++++++++++-
 1 file changed, 288 insertions(+), 1 deletion(-)

diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs
index 7ae7b4bf9..4ee3666c2 100644
--- a/crates/codegraph-core/src/extractors/helpers.rs
+++ b/crates/codegraph-core/src/extractors/helpers.rs
@@ -1,4 +1,4 @@
-use crate::types::{AstNode, Definition, FileSymbols};
+use crate::types::{AstNode, Call, Definition, FileSymbols, Import, TypeMapEntry};
 use tree_sitter::Node;
 
 // Re-export so extractors that `use super::helpers::*` still see it.
@@ -40,6 +40,51 @@ pub fn find_child<'a>(node: &Node<'a>, kind: &str) -> Option<Node<'a>> {
     None
 }
 
+/// Find the first child whose type is in `kinds`. Useful when several
+/// grammar variants name the same conceptual node differently (e.g.
+/// `string` vs `string_literal`). Returns the first match in document
+/// order, or `None`.
+///
+/// Mirrors `findFirstChildOfTypes` in `src/extractors/helpers.ts`.
+pub fn find_first_child_of_types<'a>(node: &Node<'a>, kinds: &[&str]) -> Option<Node<'a>> {
+    for i in 0..node.child_count() {
+        if let Some(child) = node.child(i) {
+            if kinds.contains(&child.kind()) {
+                return Some(child);
+            }
+        }
+    }
+    None
+}
+
+/// Common punctuation tokens — handy as a `skip_kinds` set for
+/// [`iter_children`]. Mirrors `PUNCTUATION_TOKENS` in
+/// `src/extractors/helpers.ts`.
+pub const PUNCTUATION_TOKENS: &[&str] = &[
+    ",", ";", "(", ")", "[", "]", "{", "}", ":", ".",
+];
+
+/// Iterate the direct children of `node` in document order, skipping
+/// nulls and tokens whose `kind()` is in `skip_kinds`. Mirrors the
+/// common `for i in 0..node.child_count() { let c = node.child(i); ... }`
+/// idiom while letting callers filter out grammar punctuation
+/// (`,`, `(`, `{`, etc.).
+///
+/// Mirrors `iterChildren` in `src/extractors/helpers.ts`.
+pub fn iter_children<'a>(
+    node: &'a Node<'a>,
+    skip_kinds: &'a [&'a str],
+) -> impl Iterator<Item = Node<'a>> + 'a {
+    (0..node.child_count()).filter_map(move |i| {
+        let child = node.child(i)?;
+        if skip_kinds.contains(&child.kind()) {
+            None
+        } else {
+            Some(child)
+        }
+    })
+}
+
 /// Find a parent of a given type, walking up the tree.
 pub fn find_parent_of_type<'a>(node: &Node<'a>, kind: &str) -> Option<Node<'a>> {
     let mut current = node.parent();
@@ -748,3 +793,245 @@ fn extract_child_expression_text(node: &Node, source: &[u8]) -> Option<String> {
     }
     Some(truncate(node_text(node, source), AST_TEXT_MAX))
 }
+
+// ── Output-push helpers ────────────────────────────────────────────────────
+//
+// Most extractors finish with `symbols.calls.push(Call { name, line: start_line(node), ... })`
+// or `symbols.imports.push(Import::new(source, names, start_line(node)))`. Centralising
+// the construction keeps `line` derivation consistent and removes the many
+// hand-rolled `start_position().row + 1` literals scattered across language extractors.
+
+/// Append a [`Call`] to `symbols`, using `start_line(node)` for the line and
+/// the given optional `receiver`/`dynamic` values. Does nothing when `name`
+/// is empty.
+///
+/// Mirrors `pushCall` in `src/extractors/helpers.ts`.
+pub fn push_call(
+    symbols: &mut FileSymbols,
+    node: &Node,
+    name: impl Into<String>,
+    receiver: Option<String>,
+    dynamic: Option<bool>,
+) {
+    let name = name.into();
+    if name.is_empty() {
+        return;
+    }
+    symbols.calls.push(Call {
+        name,
+        line: start_line(node),
+        dynamic,
+        receiver,
+    });
+}
+
+/// Append a simple [`Call`] (no receiver, no dynamic flag) to `symbols`.
+/// Convenience wrapper around [`push_call`] for the common case shared by
+/// most C-family and procedural-language extractors.
+pub fn push_simple_call(symbols: &mut FileSymbols, node: &Node, name: impl Into<String>) {
+    push_call(symbols, node, name, None, None);
+}
+
+/// Append an [`Import`] to `symbols`, using `start_line(node)` for the
+/// line. If `names` is empty, the last `/`-segment of `source` is used as
+/// a single-name fallback — matching the convention used by gleam, julia,
+/// and similar module-path imports.
+///
+/// The `customize` closure receives a mutable reference to the freshly
+/// constructed `Import` so callers can flip language-specific flags
+/// (`c_include`, `python_import`, `bash_source`, etc.) before the entry
+/// is pushed. Pass `|_| {}` when no flags are needed.
+///
+/// Mirrors `pushImport` in `src/extractors/helpers.ts`.
+pub fn push_import<F>(
+    symbols: &mut FileSymbols,
+    node: &Node,
+    source: impl Into<String>,
+    names: Vec<String>,
+    customize: F,
+) where
+    F: FnOnce(&mut Import),
+{
+    let source = source.into();
+    if source.is_empty() {
+        return;
+    }
+    let resolved_names = if names.is_empty() {
+        let fallback = source.rsplit('/').next().unwrap_or(source.as_str());
+        vec![fallback.to_string()]
+    } else {
+        names
+    };
+    let mut imp = Import::new(source, resolved_names, start_line(node));
+    customize(&mut imp);
+    symbols.imports.push(imp);
+}
+
+// ── Parameter extraction ───────────────────────────────────────────────────
+
+/// Configuration for [`extract_simple_parameters`].
+///
+/// Collapses the boilerplate in `extract_*_params` helpers across
+/// java / julia / gleam / solidity / r / etc. — each one walks a
+/// parameter list, matches a parameter-node kind, reads the `name`
+/// field, and pushes a [`Definition`] with `kind: "parameter"`.
+pub struct ExtractParametersOptions<'a> {
+    /// Tree-sitter node kinds that mark a single parameter node
+    /// (e.g. `formal_parameter`, `parameter`).
+    pub param_kinds: &'a [&'a str],
+    /// Field name on each parameter that holds the bound identifier.
+    /// Defaults to `Some("name")`. Pass `None` to use the parameter
+    /// node itself when its kind is in `param_kinds` and it has no
+    /// `name` field (e.g. R's bare `identifier`).
+    pub name_field: Option<&'a str>,
+    /// If true, when `name_field` lookup fails fall back to the first
+    /// `identifier` child of the parameter. Useful for gleam /
+    /// solidity-style grammars.
+    pub fallback_to_identifier: bool,
+}
+
+impl<'a> Default for ExtractParametersOptions<'a> {
+    fn default() -> Self {
+        Self {
+            param_kinds: &[],
+            name_field: Some("name"),
+            fallback_to_identifier: false,
+        }
+    }
+}
+
+/// Resolve the identifier node that names a parameter. Used by
+/// [`extract_simple_parameters`]; exposed so language-specific
+/// extractors can reuse the same lookup logic in custom loops.
+///
+/// Mirrors `resolveParamName` in `src/extractors/helpers.ts`.
+pub fn resolve_param_name<'a>(
+    param_node: &Node<'a>,
+    name_field: Option<&str>,
+    fallback_to_identifier: bool,
+) -> Option<Node<'a>> {
+    let Some(field) = name_field else {
+        return Some(*param_node);
+    };
+    if let Some(named) = param_node.child_by_field_name(field) {
+        return Some(named);
+    }
+    if fallback_to_identifier {
+        return find_child(param_node, "identifier");
+    }
+    None
+}
+
+/// Extract parameters from a parameter-list node using a uniform
+/// pattern. Returns an empty vec when `param_list` is `None`.
+///
+/// Mirrors `extractSimpleParameters` in `src/extractors/helpers.ts`.
+pub fn extract_simple_parameters(
+    param_list: Option<Node>,
+    source: &[u8],
+    options: &ExtractParametersOptions,
+) -> Vec<Definition> {
+    let mut params = Vec::new();
+    let Some(param_list) = param_list else {
+        return params;
+    };
+    for i in 0..param_list.child_count() {
+        let Some(child) = param_list.child(i) else { continue };
+        if !options.param_kinds.contains(&child.kind()) {
+            continue;
+        }
+        let Some(name_node) = resolve_param_name(
+            &child,
+            options.name_field,
+            options.fallback_to_identifier,
+        ) else {
+            continue;
+        };
+        params.push(child_def(
+            node_text(&name_node, source).to_string(),
+            "parameter",
+            start_line(&child),
+        ));
+    }
+    params
+}
+
+// ── Type-map helpers ───────────────────────────────────────────────────────
+
+/// Record a parameter name → type binding in the type-map sink, using
+/// the default confidence of `0.9` shared by every Rust extractor.
+pub fn push_type_map_entry(
+    symbols: &mut FileSymbols,
+    name: impl Into<String>,
+    type_name: impl Into<String>,
+) {
+    let name = name.into();
+    if name.is_empty() {
+        return;
+    }
+    symbols.type_map.push(TypeMapEntry {
+        name,
+        type_name: type_name.into(),
+        confidence: 0.9,
+    });
+}
+
+/// C-family `declaration` / `parameter_declaration` type-map matcher.
+///
+/// The cpp / cuda / c extractors all emit verbatim copies of the same
+/// `match_*_type_map` walker — they share node kinds (`declaration`,
+/// `init_declarator`, `parameter_declaration`) and only differ in the
+/// per-language declarator-unwrap helper. This helper centralises the
+/// shared walker; callers supply the language's `unwrap_declarator`
+/// closure (e.g. `unwrap_cpp_declarator`).
+///
+/// Returns `true` for a `declaration` with a `type` field or a
+/// `parameter_declaration` with `type` and `declarator` fields, else `false`.
+/// Per-language adapters with the [`walk_tree`] match-fn signature wrap this helper.
+pub fn match_c_family_type_map<F>(
+    node: &Node,
+    source: &[u8],
+    symbols: &mut FileSymbols,
+    mut unwrap_declarator: F,
+) -> bool
+where
+    F: FnMut(&Node, &[u8]) -> String,
+{
+    match node.kind() {
+        "declaration" => {
+            let Some(type_node) = node.child_by_field_name("type") else {
+                return false;
+            };
+            let type_name = node_text(&type_node, source).to_string();
+            for i in 0..node.child_count() {
+                let Some(child) = node.child(i) else { continue };
+                let kind = child.kind();
+                if kind != "init_declarator" && kind != "identifier" {
+                    continue;
+                }
+                let name_node = if kind == "init_declarator" {
+                    child.child_by_field_name("declarator")
+                } else {
+                    Some(child)
+                };
+                let Some(name_node) = name_node else { continue };
+                let final_name = unwrap_declarator(&name_node, source);
+                push_type_map_entry(symbols, final_name, type_name.clone());
+            }
+            true
+        }
+        "parameter_declaration" => {
+            let Some(type_node) = node.child_by_field_name("type") else {
+                return false;
+            };
+            let Some(decl) = node.child_by_field_name("declarator") else {
+                return false;
+            };
+            let name = unwrap_declarator(&decl, source);
+            let type_name = node_text(&type_node, source).to_string();
+            push_type_map_entry(symbols, name, type_name);
+            true
+        }
+        _ => false,
+    }
+}

From f10fcab4859615d002240ca3580006734b49ff7e Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 12:03:31 -0600
Subject: [PATCH 06/27] refactor(extractors-rs): adopt shared helpers across
 language extractors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 5 of the Rust extractor refactor plan (sync.json cluster 2). Adopts
the helpers extended in 0d687c4 (push_call, push_simple_call, push_import,
push_type_map_entry, extract_simple_parameters, match_c_family_type_map)
across eight language extractors:

- cpp.rs: collapse match_cpp_type_map to a one-line delegate of
  match_c_family_type_map; use push_import/push_simple_call/push_call
  for include and call sites
- cuda.rs: same delegation as cpp.rs; use push_import/push_simple_call/
  push_call across include and call_expression handlers
- java.rs: use push_type_map_entry for local-variable / formal-parameter
  bindings; use push_call/push_simple_call for method invocation and
  object creation; collapse extract_java_parameters to a one-shot
  extract_simple_parameters call; use push_import for import declaration
- javascript.rs: use push_simple_call for new_expression identifier
  branch; use push_type_map_entry for the confidence-0.9 type entries
- julia.rs: use push_simple_call/push_call across identifier and
  field_expression / scoped_identifier call branches
- objc.rs: use push_import for at_import; use push_call for c-call and
  message-expression handlers (drops redundant is_empty guards)
- r_lang.rs: use push_simple_call/push_call across identifier and
  namespace_operator call branches; use push_import for library/source
- solidity.rs: use push_call (drops redundant guard) for call sites;
  collapse extract_sol_params to a one-shot extract_simple_parameters

Net: -207 lines across 8 files, no behavior change. cargo check clean,
324 rust unit tests pass.

Pre-existing test failure: tests/engines/parity.test.ts has two failing
elixir cases unrelated to this commit (filed as #1227 — regression from
commit 5abe6ad in Phase 3).
---
 crates/codegraph-core/src/extractors/cpp.rs   | 73 +++---------------
 crates/codegraph-core/src/extractors/cuda.rs  | 76 +++----------------
 crates/codegraph-core/src/extractors/java.rs  | 73 ++++++++----------
 .../src/extractors/javascript.rs              | 23 ++----
 crates/codegraph-core/src/extractors/julia.rs | 21 +----
 crates/codegraph-core/src/extractors/objc.rs  | 26 +------
 .../codegraph-core/src/extractors/r_lang.rs   | 33 ++------
 .../codegraph-core/src/extractors/solidity.rs | 36 +++------
 8 files changed, 77 insertions(+), 284 deletions(-)

diff --git a/crates/codegraph-core/src/extractors/cpp.rs b/crates/codegraph-core/src/extractors/cpp.rs
index 676f1105f..0ed8f4cda 100644
--- a/crates/codegraph-core/src/extractors/cpp.rs
+++ b/crates/codegraph-core/src/extractors/cpp.rs
@@ -20,49 +20,9 @@ impl SymbolExtractor for CppExtractor {
 // ── Type inference ──────────────────────────────────────────────────────────
 
 fn match_cpp_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) {
-    match node.kind() {
-        "declaration" => {
-            if let Some(type_node) = node.child_by_field_name("type") {
-                let type_name = node_text(&type_node, source);
-                for i in 0..node.child_count() {
-                    if let Some(child) = node.child(i) {
-                        if child.kind() == "init_declarator" || child.kind() == "identifier" {
-                            let name_node = if child.kind() == "init_declarator" {
-                                child.child_by_field_name("declarator")
-                            } else {
-                                Some(child)
-                            };
-                            if let Some(name_node) = name_node {
-                                let final_name = unwrap_cpp_declarator(&name_node, source);
-                                if !final_name.is_empty() {
-                                    symbols.type_map.push(TypeMapEntry {
-                                        name: final_name,
-                                        type_name: type_name.to_string(),
-                                        confidence: 0.9,
-                                    });
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        "parameter_declaration" => {
-            if let Some(type_node) = node.child_by_field_name("type") {
-                if let Some(decl) = node.child_by_field_name("declarator") {
-                    let name = unwrap_cpp_declarator(&decl, source);
-                    if !name.is_empty() {
-                        symbols.type_map.push(TypeMapEntry {
-                            name,
-                            type_name: node_text(&type_node, source).to_string(),
-                            confidence: 0.9,
-                        });
-                    }
-                }
-            }
-        }
-        _ => {}
-    }
+    // Delegate the shared C-family declaration / parameter_declaration walker
+    // to the helper; supply the C++ declarator unwrap closure.
+    match_c_family_type_map(node, source, symbols, unwrap_cpp_declarator);
 }
 
 fn unwrap_cpp_declarator(node: &Node, source: &[u8]) -> String {
@@ -353,9 +313,9 @@ fn handle_cpp_preproc_include(node: &Node, source: &[u8], symbols: &mut FileSymb
             let name = last.strip_suffix(".h")
                 .or_else(|| last.strip_suffix(".hpp"))
                 .unwrap_or(last);
-            let mut imp = Import::new(path.to_string(), vec![name.to_string()], start_line(node));
-            imp.c_include = Some(true);
-            symbols.imports.push(imp);
+            push_import(symbols, node, path.to_string(), vec![name.to_string()], |imp| {
+                imp.c_include = Some(true);
+            });
         }
     }
 }
@@ -364,12 +324,7 @@ fn handle_cpp_call_expression(node: &Node, source: &[u8], symbols: &mut FileSymb
     if let Some(fn_node) = node.child_by_field_name("function") {
         match fn_node.kind() {
             "identifier" | "qualified_identifier" | "scoped_identifier" => {
-                symbols.calls.push(Call {
-                    name: node_text(&fn_node, source).to_string(),
-                    line: start_line(node),
-                    dynamic: None,
-                    receiver: None,
-                });
+                push_simple_call(symbols, node, node_text(&fn_node, source).to_string());
             }
             "field_expression" => {
                 let name = named_child_text(&fn_node, "field", source)
@@ -377,20 +332,10 @@ fn handle_cpp_call_expression(node: &Node, source: &[u8], symbols: &mut FileSymb
                     .unwrap_or_else(|| node_text(&fn_node, source).to_string());
                 let receiver = named_child_text(&fn_node, "argument", source)
                     .map(|s| s.to_string());
-                symbols.calls.push(Call {
-                    name,
-                    line: start_line(node),
-                    dynamic: None,
-                    receiver,
-                });
+                push_call(symbols, node, name, receiver, None);
             }
             _ => {
-                symbols.calls.push(Call {
-                    name: node_text(&fn_node, source).to_string(),
-                    line: start_line(node),
-                    dynamic: None,
-                    receiver: None,
-                });
+                push_simple_call(symbols, node, node_text(&fn_node, source).to_string());
             }
         }
     }
diff --git a/crates/codegraph-core/src/extractors/cuda.rs b/crates/codegraph-core/src/extractors/cuda.rs
index b8c67127a..f322a44c5 100644
--- a/crates/codegraph-core/src/extractors/cuda.rs
+++ b/crates/codegraph-core/src/extractors/cuda.rs
@@ -44,49 +44,10 @@ impl SymbolExtractor for CudaExtractor {
 /// nodes. Mirrors `match_cpp_type_map` in `cpp.rs` — the CUDA grammar shares
 /// these C++ node types, so the same logic works unchanged.
 fn match_cuda_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) {
-    match node.kind() {
-        "declaration" => {
-            if let Some(type_node) = node.child_by_field_name("type") {
-                let type_name = node_text(&type_node, source);
-                for i in 0..node.child_count() {
-                    if let Some(child) = node.child(i) {
-                        if child.kind() == "init_declarator" || child.kind() == "identifier" {
-                            let name_node = if child.kind() == "init_declarator" {
-                                child.child_by_field_name("declarator")
-                            } else {
-                                Some(child)
-                            };
-                            if let Some(name_node) = name_node {
-                                let final_name = unwrap_cuda_declarator(&name_node, source);
-                                if !final_name.is_empty() {
-                                    symbols.type_map.push(TypeMapEntry {
-                                        name: final_name,
-                                        type_name: type_name.to_string(),
-                                        confidence: 0.9,
-                                    });
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        "parameter_declaration" => {
-            if let Some(type_node) = node.child_by_field_name("type") {
-                if let Some(decl) = node.child_by_field_name("declarator") {
-                    let name = unwrap_cuda_declarator(&decl, source);
-                    if !name.is_empty() {
-                        symbols.type_map.push(TypeMapEntry {
-                            name,
-                            type_name: node_text(&type_node, source).to_string(),
-                            confidence: 0.9,
-                        });
-                    }
-                }
-            }
-        }
-        _ => {}
-    }
+    // Delegate to the shared C-family walker; pass the CUDA declarator unwrap
+    // closure so pointer / reference / function declarators yield the bare
+    // identifier name.
+    match_c_family_type_map(node, source, symbols, unwrap_cuda_declarator);
 }
 
 // ── CUDA-specific qualifiers ────────────────────────────────────────────────
@@ -522,13 +483,9 @@ fn handle_cuda_preproc_include(node: &Node, source: &[u8], symbols: &mut FileSym
                 .or_else(|| last.strip_suffix(".hpp"))
                 .or_else(|| last.strip_suffix(".h"))
                 .unwrap_or(last);
-            let mut imp = Import::new(
-                path.to_string(),
-                vec![name.to_string()],
-                start_line(node),
-            );
-            imp.c_include = Some(true);
-            symbols.imports.push(imp);
+            push_import(symbols, node, path.to_string(), vec![name.to_string()], |imp| {
+                imp.c_include = Some(true);
+            });
         }
     }
 }
@@ -540,24 +497,9 @@ fn handle_cuda_call_expression(node: &Node, source: &[u8], symbols: &mut FileSym
                 .map(|s| s.to_string())
                 .unwrap_or_default();
             let receiver = named_child_text(&fn_node, "argument", source).map(|s| s.to_string());
-            if !name.is_empty() {
-                symbols.calls.push(Call {
-                    name,
-                    line: start_line(node),
-                    dynamic: None,
-                    receiver,
-                });
-            }
+            push_call(symbols, node, name, receiver, None);
         } else {
-            let name = node_text(&fn_node, source).to_string();
-            if !name.is_empty() {
-                symbols.calls.push(Call {
-                    name,
-                    line: start_line(node),
-                    dynamic: None,
-                    receiver: None,
-                });
-            }
+            push_simple_call(symbols, node, node_text(&fn_node, source).to_string());
         }
     }
 }
diff --git a/crates/codegraph-core/src/extractors/java.rs b/crates/codegraph-core/src/extractors/java.rs
index a7c4bf6d1..94dd99e87 100644
--- a/crates/codegraph-core/src/extractors/java.rs
+++ b/crates/codegraph-core/src/extractors/java.rs
@@ -36,11 +36,11 @@ fn match_java_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _d
                         if let Some(child) = node.child(i) {
                             if child.kind() == "variable_declarator" {
                                 if let Some(name_node) = child.child_by_field_name("name") {
-                                    symbols.type_map.push(TypeMapEntry {
-                                        name: node_text(&name_node, source).to_string(),
-                                        type_name: type_name.to_string(),
-                                        confidence: 0.9,
-                                    });
+                                    push_type_map_entry(
+                                        symbols,
+                                        node_text(&name_node, source).to_string(),
+                                        type_name.to_string(),
+                                    );
                                 }
                             }
                         }
@@ -52,11 +52,11 @@ fn match_java_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _d
             if let Some(type_node) = node.child_by_field_name("type") {
                 if let Some(type_name) = extract_java_type_name(&type_node, source) {
                     if let Some(name_node) = node.child_by_field_name("name") {
-                        symbols.type_map.push(TypeMapEntry {
-                            name: node_text(&name_node, source).to_string(),
-                            type_name: type_name.to_string(),
-                            confidence: 0.9,
-                        });
+                        push_type_map_entry(
+                            symbols,
+                            node_text(&name_node, source).to_string(),
+                            type_name.to_string(),
+                        );
                     }
                 }
             }
@@ -266,9 +266,9 @@ fn handle_import_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
             let last = import_path.split('.').last().unwrap_or("").to_string();
             vec![last]
         };
-        let mut imp = Import::new(import_path, names, start_line(node));
-        imp.java_import = Some(true);
-        symbols.imports.push(imp);
+        push_import(symbols, node, import_path, names, |imp| {
+            imp.java_import = Some(true);
+        });
     }
 }
 
@@ -276,12 +276,13 @@ fn handle_method_invocation(node: &Node, source: &[u8], symbols: &mut FileSymbol
     if let Some(name_node) = node.child_by_field_name("name") {
         let receiver = named_child_text(node, "object", source)
             .map(|s| s.to_string());
-        symbols.calls.push(Call {
-            name: node_text(&name_node, source).to_string(),
-            line: start_line(node),
-            dynamic: None,
+        push_call(
+            symbols,
+            node,
+            node_text(&name_node, source).to_string(),
             receiver,
-        });
+            None,
+        );
     }
 }
 
@@ -293,37 +294,25 @@ fn handle_object_creation(node: &Node, source: &[u8], symbols: &mut FileSymbols)
         Some(node_text(&type_node, source).to_string())
     };
     if let Some(name) = type_name {
-        symbols.calls.push(Call {
-            name,
-            line: start_line(node),
-            dynamic: None,
-            receiver: None,
-        });
+        push_simple_call(symbols, node, name);
     }
 }
 
 // ── Extended kinds helpers ──────────────────────────────────────────────────
 
 fn extract_java_parameters(node: &Node, source: &[u8]) -> Vec<Definition> {
-    let mut params = Vec::new();
-    let params_node = node.child_by_field_name("parameters")
+    let params_node = node
+        .child_by_field_name("parameters")
         .or_else(|| find_child(node, "formal_parameters"));
-    if let Some(params_node) = params_node {
-        for i in 0..params_node.child_count() {
-            if let Some(child) = params_node.child(i) {
-                if child.kind() == "formal_parameter" || child.kind() == "spread_parameter" {
-                    if let Some(name_node) = child.child_by_field_name("name") {
-                        params.push(child_def(
-                            node_text(&name_node, source).to_string(),
-                            "parameter",
-                            start_line(&child),
-                        ));
-                    }
-                }
-            }
-        }
-    }
-    params
+    extract_simple_parameters(
+        params_node,
+        source,
+        &ExtractParametersOptions {
+            param_kinds: &["formal_parameter", "spread_parameter"],
+            name_field: Some("name"),
+            fallback_to_identifier: false,
+        },
+    )
 }
 
 fn extract_java_class_fields(node: &Node, source: &[u8]) -> Vec<Definition> {
diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs
index 3a56d4d6d..d5403aa0f 100644
--- a/crates/codegraph-core/src/extractors/javascript.rs
+++ b/crates/codegraph-core/src/extractors/javascript.rs
@@ -61,11 +61,7 @@ fn match_js_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep
                     // Type annotation: confidence 0.9
                     if let Some(type_anno) = find_child(node, "type_annotation") {
                         if let Some(type_name) = extract_simple_type_name(&type_anno, source) {
-                            symbols.type_map.push(TypeMapEntry {
-                                name: var_name.to_string(),
-                                type_name: type_name.to_string(),
-                                confidence: 0.9,
-                            });
+                            push_type_map_entry(symbols, var_name.to_string(), type_name.to_string());
                         }
                     }
                     // Constructor: confidence 1.0 (overrides annotation in edge builder)
@@ -91,11 +87,11 @@ fn match_js_type_map(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dep
                 if name_node.kind() == "identifier" {
                     if let Some(type_anno) = find_child(node, "type_annotation") {
                         if let Some(type_name) = extract_simple_type_name(&type_anno, source) {
-                            symbols.type_map.push(TypeMapEntry {
-                                name: node_text(&name_node, source).to_string(),
-                                type_name: type_name.to_string(),
-                                confidence: 0.9,
-                            });
+                            push_type_map_entry(
+                                symbols,
+                                node_text(&name_node, source).to_string(),
+                                type_name.to_string(),
+                            );
                         }
                     }
                 }
@@ -333,12 +329,7 @@ fn handle_new_expr(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
     let Some(ctor) = ctor else { return };
     match ctor.kind() {
         "identifier" => {
-            symbols.calls.push(Call {
-                name: node_text(&ctor, source).to_string(),
-                line: start_line(node),
-                dynamic: None,
-                receiver: None,
-            });
+            push_simple_call(symbols, node, node_text(&ctor, source).to_string());
         }
         "member_expression" => {
             if let Some(call_info) = extract_call_info(&ctor, node, source) {
diff --git a/crates/codegraph-core/src/extractors/julia.rs b/crates/codegraph-core/src/extractors/julia.rs
index 61acb77a9..f8ceeb6c1 100644
--- a/crates/codegraph-core/src/extractors/julia.rs
+++ b/crates/codegraph-core/src/extractors/julia.rs
@@ -482,12 +482,7 @@ fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
 
     match func_node.kind() {
         "identifier" => {
-            symbols.calls.push(Call {
-                name: node_text(&func_node, source).to_string(),
-                line: start_line(node),
-                dynamic: None,
-                receiver: None,
-            });
+            push_simple_call(symbols, node, node_text(&func_node, source).to_string());
         }
         "field_expression" | "scoped_identifier" => {
             let raw = node_text(&func_node, source);
@@ -495,19 +490,9 @@ fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
             if parts.len() >= 2 {
                 let last = parts.last().copied().unwrap_or("");
                 let receiver = parts[..parts.len() - 1].join(".");
-                symbols.calls.push(Call {
-                    name: last.to_string(),
-                    line: start_line(node),
-                    dynamic: None,
-                    receiver: Some(receiver),
-                });
+                push_call(symbols, node, last.to_string(), Some(receiver), None);
             } else {
-                symbols.calls.push(Call {
-                    name: raw.to_string(),
-                    line: start_line(node),
-                    dynamic: None,
-                    receiver: None,
-                });
+                push_simple_call(symbols, node, raw.to_string());
             }
         }
         _ => {}
diff --git a/crates/codegraph-core/src/extractors/objc.rs b/crates/codegraph-core/src/extractors/objc.rs
index 50e68140b..edd029f88 100644
--- a/crates/codegraph-core/src/extractors/objc.rs
+++ b/crates/codegraph-core/src/extractors/objc.rs
@@ -226,11 +226,7 @@ fn handle_at_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
         .or_else(|| find_child(node, "identifier"));
     if let Some(m) = module_node {
         let name = node_text(&m, source).to_string();
-        symbols.imports.push(Import::new(
-            name.clone(),
-            vec![name],
-            start_line(node),
-        ));
+        push_import(symbols, node, name.clone(), vec![name], |_| {});
     }
 }
 
@@ -329,14 +325,7 @@ fn handle_c_call_expr(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
         (node_text(&fn_node, source).to_string(), None)
     };
 
-    if !name.is_empty() {
-        symbols.calls.push(Call {
-            name,
-            line: start_line(node),
-            dynamic: None,
-            receiver,
-        });
-    }
+    push_call(symbols, node, name, receiver, None);
 }
 
 /// `[receiver selector:arg ...]` message send. The grammar gives every
@@ -347,16 +336,7 @@ fn handle_message_expr(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
         .map(|n| node_text(&n, source).to_string());
 
     let selector = build_message_selector(node, source);
-    if selector.is_empty() {
-        return;
-    }
-
-    symbols.calls.push(Call {
-        name: selector,
-        line: start_line(node),
-        dynamic: None,
-        receiver,
-    });
+    push_call(symbols, node, selector, receiver, None);
 }
 
 // ── Helpers ───────────────────────────────────────────────────────────────
diff --git a/crates/codegraph-core/src/extractors/r_lang.rs b/crates/codegraph-core/src/extractors/r_lang.rs
index d5d89bdb8..3686c562e 100644
--- a/crates/codegraph-core/src/extractors/r_lang.rs
+++ b/crates/codegraph-core/src/extractors/r_lang.rs
@@ -177,12 +177,7 @@ fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
 
     match func_node.kind() {
         "identifier" => {
-            symbols.calls.push(Call {
-                name: func_text.to_string(),
-                line: start_line(node),
-                dynamic: None,
-                receiver: None,
-            });
+            push_simple_call(symbols, node, func_text.to_string());
         }
         "namespace_operator" => {
             // `pkg::func` — receiver is the package; name is the function.
@@ -190,12 +185,7 @@ fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
             if parts.len() >= 2 {
                 let name = parts[parts.len() - 1].to_string();
                 let receiver = parts[..parts.len() - 1].join("::");
-                symbols.calls.push(Call {
-                    name,
-                    line: start_line(node),
-                    dynamic: None,
-                    receiver: Some(receiver),
-                });
+                push_call(symbols, node, name, Some(receiver), None);
             }
         }
         _ => {}
@@ -287,22 +277,14 @@ fn strip_string_quotes(node: &Node, source: &[u8]) -> String {
 
 fn handle_library_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
     if let Some(pkg) = first_argument_value(node, source, true) {
-        symbols.imports.push(Import::new(
-            pkg.clone(),
-            vec![pkg],
-            start_line(node),
-        ));
+        push_import(symbols, node, pkg.clone(), vec![pkg], |_| {});
     }
 }
 
 fn handle_source_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
     // source() only accepts string literals — `source(varname)` is not an import.
     if let Some(path) = first_argument_value(node, source, false) {
-        symbols.imports.push(Import::new(
-            path,
-            vec!["source".to_string()],
-            start_line(node),
-        ));
+        push_import(symbols, node, path, vec!["source".to_string()], |_| {});
     }
 }
 
@@ -344,12 +326,7 @@ fn handle_set_generic(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
 // recursive walk of the anonymous function argument.
 fn handle_set_method(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
     if let Some(name) = first_argument_value(node, source, false) {
-        symbols.calls.push(Call {
-            name,
-            line: start_line(node),
-            dynamic: None,
-            receiver: None,
-        });
+        push_simple_call(symbols, node, name);
     }
 }
 
diff --git a/crates/codegraph-core/src/extractors/solidity.rs b/crates/codegraph-core/src/extractors/solidity.rs
index 0302250ee..313b259fa 100644
--- a/crates/codegraph-core/src/extractors/solidity.rs
+++ b/crates/codegraph-core/src/extractors/solidity.rs
@@ -459,40 +459,24 @@ fn handle_call_expression(node: &Node, source: &[u8], symbols: &mut FileSymbols)
         _ => (node_text(&func_node, source).to_string(), None),
     };
 
-    if !name.is_empty() {
-        symbols.calls.push(Call {
-            name,
-            line: start_line(node),
-            dynamic: None,
-            receiver,
-        });
-    }
+    push_call(symbols, node, name, receiver, None);
 }
 
 // ── Helpers ──────────────────────────────────────────────────────────────────
 
 fn extract_sol_params(func_node: &Node, source: &[u8]) -> Vec<Definition> {
-    let mut params = Vec::new();
     let param_list = func_node
         .child_by_field_name("parameters")
         .or_else(|| find_child(func_node, "parameter_list"));
-    let Some(param_list) = param_list else {
-        return params;
-    };
-    for i in 0..param_list.child_count() {
-        let Some(param) = param_list.child(i) else { continue };
-        if param.kind() != "parameter" {
-            continue;
-        }
-        if let Some(name_node) = param.child_by_field_name("name") {
-            params.push(child_def(
-                node_text(&name_node, source).to_string(),
-                "parameter",
-                start_line(&param),
-            ));
-        }
-    }
-    params
+    extract_simple_parameters(
+        param_list,
+        source,
+        &ExtractParametersOptions {
+            param_kinds: &["parameter"],
+            name_field: Some("name"),
+            fallback_to_identifier: false,
+        },
+    )
 }
 
 /// Find the name of an enclosing contract/interface/library, if any.

From d9bbc8f4d1298521a606612254ca3116d42d4cba Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 12:07:23 -0600
Subject: [PATCH 07/27] refactor(extractors-rs): break elixir param/map binding
 cycle

Convert collect_elixir_param_identifiers from mutual-recursion with
collect_elixir_map_bindings into a single iterative worklist traversal.
Map/list/tuple/binary-operator dispatch is now done via three leaf
helpers (push_elixir_sequence_items, push_elixir_map_values,
push_elixir_binary_operator_operands) that push child nodes onto the
worklist instead of calling back into the main function. This removes
the function-level cycle flagged by codegraph (8 -> 7 cycles) and
mirrors the TS refactor in 5abe6ad without changing extractor
semantics. docs check acknowledged: internal refactor only.
---
 .../codegraph-core/src/extractors/elixir.rs   | 143 ++++++++++--------
 1 file changed, 76 insertions(+), 67 deletions(-)

diff --git a/crates/codegraph-core/src/extractors/elixir.rs b/crates/codegraph-core/src/extractors/elixir.rs
index b9fa8686a..cddacf42c 100644
--- a/crates/codegraph-core/src/extractors/elixir.rs
+++ b/crates/codegraph-core/src/extractors/elixir.rs
@@ -157,84 +157,93 @@ fn extract_elixir_params(args: &Node, source: &[u8]) -> Vec<Definition> {
     params
 }
 
-/// Recursively walk a parameter pattern and emit each bound identifier as a
-/// `parameter` child. Handles bare identifiers, default-value `a \\ default`,
-/// list-cons `[head | tail]`, list `[a, b, c]`, tuple `{x, y}`, and
-/// map / struct destructuring (`%{k: v}`, `%Foo{k: v}`).
-fn collect_elixir_param_identifiers(node: &Node, source: &[u8], out: &mut Vec<Definition>) {
-    match node.kind() {
-        "identifier" => {
-            out.push(child_def(
-                node_text(node, source).to_string(),
-                "parameter",
-                start_line(node),
-            ));
-        }
-        "binary_operator" => {
-            // `name \\ default` (default-value) binds the left operand only.
-            // `head | tail` (list-cons, appears inside a `list` pattern) binds both operands.
-            let Some(op) = node.child(1) else { return };
-            match op.kind() {
-                "\\\\" => {
-                    if let Some(left) = node.child(0) {
-                        collect_elixir_param_identifiers(&left, source, out);
-                    }
-                }
-                "|" => {
-                    if let Some(left) = node.child(0) {
-                        collect_elixir_param_identifiers(&left, source, out);
-                    }
-                    if let Some(right) = node.child(2) {
-                        collect_elixir_param_identifiers(&right, source, out);
-                    }
-                }
-                _ => {}
+/// Walk a parameter pattern and emit each bound identifier as a `parameter`
+/// child. Handles bare identifiers, default-value `a \\ default`, list-cons
+/// `[head | tail]`, list `[a, b, c]`, tuple `{x, y}`, and map / struct
+/// destructuring (`%{k: v}`, `%Foo{k: v}`).
+///
+/// Implemented as an iterative worklist (rather than mutual recursion) so
+/// the call graph has no function-level cycle: only one function performs the
+/// traversal and it invokes only leaf helpers (`push_elixir_sequence_items`,
+/// `push_elixir_map_values`, `push_elixir_binary_operator_operands`).
+fn collect_elixir_param_identifiers(root: &Node, source: &[u8], out: &mut Vec<Definition>) {
+    let mut stack: Vec<Node> = vec![*root];
+    while let Some(node) = stack.pop() {
+        match node.kind() {
+            "identifier" => {
+                out.push(child_def(
+                    node_text(&node, source).to_string(),
+                    "parameter",
+                    start_line(&node),
+                ));
             }
-        }
-        "list" => {
-            // `[a, b, c]` or `[head | tail]` — walk children, skipping punctuation.
-            // The `|` cons case is handled by the `binary_operator` arm on recursion.
-            for i in 0..node.child_count() {
-                let Some(c) = node.child(i) else { continue };
-                let k = c.kind();
-                if k == "[" || k == "]" || k == "," { continue; }
-                collect_elixir_param_identifiers(&c, source, out);
+            "binary_operator" => {
+                push_elixir_binary_operator_operands(&node, &mut stack);
+            }
+            "list" | "tuple" => {
+                push_elixir_sequence_items(&node, &mut stack);
+            }
+            "map" => {
+                push_elixir_map_values(&node, &mut stack);
             }
+            _ => {}
         }
-        "tuple" => {
-            for i in 0..node.child_count() {
-                let Some(c) = node.child(i) else { continue };
-                let k = c.kind();
-                if k == "{" || k == "}" || k == "," { continue; }
-                collect_elixir_param_identifiers(&c, source, out);
+    }
+}
+
+/// Push the binding-relevant operands of a `binary_operator` parameter onto the
+/// worklist:
+/// - `name \\ default` (default-value) binds the left operand only.
+/// - `head | tail`     (list-cons, appears inside a `list` pattern) binds both.
+fn push_elixir_binary_operator_operands<'a>(node: &Node<'a>, stack: &mut Vec<Node<'a>>) {
+    let Some(op) = node.child(1) else { return };
+    match op.kind() {
+        "\\\\" => {
+            if let Some(left) = node.child(0) {
+                stack.push(left);
             }
         }
-        "map" => {
-            // `%{k: v}` or `%Foo{k: v}` — walk map_content > keywords > pair and emit
-            // each pair's value side (the bound name). The leading `struct` alias is a
-            // type, not a bound identifier, so it is intentionally skipped.
-            for i in 0..node.child_count() {
-                let Some(c) = node.child(i) else { continue };
-                if c.kind() == "map_content" {
-                    collect_elixir_map_bindings(&c, source, out);
-                }
+        "|" => {
+            if let Some(right) = node.child(2) {
+                stack.push(right);
+            }
+            if let Some(left) = node.child(0) {
+                stack.push(left);
             }
         }
         _ => {}
     }
 }
 
-fn collect_elixir_map_bindings(content: &Node, source: &[u8], out: &mut Vec<Definition>) {
-    for i in 0..content.child_count() {
-        let Some(kws) = content.child(i) else { continue };
-        if kws.kind() != "keywords" { continue; }
-        for j in 0..kws.child_count() {
-            let Some(pair) = kws.child(j) else { continue };
-            if pair.kind() != "pair" { continue; }
-            for k in 0..pair.child_count() {
-                let Some(part) = pair.child(k) else { continue };
-                if part.kind() == "keyword" { continue; }
-                collect_elixir_param_identifiers(&part, source, out);
+/// Push the binding-relevant elements of a `list` or `tuple` parameter onto the
+/// worklist, skipping punctuation. Pushed last-to-first: LIFO pops in source order.
+fn push_elixir_sequence_items<'a>(node: &Node<'a>, stack: &mut Vec<Node<'a>>) {
+    for i in (0..node.child_count()).rev() {
+        let Some(c) = node.child(i) else { continue };
+        let k = c.kind();
+        if k == "[" || k == "]" || k == "{" || k == "}" || k == "," { continue; }
+        stack.push(c);
+    }
+}
+
+/// Push the value side of every pair in a `map` or `%Foo{...}` parameter onto
+/// the worklist, last-to-first so the LIFO stack pops them in source order. The
+/// struct alias (`Foo`) is a type, not a binding, so the `struct` child is skipped.
+fn push_elixir_map_values<'a>(node: &Node<'a>, stack: &mut Vec<Node<'a>>) {
+    for i in (0..node.child_count()).rev() {
+        let Some(content) = node.child(i) else { continue };
+        if content.kind() != "map_content" { continue; }
+        for j in (0..content.child_count()).rev() {
+            let Some(kws) = content.child(j) else { continue };
+            if kws.kind() != "keywords" { continue; }
+            for k in (0..kws.child_count()).rev() {
+                let Some(pair) = kws.child(k) else { continue };
+                if pair.kind() != "pair" { continue; }
+                for p in (0..pair.child_count()).rev() {
+                    let Some(part) = pair.child(p) else { continue };
+                    if part.kind() == "keyword" { continue; }
+                    stack.push(part);
+                }
             }
         }
     }

From 24c8cf51ec2074dffcb65f0a337b2ed5d36dd989 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 12:13:37 -0600
Subject: [PATCH 08/27] refactor(ast-analysis): break visitor-utils
 destructuring cycle

---
 src/ast-analysis/visitor-utils.ts | 132 +++++++++++++++++++-----------
 1 file changed, 86 insertions(+), 46 deletions(-)

diff --git a/src/ast-analysis/visitor-utils.ts b/src/ast-analysis/visitor-utils.ts
index 530787d2d..4b161c2e3 100644
--- a/src/ast-analysis/visitor-utils.ts
+++ b/src/ast-analysis/visitor-utils.ts
@@ -88,78 +88,118 @@ export function extractParams(
   return result;
 }
 
-/** Extract names from a rest parameter (e.g. `...args`). */
-function extractRestParamNames(node: TreeSitterNode, rules: LanguageRules): string[] {
-  const nameNode = node.childForFieldName('name');
-  if (nameNode) return [nameNode.text];
-  for (const child of node.namedChildren) {
-    if (child.type === rules.paramIdentifier) return [child.text];
-  }
-  return [];
-}
-
-/** Extract names from an object destructuring pattern (e.g. `{ a, b: c }`). */
-function extractObjectDestructNames(node: TreeSitterNode, rules: LanguageRules): string[] {
-  const names: string[] = [];
-  for (const child of node.namedChildren) {
-    if (rules.shorthandPropPattern && child.type === rules.shorthandPropPattern) {
-      names.push(child.text);
-    } else if (rules.pairPatternType && child.type === rules.pairPatternType) {
-      const value = child.childForFieldName('value');
-      if (value) names.push(...extractParamNames(value, rules));
-    } else if (rules.restParamType && child.type === rules.restParamType) {
-      names.push(...extractParamNames(child, rules));
-    }
-  }
-  return names;
-}
-
-/** Extract names from an array destructuring pattern (e.g. `[a, b]`). */
-function extractArrayDestructNames(node: TreeSitterNode, rules: LanguageRules): string[] {
-  const names: string[] = [];
-  for (const child of node.namedChildren) {
-    names.push(...extractParamNames(child, rules));
-  }
-  return names;
-}
-
 /**
- * Extract parameter names from a single parameter node.
+ * Resolve a single parameter node to either a direct list of names (base case)
+ * or a list of child nodes that still need processing. Returns `null` if the
+ * node yields nothing.
+ *
+ * This base case keeps destructuring helpers from recursing back into
+ * `extractParamNames`, breaking the 3-node mutual recursion cycle between
+ * `extractParamNames`, `extractObjectDestructNames`, and `extractArrayDestructNames`.
  */
-export function extractParamNames(node: TreeSitterNode | null, rules: LanguageRules): string[] {
-  if (!node) return [];
+function resolveParamNode(
+  node: TreeSitterNode,
+  rules: LanguageRules,
+): { names?: string[]; next?: TreeSitterNode[] } | null {
   const t = node.type;
 
   if (rules.extractParamName) {
     const result = rules.extractParamName(node);
-    if (result) return result;
+    if (result) return { names: result };
   }
 
-  if (t === rules.paramIdentifier) return [node.text];
+  if (t === rules.paramIdentifier) return { names: [node.text] };
 
   if (rules.paramWrapperTypes.has(t)) {
     const pattern = node.childForFieldName('pattern') || node.childForFieldName('name');
-    return pattern ? extractParamNames(pattern, rules) : [];
+    return pattern ? { next: [pattern] } : null;
   }
 
   if (rules.defaultParamType && t === rules.defaultParamType) {
     const left = node.childForFieldName('left') || node.childForFieldName('name');
-    return left ? extractParamNames(left, rules) : [];
+    return left ? { next: [left] } : null;
   }
 
   if (rules.restParamType && t === rules.restParamType) {
-    return extractRestParamNames(node, rules);
+    const nameNode = node.childForFieldName('name');
+    if (nameNode) return { names: [nameNode.text] };
+    for (const child of node.namedChildren) {
+      if (child.type === rules.paramIdentifier) return { names: [child.text] };
+    }
+    return null;
   }
 
   if (rules.objectDestructType && t === rules.objectDestructType) {
-    return extractObjectDestructNames(node, rules);
+    return { next: collectObjectDestructChildren(node, rules) };
   }
 
   if (rules.arrayDestructType && t === rules.arrayDestructType) {
-    return extractArrayDestructNames(node, rules);
+    return { next: [...node.namedChildren] };
+  }
+
+  return null;
+}
+
+/**
+ * Collect child nodes from an object destructuring pattern that should be
+ * processed for further name extraction. Returns nodes (not names) so the
+ * caller drives traversal via a worklist instead of recursion.
+ */
+function collectObjectDestructChildren(
+  node: TreeSitterNode,
+  rules: LanguageRules,
+): TreeSitterNode[] {
+  const next: TreeSitterNode[] = [];
+  for (const child of node.namedChildren) {
+    if (rules.shorthandPropPattern && child.type === rules.shorthandPropPattern) {
+      // Shorthand prop is a direct identifier — the worklist loop in
+      // `extractParamNames` records its text once the node is popped.
+      next.push(child);
+    } else if (rules.pairPatternType && child.type === rules.pairPatternType) {
+      const value = child.childForFieldName('value');
+      if (value) next.push(value);
+    } else if (rules.restParamType && child.type === rules.restParamType) {
+      next.push(child);
+    }
   }
+  return next;
+}
 
-  return [];
+/**
+ * Extract parameter names from a single parameter node.
+ *
+ * Uses an iterative worklist to handle nested destructuring (objects, arrays,
+ * defaults, rest, wrappers) without mutual recursion through helper functions.
+ */
+export function extractParamNames(node: TreeSitterNode | null, rules: LanguageRules): string[] {
+  if (!node) return [];
+
+  const names: string[] = [];
+  const stack: TreeSitterNode[] = [node];
+
+  while (stack.length > 0) {
+    const current = stack.pop();
+    if (!current) continue;
+
+    // Shorthand identifier inside an object destructuring is just the node's text.
+    if (rules.shorthandPropPattern && current.type === rules.shorthandPropPattern) {
+      names.push(current.text);
+      continue;
+    }
+
+    const resolved = resolveParamNode(current, rules);
+    if (!resolved) continue;
+    if (resolved.names) names.push(...resolved.names);
+    if (resolved.next) {
+      // Push in reverse so traversal order matches the previous recursive order.
+      for (let i = resolved.next.length - 1; i >= 0; i--) {
+        const child = resolved.next[i];
+        if (child) stack.push(child);
+      }
+    }
+  }
+
+  return names;
 }
 
 /**

From 4f34034038357c26f3cfe5fc6b532caba84bf39c Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 12:21:47 -0600
Subject: [PATCH 09/27] refactor(ast-analysis): decompose engine and visitors

---
 src/ast-analysis/engine.ts                    | 206 ++++++++++++------
 .../visitors/ast-store-visitor.ts             | 173 +++++++++------
 src/ast-analysis/visitors/dataflow-visitor.ts | 137 ++++++++----
 3 files changed, 339 insertions(+), 177 deletions(-)

diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts
index c96989437..958485147 100644
--- a/src/ast-analysis/engine.ts
+++ b/src/ast-analysis/engine.ts
@@ -753,6 +753,146 @@ function allNativeDataComplete(
 
 // ─── Public API ──────────────────────────────────────────────────────────
 
+/** Distribute the per-file walk time equally among the visitors that ran. */
+function accumulateWalkTime(
+  timing: AnalysisTiming,
+  walkMs: number,
+  astVisitor: Visitor | null,
+  complexityVisitor: Visitor | null,
+  cfgVisitor: Visitor | null,
+  dataflowVisitor: Visitor | null,
+): void {
+  // Only visitors that actually ran receive a share of this file's walk time.
+  const candidates = [astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor];
+  const ranCount = candidates.reduce((count, visitor) => (visitor ? count + 1 : count), 0);
+  if (ranCount === 0) return;
+
+  const perVisitorMs = walkMs / ranCount;
+  if (astVisitor) timing.astMs += perVisitorMs;
+  if (complexityVisitor) timing.complexityMs += perVisitorMs;
+  if (cfgVisitor) timing.cfgMs += perVisitorMs;
+  if (dataflowVisitor) timing.dataflowMs += perVisitorMs;
+}
+
+/** Apply visitor walk results to the per-file symbols/definitions. */
+function applyVisitorResults(
+  results: WalkResults,
+  symbols: ExtractorOutput,
+  langId: string,
+  astVisitor: Visitor | null,
+  complexityVisitor: Visitor | null,
+  cfgVisitor: Visitor | null,
+  dataflowVisitor: Visitor | null,
+): void {
+  const defs = symbols.definitions || [];
+
+  if (astVisitor) {
+    const astRows = (results['ast-store'] || []) as ASTNodeRow[];
+    if (astRows.length > 0) symbols.astNodes = astRows;
+  }
+
+  if (complexityVisitor) storeComplexityResults(results, defs, langId);
+  if (cfgVisitor) storeCfgResults(results, defs);
+  if (dataflowVisitor) symbols.dataflow = results.dataflow as DataflowResult;
+}
+
+/** Process a single file: set up visitors, walk the tree, and apply results. */
+function processFileWalk(
+  db: BetterSqlite3Database,
+  relPath: string,
+  symbols: ExtractorOutput,
+  langId: string,
+  opts: AnalysisOpts,
+  timing: AnalysisTiming,
+): void {
+  if (!symbols._tree) return;
+
+  const { visitors, walkerOpts, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor } =
+    setupVisitors(db, relPath, symbols, langId, opts);
+
+  if (visitors.length === 0) return;
+
+  const walkStart = performance.now();
+  const results = walkWithVisitors(symbols._tree.rootNode, visitors, langId, walkerOpts);
+  const walkMs = performance.now() - walkStart;
+
+  accumulateWalkTime(timing, walkMs, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor);
+  applyVisitorResults(
+    results,
+    symbols,
+    langId,
+    astVisitor,
+    complexityVisitor,
+    cfgVisitor,
+    dataflowVisitor,
+  );
+}
+
+/**
+ * Unified pre-walk: run all applicable visitors in a single DFS per file.
+ * Returns the total wall-clock time for diagnostics.
+ */
+function runUnifiedWalkPass(
+  db: BetterSqlite3Database,
+  fileSymbols: Map<string, ExtractorOutput>,
+  extToLang: Map<string, string>,
+  opts: AnalysisOpts,
+  timing: AnalysisTiming,
+): number {
+  const t0walk = performance.now();
+
+  for (const [relPath, symbols] of fileSymbols) {
+    if (!symbols._tree) continue;
+
+    const ext = path.extname(relPath).toLowerCase();
+    const langId = symbols._langId || extToLang.get(ext);
+    if (!langId) continue;
+
+    processFileWalk(db, relPath, symbols, langId, opts, timing);
+  }
+
+  return performance.now() - t0walk;
+}
+
+/** Try native Rust standalone analysis to fill gaps before WASM fallback. */
+function tryNativeStandaloneAnalysis(
+  fileSymbols: Map<string, ExtractorOutput>,
+  rootDir: string,
+  opts: AnalysisOpts,
+  extToLang: Map<string, string>,
+): void {
+  const native = loadNative();
+  if (!native?.analyzeComplexity && !native?.buildCfgAnalysis && !native?.extractDataflowAnalysis) {
+    return;
+  }
+  const t0native = performance.now();
+  runNativeAnalysis(native, fileSymbols, rootDir, opts, extToLang);
+  debug(`native standalone analysis: ${(performance.now() - t0native).toFixed(1)}ms`);
+}
+
+/**
+ * Fast path: when all files were parsed by the native engine with full analysis,
+ * skip WASM re-parse and JS visitor walks entirely and go straight to DB persistence.
+ * Returns true if the fast path handled the work.
+ */
+async function runFastPathIfApplicable(
+  db: BetterSqlite3Database,
+  fileSymbols: Map<string, ExtractorOutput>,
+  rootDir: string,
+  opts: AnalysisOpts,
+  engineOpts: EngineOpts | undefined,
+  timing: AnalysisTiming,
+): Promise<boolean> {
+  if (!allNativeDataComplete(fileSymbols, opts)) return false;
+
+  debug('native full-analysis fast path: all data present, skipping WASM/visitor passes');
+  const doComplexity = opts.complexity !== false;
+  const doCfg = opts.cfg !== false;
+  if (doComplexity && doCfg) reconcileCfgCyclomatic(fileSymbols);
+  await delegateToBuildFunctions(db, fileSymbols, rootDir, opts, engineOpts, timing);
+  return true;
+}
+
 export async function runAnalyses(
   db: BetterSqlite3Database,
   fileSymbols: Map<string, ExtractorOutput>,
@@ -771,80 +911,24 @@ export async function runAnalyses(
 
   const extToLang = buildExtToLangMap();
 
-  // Fast path: when all files were parsed by the native engine with full analysis
-  // (parseFilesFull), all data is already present — skip WASM re-parse and JS
-  // visitor walks entirely, go straight to DB persistence.
-  if (allNativeDataComplete(fileSymbols, opts)) {
-    debug('native full-analysis fast path: all data present, skipping WASM/visitor passes');
-    if (doComplexity && doCfg) reconcileCfgCyclomatic(fileSymbols);
-    await delegateToBuildFunctions(db, fileSymbols, rootDir, opts, engineOpts, timing);
+  if (await runFastPathIfApplicable(db, fileSymbols, rootDir, opts, engineOpts, timing)) {
     return timing;
   }
 
   // Native analysis pass: try Rust standalone functions before WASM fallback.
   // This fills in complexity/CFG/dataflow for files that the native parse pipeline
   // missed, avoiding the need to parse with WASM + run JS visitors.
-  const native = loadNative();
-  if (native?.analyzeComplexity || native?.buildCfgAnalysis || native?.extractDataflowAnalysis) {
-    const t0native = performance.now();
-    runNativeAnalysis(native, fileSymbols, rootDir, opts, extToLang);
-    debug(`native standalone analysis: ${(performance.now() - t0native).toFixed(1)}ms`);
-  }
+  tryNativeStandaloneAnalysis(fileSymbols, rootDir, opts, extToLang);
 
   // WASM pre-parse for files that still need it (AST store, or native gaps)
   await ensureWasmTreesIfNeeded(fileSymbols, opts, rootDir);
 
-  // Unified pre-walk: run all applicable visitors in a single DFS per file.
   // Time each file's walk and distribute equally among active visitors
   // so that phase timers (astMs, complexityMs, etc.) reflect real work — not
   // just the DB-write tail in delegateToBuildFunctions.
-  const t0walk = performance.now();
-
-  for (const [relPath, symbols] of fileSymbols) {
-    if (!symbols._tree) continue;
-
-    const ext = path.extname(relPath).toLowerCase();
-    const langId = symbols._langId || extToLang.get(ext);
-    if (!langId) continue;
-
-    const { visitors, walkerOpts, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor } =
-      setupVisitors(db, relPath, symbols, langId, opts);
-
-    if (visitors.length === 0) continue;
-
-    const walkStart = performance.now();
-    const results = walkWithVisitors(symbols._tree.rootNode, visitors, langId, walkerOpts);
-    const walkMs = performance.now() - walkStart;
-
-    // Distribute walk time equally among active visitors
-    const activeCount = [astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor].filter(
-      Boolean,
-    ).length;
-    if (activeCount > 0) {
-      const share = walkMs / activeCount;
-      if (astVisitor) timing.astMs += share;
-      if (complexityVisitor) timing.complexityMs += share;
-      if (cfgVisitor) timing.cfgMs += share;
-      if (dataflowVisitor) timing.dataflowMs += share;
-    }
-
-    const defs = symbols.definitions || [];
-
-    if (astVisitor) {
-      const astRows = (results['ast-store'] || []) as ASTNodeRow[];
-      if (astRows.length > 0) symbols.astNodes = astRows;
-    }
-
-    if (complexityVisitor) storeComplexityResults(results, defs, langId);
-    if (cfgVisitor) storeCfgResults(results, defs);
-    if (dataflowVisitor) symbols.dataflow = results.dataflow as DataflowResult;
-  }
-
-  // Total wall-clock time for the unified walk loop, including per-file
-  // setupVisitors overhead. Walk time is already distributed into per-phase
-  // timers above, so this field overlaps with (astMs + complexityMs + ...).
-  // It is kept as a diagnostic cross-check, not an additive bucket.
-  timing._unifiedWalkMs = performance.now() - t0walk;
+  // _unifiedWalkMs is kept as a diagnostic cross-check (overlaps with the
+  // per-phase timers above, not additive).
+  timing._unifiedWalkMs = runUnifiedWalkPass(db, fileSymbols, extToLang, opts, timing);
 
   // Reconcile: apply CFG-derived cyclomatic override for any definitions that have
   // both precomputed complexity and CFG data but whose cyclomatic was never overridden.
diff --git a/src/ast-analysis/visitors/ast-store-visitor.ts b/src/ast-analysis/visitors/ast-store-visitor.ts
index 661ceae2e..dd63515be 100644
--- a/src/ast-analysis/visitors/ast-store-visitor.ts
+++ b/src/ast-analysis/visitors/ast-store-visitor.ts
@@ -181,46 +181,17 @@ function newTypesFor(astTypeMap: Record<string, string>): Set<string> {
   return s;
 }
 
-export function createAstStoreVisitor(
-  astTypeMap: Record<string, string>,
-  defs: Definition[],
-  relPath: string,
-  nodeIdMap: Map<string, number>,
-  stringConfig: AstStringConfig = DEFAULT_STRING_CONFIG,
-  stopRecurseKinds: ReadonlySet<string> = new Set(),
-): Visitor {
-  const rows: AstStoreRow[] = [];
-  const matched = new Set<number>();
-  const newTypes = newTypesFor(astTypeMap);
-  // When nodeIdMap is empty, parentNodeId resolution is wasted work — the
-  // worker passes an empty map and the main thread re-resolves against its
-  // own DB-populated map in features/ast.ts::collectFileAstRows. Skip the
-  // findParentDef linear scan in that case.
-  const skipParentLookup = nodeIdMap.size === 0;
-
-  function findParentDef(line: number): Definition | null {
-    let best: Definition | null = null;
-    for (const def of defs) {
-      if (def.line <= line && (def.endLine == null || def.endLine >= line)) {
-        if (!best || (def.endLine ?? 0) - def.line < (best.endLine ?? 0) - best.line) {
-          best = def;
-        }
-      }
-    }
-    return best;
-  }
-
-  function resolveParentNodeId(line: number): number | null {
-    if (skipParentLookup) return null;
-    const parentDef = findParentDef(line);
-    if (!parentDef) return null;
-    return nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
-  }
+type NameTextResult = { name: string | null | undefined; text: string | null; skip?: boolean };
+type KindHandler = (node: TreeSitterNode) => NameTextResult;
 
-  type NameTextResult = { name: string | null | undefined; text: string | null; skip?: boolean };
-  type KindHandler = (node: TreeSitterNode) => NameTextResult;
+const DEFAULT_NAME_TEXT_RESULT: NameTextResult = { name: undefined, text: null };
 
-  const kindHandlers: Record<string, KindHandler> = {
+/** Build the per-kind resolver map for name/text extraction. */
+function buildKindHandlers(
+  newTypes: Set<string>,
+  stringConfig: AstStringConfig,
+): Record<string, KindHandler> {
+  return {
     new: (node) => ({ name: extractConstructorName(node), text: truncate(node.text) }),
     throw: (node) => ({
       name: extractThrowName(node, newTypes),
@@ -234,31 +205,102 @@ export function createAstStoreVisitor(
     },
     regex: (node) => ({ name: node.text || '?', text: truncate(node.text) }),
   };
-  const defaultResult: NameTextResult = { name: undefined, text: null };
+}
 
-  function resolveNameAndText(node: TreeSitterNode, kind: string): NameTextResult {
-    const handler = kindHandlers[kind];
-    return handler ? handler(node) : defaultResult;
+/** Find the innermost definition whose line range contains `line`. */
+function findParentDef(line: number, defs: Definition[]): Definition | null {
+  let best: Definition | null = null;
+  for (const def of defs) {
+    if (def.line <= line && (def.endLine == null || def.endLine >= line)) {
+      if (!best || (def.endLine ?? 0) - def.line < (best.endLine ?? 0) - best.line) {
+        best = def;
+      }
+    }
   }
+  return best;
+}
+
+/** Resolve the parent definition's node id for a given source line. */
+function resolveParentNodeId(
+  line: number,
+  defs: Definition[],
+  nodeIdMap: Map<string, number>,
+  skipParentLookup: boolean,
+): number | null {
+  if (skipParentLookup) return null;
+  const parentDef = findParentDef(line, defs);
+  if (!parentDef) return null;
+  return nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null;
+}
 
-  function collectNode(node: TreeSitterNode, kind: string): void {
-    if (matched.has(node.id)) return;
+interface CollectCtx {
+  rows: AstStoreRow[];
+  matched: Set<number>;
+  relPath: string;
+  defs: Definition[];
+  nodeIdMap: Map<string, number>;
+  skipParentLookup: boolean;
+  kindHandlers: Record<string, KindHandler>;
+}
 
-    const resolved = resolveNameAndText(node, kind);
-    if (resolved.skip) return;
+function collectNode(ctx: CollectCtx, node: TreeSitterNode, kind: string): void {
+  if (ctx.matched.has(node.id)) return;
+
+  const handler = ctx.kindHandlers[kind];
+  const resolved = handler ? handler(node) : DEFAULT_NAME_TEXT_RESULT;
+  if (resolved.skip) return;
+
+  const line = node.startPosition.row + 1;
+  ctx.rows.push({
+    file: ctx.relPath,
+    line,
+    kind,
+    name: resolved.name,
+    text: resolved.text,
+    receiver: null,
+    parentNodeId: resolveParentNodeId(line, ctx.defs, ctx.nodeIdMap, ctx.skipParentLookup),
+  });
+
+  ctx.matched.add(node.id);
+}
 
-    rows.push({
-      file: relPath,
-      line: node.startPosition.row + 1,
-      kind,
-      name: resolved.name,
-      text: resolved.text,
-      receiver: null,
-      parentNodeId: resolveParentNodeId(node.startPosition.row + 1),
-    });
+/**
+ * Resolve the kind for a tree-sitter node, or `null` if the node should be ignored.
+ *
+ * Gate with `hasOwn` because plain-object lookup walks Object.prototype:
+ * tree-sitter node types like `constructor` (Haskell sum-types: Left,
+ * Right) would otherwise resolve to `Object.prototype.constructor` (the
+ * Object() function), which then crashes the worker boundary with
+ * "function Object() { [native code] } could not be cloned" when the
+ * resulting astNodes row is structured-cloned back to the main thread.
+ */
+function resolveAstKind(node: TreeSitterNode, astTypeMap: Record<string, string>): string | null {
+  if (!Object.hasOwn(astTypeMap, node.type)) return null;
+  return astTypeMap[node.type] || null;
+}
 
-    matched.add(node.id);
-  }
+export function createAstStoreVisitor(
+  astTypeMap: Record<string, string>,
+  defs: Definition[],
+  relPath: string,
+  nodeIdMap: Map<string, number>,
+  stringConfig: AstStringConfig = DEFAULT_STRING_CONFIG,
+  stopRecurseKinds: ReadonlySet<string> = new Set(),
+): Visitor {
+  const newTypes = newTypesFor(astTypeMap);
+  // When nodeIdMap is empty, parentNodeId resolution is wasted work — the
+  // worker passes an empty map and the main thread re-resolves against its
+  // own DB-populated map in features/ast.ts::collectFileAstRows. Skip the
+  // findParentDef linear scan in that case.
+  const ctx: CollectCtx = {
+    rows: [],
+    matched: new Set<number>(),
+    relPath,
+    defs,
+    nodeIdMap,
+    skipParentLookup: nodeIdMap.size === 0,
+    kindHandlers: buildKindHandlers(newTypes, stringConfig),
+  };
 
   return {
     name: 'ast-store',
@@ -267,19 +309,12 @@ export function createAstStoreVisitor(
       // Guard: skip re-collection but do NOT skipChildren — node.id (memory address)
       // can be reused by tree-sitter, so a collision would incorrectly suppress an
       // unrelated subtree. The parent call's skipChildren handles the intended case.
-      if (matched.has(node.id)) return;
-
-      // Gate with `hasOwn` because plain-object lookup walks Object.prototype:
-      // tree-sitter node types like `constructor` (Haskell sum-types: Left,
-      // Right) would otherwise resolve to `Object.prototype.constructor` (the
-      // Object() function), which then crashes the worker boundary with
-      // "function Object() { [native code] } could not be cloned" when the
-      // resulting astNodes row is structured-cloned back to the main thread.
-      if (!Object.hasOwn(astTypeMap, node.type)) return;
-      const kind = astTypeMap[node.type];
+      if (ctx.matched.has(node.id)) return;
+
+      const kind = resolveAstKind(node, astTypeMap);
       if (!kind) return;
 
-      collectNode(node, kind);
+      collectNode(ctx, node, kind);
 
       // Mirror the native walker's recursion policy. In JS/TS, the native
       // javascript.rs walker returns after collecting `new` or `throw` to
@@ -293,7 +328,7 @@ export function createAstStoreVisitor(
     },
 
     finish(): AstStoreRow[] {
-      return rows;
+      return ctx.rows;
     },
   };
 }
diff --git a/src/ast-analysis/visitors/dataflow-visitor.ts b/src/ast-analysis/visitors/dataflow-visitor.ts
index c3e4b46be..b66215577 100644
--- a/src/ast-analysis/visitors/dataflow-visitor.ts
+++ b/src/ast-analysis/visitors/dataflow-visitor.ts
@@ -405,6 +405,83 @@ function handleReturn(
   }
 }
 
+/** Collect parameter entries for a function and push a new scope onto the stack. */
+function enterFunctionScope(
+  funcNode: TreeSitterNode,
+  rules: AnyRules,
+  scopeStack: ScopeEntry[],
+  parameters: DataflowParam[],
+): void {
+  const name = functionName(funcNode, rules);
+  const paramsNode = funcNode.childForFieldName(rules.paramListField);
+  const paramMap = new Map<string, number>();
+  // Every parameter is reported on the same line: the parameter list's start
+  // row when the list node exists, otherwise the function node's start row
+  // (+1 turns the row into the reported line). Computed once, not per parameter.
+  const line = (paramsNode?.startPosition?.row ?? funcNode.startPosition.row) + 1;
+
+  for (const p of extractParams(paramsNode, rules)) {
+    paramMap.set(p.name, p.index);
+    // Functions without a resolvable name still get a scope, but no parameter records.
+    if (!name) continue;
+    parameters.push({ funcName: name, paramName: p.name, paramIndex: p.index, line });
+  }
+
+  scopeStack.push({ funcName: name, funcNode, params: paramMap, locals: new Map() });
+}
+
+interface DataflowDispatchCtx {
+  rules: AnyRules;
+  scopeStack: ScopeEntry[];
+  returns: DataflowReturnEntry[];
+  assignments: DataflowAssignment[];
+  argFlows: DataflowArgFlow[];
+  mutations: DataflowMutation[];
+  isCallNode: (t: string) => boolean;
+}
+
+/**
+ * Route a node to the appropriate dataflow handler based on its type, or return
+ * `false` if no handler matched. Function-definition nodes are signalled by
+ * a `true` return so the caller can short-circuit.
+ */
+function dispatchDataflowNode(ctx: DataflowDispatchCtx, node: TreeSitterNode): boolean {
+  const { rules } = ctx;
+  const t = node.type;
+
+  if (rules.functionNodes.has(t)) return true;
+
+  if (rules.returnNode && t === rules.returnNode) {
+    handleReturn(node, rules, ctx.scopeStack, ctx.returns);
+    return true;
+  }
+
+  if (
+    (rules.varDeclaratorNode && t === rules.varDeclaratorNode) ||
+    rules.varDeclaratorNodes?.has(t)
+  ) {
+    handleVarDeclarator(node, rules, ctx.scopeStack, ctx.assignments, ctx.isCallNode);
+    return true;
+  }
+
+  if (ctx.isCallNode(t)) {
+    handleCallExpr(node, rules, ctx.scopeStack, ctx.argFlows);
+    return true;
+  }
+
+  if (rules.assignmentNode && t === rules.assignmentNode) {
+    handleAssignment(node, rules, ctx.scopeStack, ctx.assignments, ctx.mutations, ctx.isCallNode);
+    return true;
+  }
+
+  if (rules.expressionStmtNode && t === rules.expressionStmtNode) {
+    handleExprStmtMutation(node, rules, ctx.scopeStack, ctx.mutations, ctx.isCallNode);
+    return true;
+  }
+
+  return false;
+}
+
 export function createDataflowVisitor(rules: AnyRules): Visitor {
   const isCallNode: (t: string) => boolean = rules.callNodes
     ? (t: string) => rules.callNodes.has(t)
@@ -417,6 +494,16 @@ export function createDataflowVisitor(rules: AnyRules): Visitor {
   const mutations: DataflowMutation[] = [];
   const scopeStack: ScopeEntry[] = [];
 
+  const dispatchCtx: DataflowDispatchCtx = {
+    rules,
+    scopeStack,
+    returns,
+    assignments,
+    argFlows,
+    mutations,
+    isCallNode,
+  };
+
   return {
     name: 'dataflow',
     functionNodeTypes: rules.functionNodes,
@@ -426,22 +513,7 @@ export function createDataflowVisitor(rules: AnyRules): Visitor {
       _funcName: string | null,
       _context: VisitorContext,
     ): void {
-      const name = functionName(funcNode, rules);
-      const paramsNode = funcNode.childForFieldName(rules.paramListField);
-      const paramList = extractParams(paramsNode, rules);
-      const paramMap = new Map<string, number>();
-      for (const p of paramList) {
-        paramMap.set(p.name, p.index);
-        if (name) {
-          parameters.push({
-            funcName: name,
-            paramName: p.name,
-            paramIndex: p.index,
-            line: (paramsNode?.startPosition?.row ?? funcNode.startPosition.row) + 1,
-          });
-        }
-      }
-      scopeStack.push({ funcName: name, funcNode, params: paramMap, locals: new Map() });
+      enterFunctionScope(funcNode, rules, scopeStack, parameters);
     },
 
     exitFunction(
@@ -453,37 +525,8 @@ export function createDataflowVisitor(rules: AnyRules): Visitor {
     },
 
     enterNode(node: TreeSitterNode, _context: VisitorContext): EnterNodeResult | undefined {
-      const t = node.type;
-
-      if (rules.functionNodes.has(t)) return;
-
-      if (rules.returnNode && t === rules.returnNode) {
-        handleReturn(node, rules, scopeStack, returns);
-        return;
-      }
-
-      if (rules.varDeclaratorNode && t === rules.varDeclaratorNode) {
-        handleVarDeclarator(node, rules, scopeStack, assignments, isCallNode);
-        return;
-      }
-      if (rules.varDeclaratorNodes?.has(t)) {
-        handleVarDeclarator(node, rules, scopeStack, assignments, isCallNode);
-        return;
-      }
-
-      if (isCallNode(t)) {
-        handleCallExpr(node, rules, scopeStack, argFlows);
-        return;
-      }
-
-      if (rules.assignmentNode && t === rules.assignmentNode) {
-        handleAssignment(node, rules, scopeStack, assignments, mutations, isCallNode);
-        return;
-      }
-
-      if (rules.expressionStmtNode && t === rules.expressionStmtNode) {
-        handleExprStmtMutation(node, rules, scopeStack, mutations, isCallNode);
-      }
+      dispatchDataflowNode(dispatchCtx, node);
+      return undefined;
     },
 
     finish(): DataflowResultInternal {

From dab4dcf8630259bb632c0cc31f247bda3069d3ca Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 12:32:39 -0600
Subject: [PATCH 10/27] refactor(builder): break pipeline cycle by extracting
 orchestrator-selection strategy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extract the native-orchestrator path out of pipeline.ts into two new stage
modules:

  - stages/native-orchestrator.ts — tryNativeOrchestrator + post-native
    structure/analysis fallback + dropped-language detection/backfill.
  - stages/native-db-lifecycle.ts — shared rusqlite connection helpers
    (closeNativeDb, reopenNativeDb, suspendNativeDb, refreshJsDb).

This breaks the function-level cycle 'buildGraph <-> tryNativeOrchestrator'
caused by codegraph's name-based resolver conflating the local buildGraph
function with the ctx.nativeDb.buildGraph() method call. Once the
orchestrator lives in its own file, there is no longer a local buildGraph
in scope to collide with the method invocation.

Function-level cycles: 9 -> 5. No file-level cycle introduced (still 1,
unchanged — pre-existing MCP cycle). pipeline.ts shrinks from 1404 to 465
lines and now reads as a thin top-level controller: detect changes, try
native, fall back to JS stages.

computeWasmOnlyStaleFiles is re-exported from pipeline.ts so existing
unit tests (tests/builder/wasm-only-stale-files.test.ts) keep working
without changes.
---
 src/domain/graph/builder/pipeline.ts          | 978 +-----------------
 .../builder/stages/native-db-lifecycle.ts     |  74 ++
 .../builder/stages/native-orchestrator.ts     | 942 +++++++++++++++++
 3 files changed, 1035 insertions(+), 959 deletions(-)
 create mode 100644 src/domain/graph/builder/stages/native-db-lifecycle.ts
 create mode 100644 src/domain/graph/builder/stages/native-orchestrator.ts

diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts
index 4dce8aa3d..ff4ee5e5d 100644
--- a/src/domain/graph/builder/pipeline.ts
+++ b/src/domain/graph/builder/pipeline.ts
@@ -8,52 +8,24 @@ import fs from 'node:fs';
 import path from 'node:path';
 import { performance } from 'node:perf_hooks';
 import {
-  acquireAdvisoryLock,
   closeDb,
   closeDbPair,
   getBuildMeta,
   initSchema,
   MIGRATIONS,
   openDb,
-  purgeFilesData,
-  releaseAdvisoryLock,
-  setBuildMeta,
 } from '../../../db/index.js';
 import { detectWorkspaces, loadConfig } from '../../../infrastructure/config.js';
 import { debug, info, warn } from '../../../infrastructure/logger.js';
 import { loadNative } from '../../../infrastructure/native.js';
-import { semverCompare } from '../../../infrastructure/update-check.js';
-import { normalizePath } from '../../../shared/constants.js';
 import { toErrorMessage } from '../../../shared/errors.js';
 import { CODEGRAPH_VERSION } from '../../../shared/version.js';
-import type {
-  BetterSqlite3Database,
-  BuildGraphOpts,
-  BuildResult,
-  Definition,
-  ExtractorOutput,
-  SqliteStatement,
-} from '../../../types.js';
-import {
-  classifyNativeDrops,
-  formatDropExtensionSummary,
-  getActiveEngine,
-  getInstalledWasmExtensions,
-  NATIVE_SUPPORTED_EXTENSIONS,
-  parseFilesWasmForBackfill,
-} from '../../parser.js';
+import type { BuildGraphOpts, BuildResult } from '../../../types.js';
+import { getActiveEngine } from '../../parser.js';
 import { writeJournalHeader } from '../journal.js';
 import { setWorkspaces } from '../resolve.js';
 import { PipelineContext } from './context.js';
-import {
-  batchInsertNodes,
-  collectFiles as collectFilesUtil,
-  fileHash,
-  fileStat,
-  loadPathAliases,
-  readFileSafe,
-} from './helpers.js';
-import { NativeDbProxy } from './native-db-proxy.js';
+import { loadPathAliases } from './helpers.js';
 import { buildEdges } from './stages/build-edges.js';
 import { buildStructure } from './stages/build-structure.js';
 // Pipeline stages
@@ -61,10 +33,24 @@ import { collectFiles } from './stages/collect-files.js';
 import { detectChanges, detectNoChanges } from './stages/detect-changes.js';
 import { finalize } from './stages/finalize.js';
 import { insertNodes } from './stages/insert-nodes.js';
+import {
+  closeNativeDb,
+  refreshJsDb,
+  reopenNativeDb,
+  suspendNativeDb,
+} from './stages/native-db-lifecycle.js';
+import { tryNativeOrchestrator } from './stages/native-orchestrator.js';
 import { parseFiles } from './stages/parse-files.js';
 import { resolveImports } from './stages/resolve-imports.js';
 import { runAnalyses } from './stages/run-analyses.js';
 
+// Re-export computeWasmOnlyStaleFiles for backward compatibility with tests
+// that import from this module path (#1073 unit tests).
+export {
+  computeWasmOnlyStaleFiles,
+  type WasmOnlyStaleFilesInput,
+} from './stages/native-orchestrator.js';
+
 // ── Setup helpers ───────────────────────────────────────────────────────
 
 function initializeEngine(ctx: PipelineContext): void {
@@ -237,934 +223,8 @@ function formatTimingResult(ctx: PipelineContext): BuildResult {
   };
 }
 
-// ── NativeDb lifecycle helpers ──────────────────────────────────────────
-
-/** Checkpoint WAL through rusqlite and close the native connection. */
-function closeNativeDb(ctx: PipelineContext, label: string): void {
-  if (!ctx.nativeDb) return;
-  try {
-    ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
-  } catch (e) {
-    debug(`${label} WAL checkpoint failed: ${toErrorMessage(e)}`);
-  }
-  try {
-    ctx.nativeDb.close();
-  } catch (e) {
-    debug(`${label} nativeDb close failed: ${toErrorMessage(e)}`);
-  }
-  ctx.nativeDb = undefined;
-}
-
-/** Try to reopen the native connection for a given pipeline phase. */
-function reopenNativeDb(ctx: PipelineContext, label: string): void {
-  if ((ctx.opts.engine ?? 'auto') === 'wasm') return;
-  const native = loadNative();
-  if (!native?.NativeDatabase) return;
-  try {
-    ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
-  } catch (e) {
-    debug(`reopen nativeDb for ${label} failed: ${toErrorMessage(e)}`);
-    ctx.nativeDb = undefined;
-  }
-}
-
-/** Close nativeDb and clear stale references in engineOpts. */
-function suspendNativeDb(ctx: PipelineContext, label: string): void {
-  closeNativeDb(ctx, label);
-  if (ctx.engineOpts?.nativeDb) {
-    ctx.engineOpts.nativeDb = undefined;
-  }
-}
-
-/**
- * After native writes, reopen the JS db connection to get a fresh page cache.
- * Rusqlite WAL truncation invalidates better-sqlite3's internal WAL index,
- * causing SQLITE_CORRUPT on the next read (#715, #736).
- */
-function refreshJsDb(ctx: PipelineContext): void {
-  try {
-    ctx.db.close();
-  } catch (e) {
-    debug(`refreshJsDb close failed: ${toErrorMessage(e)}`);
-  }
-  ctx.db = openDb(ctx.dbPath);
-}
-
-// ── Native orchestrator types ──────────────────────────────────────────
-
-interface NativeOrchestratorResult {
-  phases: Record<string, number>;
-  earlyExit?: boolean;
-  nodeCount?: number;
-  edgeCount?: number;
-  fileCount?: number;
-  changedFiles?: string[];
-  changedCount?: number;
-  removedCount?: number;
-  isFullBuild?: boolean;
-  /** Whether the Rust pipeline handled the structure phase (small-incremental fast path). */
-  structureHandled?: boolean;
-  /** Whether the Rust pipeline wrote AST/complexity/CFG/dataflow to DB. */
-  analysisComplete?: boolean;
-}
-
-// ── Native orchestrator helpers ───────────────────────────────────────
-
-/** Determine whether the native orchestrator should be skipped. Returns a reason string, or null if it should run. */
-function shouldSkipNativeOrchestrator(ctx: PipelineContext): string | null {
-  if (ctx.forceFullRebuild) return 'forceFullRebuild';
-  // v3.9.0 addon had buggy incremental purge (wrong SQL on analysis tables,
-  // scoped removal over-detection). Fixed in v3.9.1 by PR #865. Gate on
-  // < 3.9.1 so v3.9.1+ uses the fast Rust orchestrator path.
-  const orchestratorBuggy = !!ctx.engineVersion && semverCompare(ctx.engineVersion, '3.9.1') < 0;
-  if (orchestratorBuggy) return `buggy addon ${ctx.engineVersion}`;
-  if (ctx.engineName !== 'native') return `engine=${ctx.engineName}`;
-  return null;
-}
-
-/** Checkpoint WAL through rusqlite, close nativeDb, and reopen better-sqlite3.
- *  Returns false if the DB reopen fails (caller should return partial result). */
-function handoffWalAfterNativeBuild(ctx: PipelineContext): boolean {
-  closeNativeDb(ctx, 'post-native-build');
-  try {
-    ctx.db.close();
-  } catch (e) {
-    debug(`handoffWal JS db close failed: ${toErrorMessage(e)}`);
-  }
-  try {
-    ctx.db = openDb(ctx.dbPath);
-    return true;
-  } catch (reopenErr) {
-    warn(`Failed to reopen DB after native build: ${(reopenErr as Error).message}`);
-    return false;
-  }
-}
-
-/**
- * Reconstruct fileSymbols from the DB after a native orchestrator build.
- * When `scopeFiles` is provided, only loads those files (for analysis-only).
- * When omitted, loads all files (needed for structure rebuilds).
- */
-function reconstructFileSymbolsFromDb(
-  ctx: PipelineContext,
-  scopeFiles?: string[],
-): Map<string, ExtractorOutput> {
-  let query =
-    'SELECT file, name, kind, line, end_line as endLine FROM nodes WHERE file IS NOT NULL';
-  const params: string[] = [];
-  if (scopeFiles && scopeFiles.length > 0) {
-    const placeholders = scopeFiles.map(() => '?').join(',');
-    query += ` AND file IN (${placeholders})`;
-    params.push(...scopeFiles);
-  }
-  query += ' ORDER BY file, line';
-
-  const rows = ctx.db.prepare(query).all(...params) as {
-    file: string;
-    name: string;
-    kind: string;
-    line: number;
-    endLine: number | null;
-  }[];
-
-  const fileSymbols = new Map<string, ExtractorOutput>();
-  for (const row of rows) {
-    let entry = fileSymbols.get(row.file);
-    if (!entry) {
-      entry = {
-        definitions: [],
-        calls: [],
-        imports: [],
-        classes: [],
-        exports: [],
-        typeMap: new Map(),
-      };
-      fileSymbols.set(row.file, entry);
-    }
-    entry.definitions.push({
-      name: row.name,
-      kind: row.kind as Definition['kind'],
-      line: row.line,
-      endLine: row.endLine ?? undefined,
-    });
-  }
-
-  // Populate import/export counts from DB edges so buildStructure
-  // computes correct import_count/export_count in node_metrics.
-  // The extractor arrays aren't persisted to the DB, so we derive
-  // counts from edge data instead (#804).
-  const importCountRows = ctx.db
-    .prepare(
-      `SELECT n.file, COUNT(*) AS cnt
-       FROM edges e JOIN nodes n ON e.source_id = n.id
-       WHERE e.kind IN ('imports', 'imports-type', 'dynamic-imports')
-         AND n.file IS NOT NULL
-       GROUP BY n.file`,
-    )
-    .all() as { file: string; cnt: number }[];
-  for (const row of importCountRows) {
-    const entry = fileSymbols.get(row.file);
-    if (entry) entry.imports = new Array(row.cnt) as ExtractorOutput['imports'];
-  }
-
-  const exportCountRows = ctx.db
-    .prepare(
-      `SELECT n_tgt.file, COUNT(DISTINCT n_tgt.id) AS cnt
-       FROM edges e
-       JOIN nodes n_tgt ON e.target_id = n_tgt.id
-       JOIN nodes n_src ON e.source_id = n_src.id
-       WHERE e.kind IN ('imports', 'imports-type', 'reexports')
-         AND n_tgt.file IS NOT NULL
-         AND n_src.file != n_tgt.file
-       GROUP BY n_tgt.file`,
-    )
-    .all() as { file: string; cnt: number }[];
-  for (const row of exportCountRows) {
-    const entry = fileSymbols.get(row.file);
-    if (entry) entry.exports = new Array(row.cnt) as ExtractorOutput['exports'];
-  }
-
-  return fileSymbols;
-}
-
-/**
- * Run JS buildStructure() after native orchestrator to fill directory nodes + contains edges.
- * For full builds, passes changedFiles=null (full rebuild).
- * For incremental builds, passes the changed file list to scope the update.
- */
-async function runPostNativeStructure(
-  ctx: PipelineContext,
-  allFileSymbols: Map<string, ExtractorOutput>,
-  isFullBuild: boolean,
-  changedFiles: string[] | undefined,
-): Promise<number> {
-  const structureStart = performance.now();
-  try {
-    const directories = new Set<string>();
-    for (const relPath of allFileSymbols.keys()) {
-      const parts = relPath.split('/');
-      for (let i = 1; i < parts.length; i++) {
-        directories.add(parts.slice(0, i).join('/'));
-      }
-    }
-
-    const lineCountMap = new Map<string, number>();
-    const cachedLineCounts = ctx.db
-      .prepare(
-        `SELECT n.name AS file, m.line_count
-         FROM node_metrics m JOIN nodes n ON m.node_id = n.id
-         WHERE n.kind = 'file'`,
-      )
-      .all() as Array<{ file: string; line_count: number }>;
-    for (const row of cachedLineCounts) {
-      lineCountMap.set(row.file, row.line_count);
-    }
-
-    // Full builds need null (rebuild everything). Incremental builds pass the
-    // changed file list so buildStructure only updates those files' metrics
-    // and contains edges — matching the JS pipeline's medium-incremental path.
-    const changedFilePaths = isFullBuild || !changedFiles?.length ? null : changedFiles;
-    const { buildStructure: buildStructureFn } = (await import(
-      '../../../features/structure.js'
-    )) as {
-      buildStructure: (
-        db: typeof ctx.db,
-        fileSymbols: Map<string, ExtractorOutput>,
-        rootDir: string,
-        lineCountMap: Map<string, number>,
-        directories: Set<string>,
-        changedFiles: string[] | null,
-      ) => void;
-    };
-    buildStructureFn(
-      ctx.db,
-      allFileSymbols,
-      ctx.rootDir,
-      lineCountMap,
-      directories,
-      changedFilePaths,
-    );
-    debug(
-      `Structure phase completed after native orchestrator${changedFilePaths ? ` (${changedFilePaths.length} files)` : ' (full)'}`,
-    );
-  } catch (err) {
-    warn(`Structure phase failed after native build: ${toErrorMessage(err)}`);
-  }
-  return performance.now() - structureStart;
-}
-
-/**
- * JS fallback for AST/complexity/CFG/dataflow analysis after native orchestrator.
- * Used when the Rust addon doesn't include analysis persistence (older addon
- * version) or when analysis failed on the Rust side.
- */
-async function runPostNativeAnalysis(
-  ctx: PipelineContext,
-  allFileSymbols: Map<string, ExtractorOutput>,
-  changedFiles: string[] | undefined,
-): Promise<{ astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }> {
-  const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 };
-
-  // Scope analysis fileSymbols to changed files only
-  let analysisFileSymbols: Map<string, ExtractorOutput>;
-  if (changedFiles && changedFiles.length > 0) {
-    analysisFileSymbols = new Map();
-    for (const f of changedFiles) {
-      const entry = allFileSymbols.get(f);
-      if (entry) analysisFileSymbols.set(f, entry);
-    }
-  } else {
-    analysisFileSymbols = allFileSymbols;
-  }
-
-  // Reopen nativeDb for analysis features (suspend/resume WAL pattern).
-  const native = loadNative();
-  if (native?.NativeDatabase) {
-    try {
-      ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
-      if (ctx.engineOpts) ctx.engineOpts.nativeDb = ctx.nativeDb;
-    } catch {
-      ctx.nativeDb = undefined;
-      if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined;
-    }
-  }
-
-  // Flush JS WAL pages once so Rust can see them, then no-op callbacks.
-  // Previously each feature called wal_checkpoint(TRUNCATE) individually
-  // (~68ms each × 3-4 features). One FULL checkpoint suffices.
-  if (ctx.nativeDb && ctx.engineOpts) {
-    ctx.db.pragma('wal_checkpoint(FULL)');
-    ctx.engineOpts.suspendJsDb = () => {};
-    ctx.engineOpts.resumeJsDb = () => {};
-  }
-
-  try {
-    const { runAnalyses: runAnalysesFn } = (await import('../../../ast-analysis/engine.js')) as {
-      runAnalyses: (
-        db: BetterSqlite3Database,
-        fileSymbols: Map<string, ExtractorOutput>,
-        rootDir: string,
-        opts: Record<string, unknown>,
-        engineOpts?: Record<string, unknown>,
-      ) => Promise<{ astMs?: number; complexityMs?: number; cfgMs?: number; dataflowMs?: number }>;
-    };
-    const result = await runAnalysesFn(
-      ctx.db,
-      analysisFileSymbols,
-      ctx.rootDir,
-      ctx.opts as Record<string, unknown>,
-      ctx.engineOpts as unknown as Record<string, unknown> | undefined,
-    );
-    timing.astMs = result.astMs ?? 0;
-    timing.complexityMs = result.complexityMs ?? 0;
-    timing.cfgMs = result.cfgMs ?? 0;
-    timing.dataflowMs = result.dataflowMs ?? 0;
-  } catch (err) {
-    warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`);
-  }
-
-  // Close nativeDb after analyses — TRUNCATE checkpoint flushes all Rust
-  // WAL writes so JS and external readers can see them. Runs once after
-  // all analysis features complete (not per-feature).
-  if (ctx.nativeDb) {
-    try {
-      ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
-    } catch {
-      /* ignore checkpoint errors */
-    }
-    try {
-      ctx.nativeDb.close();
-    } catch {
-      /* ignore close errors */
-    }
-    ctx.nativeDb = undefined;
-    if (ctx.engineOpts) {
-      ctx.engineOpts.nativeDb = undefined;
-      ctx.engineOpts.suspendJsDb = undefined;
-      ctx.engineOpts.resumeJsDb = undefined;
-    }
-  }
-
-  return timing;
-}
-
-/** Format timing result from native orchestrator phases + JS post-processing. */
-function formatNativeTimingResult(
-  p: Record<string, number>,
-  structurePatchMs: number,
-  analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number },
-): BuildResult {
-  return {
-    phases: {
-      setupMs: +(p.setupMs ?? 0).toFixed(1),
-      collectMs: +(p.collectMs ?? 0).toFixed(1),
-      detectMs: +(p.detectMs ?? 0).toFixed(1),
-      parseMs: +(p.parseMs ?? 0).toFixed(1),
-      insertMs: +(p.insertMs ?? 0).toFixed(1),
-      resolveMs: +(p.resolveMs ?? 0).toFixed(1),
-      edgesMs: +(p.edgesMs ?? 0).toFixed(1),
-      structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1),
-      rolesMs: +(p.rolesMs ?? 0).toFixed(1),
-      astMs: +(analysisTiming.astMs ?? 0).toFixed(1),
-      complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1),
-      cfgMs: +(analysisTiming.cfgMs ?? 0).toFixed(1),
-      dataflowMs: +(analysisTiming.dataflowMs ?? 0).toFixed(1),
-      finalizeMs: +(p.finalizeMs ?? 0).toFixed(1),
-    },
-  };
-}
-
-/** Try the native build orchestrator. Returns a BuildResult on success, undefined to fall through to JS pipeline. */
-async function tryNativeOrchestrator(
-  ctx: PipelineContext,
-): Promise<BuildResult | undefined | 'early-exit'> {
-  const skipReason = shouldSkipNativeOrchestrator(ctx);
-  if (skipReason) {
-    debug(`Skipping native orchestrator: ${skipReason}`);
-    return undefined;
-  }
-
-  // Open NativeDatabase on demand — deferred from setupPipeline to skip the
-  // ~60ms cost on no-op/early-exit builds. Close the better-sqlite3 connection
-  // first to avoid dual-connection WAL corruption.
-  if (!ctx.nativeDb && ctx.nativeAvailable) {
-    const native = loadNative();
-    if (native?.NativeDatabase) {
-      try {
-        // Close better-sqlite3 before opening rusqlite to avoid WAL conflicts.
-        // Uses raw close() instead of closeDb() intentionally — the advisory lock
-        // is kept and transferred to the NativeDbProxy below, not released here.
-        ctx.db.close();
-        acquireAdvisoryLock(ctx.dbPath);
-        ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
-        ctx.nativeDb.initSchema();
-        // Replace ctx.db with a NativeDbProxy so post-native JS fallback
-        // (structure, analysis) can use it without reopening better-sqlite3.
-        const proxy = new NativeDbProxy(ctx.nativeDb);
-        proxy.__lockPath = `${ctx.dbPath}.lock`;
-        ctx.db = proxy as unknown as typeof ctx.db;
-        ctx.nativeFirstProxy = true;
-      } catch (err) {
-        warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
-        try {
-          ctx.nativeDb?.close();
-        } catch (e) {
-          debug(`tryNativeOrchestrator: close failed during fallback: ${toErrorMessage(e)}`);
-        }
-        ctx.nativeDb = undefined;
-        ctx.nativeFirstProxy = false; // defensive: reset in case future refactors move the assignment above throwing lines
-        releaseAdvisoryLock(`${ctx.dbPath}.lock`);
-        // Reopen better-sqlite3 for JS pipeline fallback
-        ctx.db = openDb(ctx.dbPath);
-      }
-    }
-  }
-
-  if (!ctx.nativeDb?.buildGraph) return undefined;
-
-  const resultJson = ctx.nativeDb.buildGraph(
-    ctx.rootDir,
-    JSON.stringify(ctx.config),
-    JSON.stringify(ctx.aliases),
-    JSON.stringify(ctx.opts),
-  );
-  const result = JSON.parse(resultJson) as NativeOrchestratorResult;
-
-  if (result.earlyExit) {
-    info('No changes detected');
-    // Even on no-op rebuilds, dropped-language files added since the last
-    // full build are still missing from `nodes`/`file_hashes` (#1083), and
-    // WASM-only files deleted from disk leave stale rows behind (#1073).
-    // The orchestrator's file_collector skipped them, so its earlyExit
-    // doesn't imply DB consistency. Run the gap repair before returning.
-    const gap = detectDroppedLanguageGap(ctx);
-    if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) {
-      await backfillNativeDroppedFiles(ctx, gap);
-    }
-    closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
-    return 'early-exit';
-  }
-
-  // Log incremental status to match JS pipeline output
-  const changed = result.changedCount ?? 0;
-  const removed = result.removedCount ?? 0;
-  if (!result.isFullBuild && (changed > 0 || removed > 0)) {
-    info(`Incremental: ${changed} changed, ${removed} removed`);
-  }
-
-  const p = result.phases;
-
-  // Sync build_meta so JS-side version/engine checks work on next build.
-  // Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
-  // platform package.json version (ctx.engineVersion). The Rust side's
-  // check_version_mismatch compares against CARGO_PKG_VERSION; writing
-  // the package.json value would create a permanent mismatch whenever
-  // the binary and platform package.json diverge — e.g., CI hot-swap
-  // via ci-install-native.mjs (#1066) — forcing every subsequent build
-  // to be a full rebuild.
-  //
-  // When the native addon doesn't expose engineVersion() (older addon),
-  // fall back to CODEGRAPH_VERSION — same fallback used by both
-  // checkEngineSchemaMismatch (read path) and persistBuildMetadata
-  // (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
-  // here would re-introduce the asymmetry this PR fixes for that case.
-  const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
-  setBuildMeta(ctx.db, {
-    engine: ctx.engineName,
-    engine_version: nativeVersionForMeta,
-    codegraph_version: nativeVersionForMeta,
-    schema_version: String(ctx.schemaVersion),
-    built_at: new Date().toISOString(),
-  });
-
-  info(
-    `Native build orchestrator completed: ${result.nodeCount ?? 0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`,
-  );
-
-  // ── Post-native structure + analysis ──────────────────────────────
-  let analysisTiming = {
-    astMs: +(p.astMs ?? 0),
-    complexityMs: +(p.complexityMs ?? 0),
-    cfgMs: +(p.cfgMs ?? 0),
-    dataflowMs: +(p.dataflowMs ?? 0),
-  };
-  let structurePatchMs = 0;
-  // Skip JS structure when the Rust pipeline's small-incremental fast path
-  // already handled it. For full builds and large incrementals where Rust
-  // skipped structure, we must run the JS fallback.
-  const needsStructure = !result.structureHandled;
-  // When the Rust addon doesn't include analysis persistence (older addon
-  // version or analysis failed), fall back to JS-side analysis.
-  const needsAnalysisFallback =
-    !result.analysisComplete &&
-    (ctx.opts.ast !== false ||
-      ctx.opts.complexity !== false ||
-      ctx.opts.cfg !== false ||
-      ctx.opts.dataflow !== false);
-
-  if (needsStructure || needsAnalysisFallback) {
-    // When analysis fallback is needed, handoff to better-sqlite3 — the
-    // analysis engine uses the suspend/resume WAL pattern that requires a
-    // real better-sqlite3 connection, not the NativeDbProxy.
-    if (needsAnalysisFallback && ctx.nativeFirstProxy) {
-      closeNativeDb(ctx, 'pre-analysis-fallback');
-      ctx.db = openDb(ctx.dbPath);
-      ctx.nativeFirstProxy = false;
-    } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) {
-      // DB reopen failed — return partial result
-      return formatNativeTimingResult(p, 0, analysisTiming);
-    }
-
-    const fileSymbols = reconstructFileSymbolsFromDb(ctx);
-
-    if (needsStructure) {
-      structurePatchMs = await runPostNativeStructure(
-        ctx,
-        fileSymbols,
-        !!result.isFullBuild,
-        result.changedFiles,
-      );
-    }
-
-    if (needsAnalysisFallback) {
-      analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles);
-    }
-  }
-
-  // Engine parity: the native orchestrator silently drops files whose
-  // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
-  // stale native binaries). WASM handles those — backfill via WASM so both
-  // engines process the same file set (#967).
-  //
-  // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
-  // both gating and the backfill itself. On dirty incrementals/full builds
-  // the orchestrator signals trigger backfill, so the walk happens once
-  // (instead of redundantly inside backfill). On quiet incrementals we
-  // still pay the walk so we can detect brand-new files in dropped-language
-  // extensions — a gap that the orchestrator's `detect_removed_files`
-  // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
-  // because the expensive part (WASM re-parse of the missing set) is
-  // gated below.
-  const removedCount = result.removedCount ?? 0;
-  const changedCount = result.changedCount ?? 0;
-  const gap = detectDroppedLanguageGap(ctx);
-  if (
-    result.isFullBuild ||
-    removedCount > 0 ||
-    changedCount > 0 ||
-    gap.missingAbs.length > 0 ||
-    gap.staleRel.length > 0
-  ) {
-    await backfillNativeDroppedFiles(ctx, gap);
-  }
-
-  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
-  return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
-}
-
-/** Files the native orchestrator silently dropped — the working set for backfill. */
-interface DroppedLanguageGap {
-  /** Relative paths (normalized) of files missing from `nodes` or `file_hashes`. */
-  missingRel: string[];
-  /** Absolute paths, aligned by index with `missingRel`. */
-  missingAbs: string[];
-  /**
-   * Relative paths of WASM-only files present in DB but absent from disk (#1073).
-   * Rust's `detect_removed_files` filter (#1070) skips these, so the JS-side
-   * backfill must purge them. Always disjoint from `missingRel`.
-   */
-  staleRel: string[];
-}
-
-/**
- * Inputs to {@link computeWasmOnlyStaleFiles}. Sets are passed in so the helper
- * is pure and unit-testable independently of `getInstalledWasmExtensions` and
- * the `NATIVE_SUPPORTED_EXTENSIONS` global state.
- */
-export interface WasmOnlyStaleFilesInput {
-  /** Distinct `file` values from the `nodes` table. */
-  existingNodes: ReadonlySet<string>;
-  /** Distinct `file` values from the `file_hashes` table. */
-  existingHashes: ReadonlySet<string>;
-  /** Relative paths currently on disk (from `collectFilesUtil`). */
-  expected: ReadonlySet<string>;
-  /** Lowercased extensions whose WASM grammar is installed. */
-  installedExts: ReadonlySet<string>;
-  /** Extensions covered by the Rust addon — Rust owns deletion for these. */
-  nativeSupported: ReadonlySet<string>;
-}
-
-/**
- * Compute the WASM-only files present in the DB but missing from disk (#1073).
- *
- * Returns relative paths that:
- *   - appear in `existingNodes` or `existingHashes` (in DB),
- *   - are absent from `expected` (not on disk),
- *   - have an extension installed for WASM, AND
- *   - have an extension NOT covered by `nativeSupported` — Rust's
- *     `purge_changed_files` handles deletion for natively-supported extensions
- *     via its own `detect_removed_files`, so the caller must not double-purge.
- *
- * Extensions are lowercased before lookup to match the registry and Rust's
- * `LanguageKind::from_extension` (which normalises case for the languages
- * where both cases are conventional, e.g. R's `.r` / `.R`).
- *
- * DB paths are forced to forward slashes before comparison with `expected`
- * (which is always normalised). The on-disk invariant is that DB rows are
- * written with forward slashes, but a stale row written by older code on
- * Windows could carry back-slashes — normalising here makes the comparison
- * platform-safe and prevents false-positive purges of live rows. We replace
- * `\\` explicitly (rather than calling `normalizePath`, which only touches
- * `path.sep`) so the defence works when running on POSIX against a DB that
- * was migrated from Windows.
- *
- * Exported for unit testing.
- */
-export function computeWasmOnlyStaleFiles(input: WasmOnlyStaleFilesInput): string[] {
-  const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input;
-  const stale: string[] = [];
-  const seen = new Set<string>();
-  const consider = (rawRel: string): void => {
-    const rel = rawRel.replace(/\\/g, '/');
-    if (expected.has(rel) || seen.has(rel)) return;
-    const ext = path.extname(rel).toLowerCase();
-    if (nativeSupported.has(ext)) return;
-    if (!installedExts.has(ext)) return;
-    seen.add(rel);
-    // Push the ORIGINAL raw path (not the normalised form) so the eventual
-    // `DELETE FROM nodes WHERE file = ?` predicate in `purgeFilesData`
-    // matches the actual stored row. The dedup `seen` set keeps the
-    // normalised form so a file written once with `\` and once with `/`
-    // is still treated as one entry — but the value the SQL sees has to
-    // be byte-identical to what's on disk in the DB.
-    stale.push(rawRel);
-  };
-  for (const rel of existingNodes) consider(rel);
-  for (const rel of existingHashes) consider(rel);
-  return stale;
-}
-
-/**
- * Group relative paths by their lowercased extension. Shape matches the bucket
- * type that `formatDropExtensionSummary` consumes, so callers can render a
- * log-friendly per-extension summary without going through `classifyNativeDrops`
- * when the reason is already known (e.g. the stale-purge path where every path
- * is guaranteed `unsupported-by-native`).
- */
-function groupByExtension(relPaths: Iterable<string>): Map<string, string[]> {
-  const buckets = new Map<string, string[]>();
-  for (const rel of relPaths) {
-    const ext = path.extname(rel).toLowerCase();
-    let list = buckets.get(ext);
-    if (!list) {
-      list = [];
-      buckets.set(ext, list);
-    }
-    list.push(rel);
-  }
-  return buckets;
-}
-
-/**
- * Detect files the native orchestrator silently dropped.
- *
- * Walks the filesystem and compares against `nodes` + `file_hashes`. A file
- * is "missing" if it's absent from EITHER table — both must be present for
- * the fast-skip pre-flight (#1054) to work, and the two can diverge (e.g.
- * legacy DBs where `nodes` was populated but `file_hashes` was not).
- *
- * Restricted to files with an installed WASM grammar; extensions in
- * `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
- * installs) can't be parsed by either engine, so they're not a native
- * regression — excluding them keeps the warn count in
- * `backfillNativeDroppedFiles` meaningful.
- *
- * Also detects WASM-only files deleted from disk (#1073). Rust's
- * `detect_removed_files` filter (#1070) skips files outside its supported
- * extensions, so deletions of WASM-only languages don't reach the native
- * purge path; the rest of the backfill only inserts rows, so without this
- * step stale `nodes`/`file_hashes` rows would linger across incremental
- * rebuilds until the next full rebuild.
- *
- * Cheap (no DB handoff, no parsing): used both to gate the backfill call
- * and as its working set. NativeDbProxy supports `.prepare().all()`, so
- * this works whether `ctx.db` is a proxy or a real better-sqlite3
- * connection — letting us skip the close-native / reopen-better-sqlite3
- * cost when there's nothing to backfill.
- */
-function detectDroppedLanguageGap(ctx: PipelineContext): DroppedLanguageGap {
-  const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
-  const expected = new Set(
-    collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
-  );
-
-  const existingNodeRows = ctx.db
-    .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
-    .all() as Array<{ file: string }>;
-  const existingNodes = new Set(existingNodeRows.map((r) => r.file));
-
-  let existingHashes = new Set<string>();
-  try {
-    const existingHashRows = ctx.db
-      .prepare('SELECT DISTINCT file FROM file_hashes')
-      .all() as Array<{ file: string }>;
-    existingHashes = new Set(existingHashRows.map((r) => r.file));
-  } catch (e) {
-    // file_hashes table may not exist on legacy DBs; treat as fully missing
-    // so the backfill writes rows on the upsert path below.
-    debug(
-      `detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
-    );
-  }
-
-  const installedExts = getInstalledWasmExtensions();
-  const missingRel: string[] = [];
-  const missingAbs: string[] = [];
-  for (const rel of expected) {
-    if (existingNodes.has(rel) && existingHashes.has(rel)) continue;
-    const ext = path.extname(rel).toLowerCase();
-    if (!installedExts.has(ext)) continue;
-    missingRel.push(rel);
-    missingAbs.push(path.join(ctx.rootDir, rel));
-  }
-
-  const staleRel = computeWasmOnlyStaleFiles({
-    existingNodes,
-    existingHashes,
-    expected,
-    installedExts,
-    nativeSupported: NATIVE_SUPPORTED_EXTENSIONS,
-  });
-
-  return { missingRel, missingAbs, staleRel };
-}
-
-/**
- * Backfill files that the native orchestrator silently dropped during parse.
- * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
- *
- * Also purges stale rows for WASM-only files deleted from disk (#1073), which
- * Rust's `detect_removed_files` filter (#1070) skips.
- *
- * Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
- * can use the same scan for both gating and the actual backfill — avoiding
- * a redundant fs walk when the orchestrator's signals already triggered.
- */
-async function backfillNativeDroppedFiles(
-  ctx: PipelineContext,
-  gap: DroppedLanguageGap,
-): Promise<void> {
-  const { missingRel, missingAbs, staleRel } = gap;
-  if (missingAbs.length === 0 && staleRel.length === 0) return;
-
-  // Now that we know there's work to do, hand off to better-sqlite3 (needed
-  // for the INSERT path below).
-  if (ctx.nativeFirstProxy) {
-    closeNativeDb(ctx, 'pre-parity-backfill');
-    ctx.db = openDb(ctx.dbPath);
-    ctx.nativeFirstProxy = false;
-  }
-
-  const dbConn = ctx.db as unknown as BetterSqlite3Database;
-
-  // Purge WASM-only files that were deleted from disk (#1073). Rust's
-  // detect_removed_files skips them and the insert path below never visits
-  // them, so without this their rows would persist across rebuilds until the
-  // next full rebuild reset the DB.
-  if (staleRel.length > 0) {
-    // `computeWasmOnlyStaleFiles` guarantees every path here has an extension
-    // outside NATIVE_SUPPORTED_EXTENSIONS, so `classifyNativeDrops` would
-    // always bucket 100% into `unsupported-by-native`. Build the extension
-    // summary directly to avoid a redundant classification pass.
-    const staleByExt = groupByExtension(staleRel);
-    info(
-      `Detected ${staleRel.length} deleted WASM-only file(s) across ${staleByExt.size} extension(s) the native orchestrator skipped; purging stale rows:${formatDropExtensionSummary(staleByExt)}`,
-    );
-    purgeFilesData(dbConn, staleRel);
-  }
-
-  if (missingAbs.length === 0) return;
-
-  // Classify drops so users see per-extension reasons instead of just a count
-  // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
-  // extractor); `native-extractor-failure` indicates a real native bug since
-  // the language IS supported by the addon yet the file was dropped anyway.
-  const { byReason, totals } = classifyNativeDrops(missingRel);
-  if (totals['unsupported-by-native'] > 0) {
-    const buckets = byReason['unsupported-by-native'];
-    info(
-      `Native orchestrator skipped ${totals['unsupported-by-native']} file(s) across ${buckets.size} extension(s) in languages without a Rust extractor; backfilling via WASM:${formatDropExtensionSummary(buckets)}`,
-    );
-  }
-  if (totals['native-extractor-failure'] > 0) {
-    const buckets = byReason['native-extractor-failure'];
-    warn(
-      `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) across ${buckets.size} extension(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM:${formatDropExtensionSummary(buckets)}`,
-    );
-  }
-  const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
-
-  const rows: unknown[][] = [];
-  const exportKeys: unknown[][] = [];
-  for (const [relPath, symbols] of wasmResults) {
-    // File row — mirrors insertDefinitionsAndExports: qualified_name is null.
-    rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
-    for (const def of symbols.definitions ?? []) {
-      // Populate qualified_name/scope the same way the JS fallback does so
-      // downstream queries (cross-file references, "go to definition") find
-      // these symbols.
-      const dotIdx = def.name.lastIndexOf('.');
-      const scope = dotIdx !== -1 ? def.name.slice(0, dotIdx) : null;
-      rows.push([
-        def.name,
-        def.kind,
-        relPath,
-        def.line,
-        def.endLine ?? null,
-        null,
-        def.name,
-        scope,
-        def.visibility ?? null,
-      ]);
-    }
-    // Exports: insert the row (INSERT OR IGNORE — a matching definition row
-    // is a no-op) and queue a key for the second-pass exported=1 update, so
-    // queries filtering on exported=1 find backfilled symbols (#970).
-    for (const exp of symbols.exports ?? []) {
-      rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
-      exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
-    }
-  }
-  const db = dbConn;
-  batchInsertNodes(db, rows);
-
-  // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
-  if (exportKeys.length > 0) {
-    const EXPORT_CHUNK = 500;
-    const exportStmtCache = new Map<number, SqliteStatement>();
-    for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
-      const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
-      const chunkSize = end - i;
-      let updateStmt = exportStmtCache.get(chunkSize);
-      if (!updateStmt) {
-        const conditions = Array.from(
-          { length: chunkSize },
-          () => '(name = ? AND kind = ? AND file = ? AND line = ?)',
-        ).join(' OR ');
-        updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
-        exportStmtCache.set(chunkSize, updateStmt);
-      }
-      const vals: unknown[] = [];
-      for (let j = i; j < end; j++) {
-        const k = exportKeys[j] as unknown[];
-        vals.push(k[0], k[1], k[2], k[3]);
-      }
-      updateStmt.run(...vals);
-    }
-  }
-
-  // Persist file_hashes rows for every backfilled file. The Rust orchestrator
-  // only hashes files it parsed itself, so without this step files in
-  // optional-language extensions (e.g. .clj when no Rust extractor exists)
-  // would be missing from `file_hashes` — permanently breaking the JS-side
-  // fast-skip pre-flight (#1054), which rejects on `collected file missing
-  // from file_hashes` and forces every no-op rebuild back through the full
-  // ~2s native pipeline (#1068).
-  //
-  // Iterates `missingRel` (every collected file the Rust orchestrator
-  // dropped), not `wasmResults`, so files that produced zero symbols still
-  // get a row.
-  try {
-    const upsertHash = db.prepare(
-      'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
-    );
-    const writeHashes = db.transaction(() => {
-      for (let i = 0; i < missingRel.length; i++) {
-        const relPath = missingRel[i];
-        const absPath = missingAbs[i];
-        if (!relPath || !absPath) continue;
-        let code: string | null;
-        try {
-          code = readFileSafe(absPath);
-        } catch (e) {
-          debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
-          continue;
-        }
-        if (code === null) continue;
-        const stat = fileStat(absPath);
-        const mtime = stat ? stat.mtime : 0;
-        const size = stat ? stat.size : 0;
-        upsertHash.run(relPath, fileHash(code), mtime, size);
-      }
-    });
-    writeHashes();
-  } catch (e) {
-    debug(
-      `backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`,
-    );
-  }
-
-  // Free WASM parse trees from the inline backfill path (#1058).
-  // `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
-  // backed by WASM linear memory) on every result, but these symbols are
-  // consumed locally for DB row construction and never added to
-  // `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
-  // sees them. Without this, trees leak WASM memory until process exit —
-  // bounded per run but cumulative across in-process integration tests.
-  // Mirrors the cleanup discipline established for #931.
-  for (const [, symbols] of wasmResults) {
-    const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
-    if (tree && typeof tree.delete === 'function') {
-      try {
-        tree.delete();
-      } catch {
-        /* ignore cleanup errors */
-      }
-    }
-    (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
-    (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
-  }
-}
+// Native db lifecycle and orchestrator helpers live in dedicated stage
+// modules — see `./stages/native-db-lifecycle.ts` and `./stages/native-orchestrator.ts`.
 
 // ── Pipeline stages execution ───────────────────────────────────────────
 
diff --git a/src/domain/graph/builder/stages/native-db-lifecycle.ts b/src/domain/graph/builder/stages/native-db-lifecycle.ts
new file mode 100644
index 000000000..ac9e2568f
--- /dev/null
+++ b/src/domain/graph/builder/stages/native-db-lifecycle.ts
@@ -0,0 +1,74 @@
+/**
+ * NativeDatabase connection lifecycle helpers.
+ *
+ * The Rust orchestrator and the JS pipeline stages both juggle the same
+ * `nativeDb` handle (rusqlite) alongside `ctx.db` (better-sqlite3). These
+ * helpers centralise the open/close/reopen sequence so both call sites
+ * preserve the same WAL-safety invariants:
+ *
+ *   - Always checkpoint WAL before closing rusqlite — otherwise better-sqlite3's
+ *     internal WAL index can drift and surface as SQLITE_CORRUPT on the next
+ *     read (#715, #736).
+ *   - Always reopen better-sqlite3 after rusqlite writes to drop the stale
+ *     page cache.
+ *
+ * Lives in its own module so `tryNativeOrchestrator` (in `native-orchestrator.ts`)
+ * and the JS pipeline stages driver (in `pipeline.ts`) can share the helpers
+ * without either file importing the other.
+ */
+import { openDb } from '../../../../db/index.js';
+import { debug } from '../../../../infrastructure/logger.js';
+import { loadNative } from '../../../../infrastructure/native.js';
+import { toErrorMessage } from '../../../../shared/errors.js';
+import type { PipelineContext } from '../context.js';
+
+/** Checkpoint WAL through rusqlite and close the native connection; failures are only debug-logged. */
+export function closeNativeDb(ctx: PipelineContext, label: string): void {
+  if (!ctx.nativeDb) return; // nothing open — no-op
+  try {
+    ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)');
+  } catch (e) {
+    debug(`${label} WAL checkpoint failed: ${toErrorMessage(e)}`);
+  }
+  try {
+    ctx.nativeDb.close();
+  } catch (e) {
+    debug(`${label} nativeDb close failed: ${toErrorMessage(e)}`);
+  }
+  ctx.nativeDb = undefined; // handle is dropped even when checkpoint/close failed
+}
+
+/** Best-effort reopen of the native connection (read-write); `label` names the phase in the debug log. */
+export function reopenNativeDb(ctx: PipelineContext, label: string): void {
+  if ((ctx.opts.engine ?? 'auto') === 'wasm') return; // engine pinned to wasm — leave nativeDb untouched
+  const native = loadNative();
+  if (!native?.NativeDatabase) return; // addon not loaded, or it exposes no NativeDatabase
+  try {
+    ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
+  } catch (e) {
+    debug(`reopen nativeDb for ${label} failed: ${toErrorMessage(e)}`);
+    ctx.nativeDb = undefined;
+  }
+}
+
+/** Close nativeDb via `closeNativeDb`, then clear the stale handle still referenced by `ctx.engineOpts`. */
+export function suspendNativeDb(ctx: PipelineContext, label: string): void {
+  closeNativeDb(ctx, label);
+  if (ctx.engineOpts?.nativeDb) {
+    ctx.engineOpts.nativeDb = undefined;
+  }
+}
+
+/**
+ * After native writes, reopen the JS db connection to get a fresh page cache.
+ * Rusqlite WAL truncation invalidates better-sqlite3's internal WAL index,
+ * causing SQLITE_CORRUPT on the next read (#715, #736).
+ */
+export function refreshJsDb(ctx: PipelineContext): void {
+  try {
+    ctx.db.close();
+  } catch (e) {
+    debug(`refreshJsDb close failed: ${toErrorMessage(e)}`); // logged only; the reopen below still runs
+  }
+  ctx.db = openDb(ctx.dbPath); // unguarded — a failed reopen propagates to the caller
+}
diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts
new file mode 100644
index 000000000..934dd8d05
--- /dev/null
+++ b/src/domain/graph/builder/stages/native-orchestrator.ts
@@ -0,0 +1,942 @@
+/**
+ * Native build orchestrator stage — runs the full Rust pipeline when available,
+ * with WASM fallback for files the native engine drops.
+ *
+ * Extracted from `pipeline.ts` to break the name-collision cycle between
+ * `buildGraph()` (this module's caller) and `ctx.nativeDb.buildGraph()` (the
+ * Rust orchestrator entry point invoked here). Codegraph's name-based call
+ * resolver previously conflated the two and reported a false-positive
+ * function-level cycle (`buildGraph ↔ tryNativeOrchestrator`).
+ *
+ * The orchestrator-selection strategy lives here so `pipeline.ts` stays a thin
+ * top-level controller: detect changes, try native, fall back to JS stages.
+ */
+import path from 'node:path';
+import { performance } from 'node:perf_hooks';
+import {
+  acquireAdvisoryLock,
+  closeDbPair,
+  openDb,
+  purgeFilesData,
+  releaseAdvisoryLock,
+  setBuildMeta,
+} from '../../../../db/index.js';
+import { debug, info, warn } from '../../../../infrastructure/logger.js';
+import { loadNative } from '../../../../infrastructure/native.js';
+import { semverCompare } from '../../../../infrastructure/update-check.js';
+import { normalizePath } from '../../../../shared/constants.js';
+import { toErrorMessage } from '../../../../shared/errors.js';
+import { CODEGRAPH_VERSION } from '../../../../shared/version.js';
+import type {
+  BetterSqlite3Database,
+  BuildResult,
+  Definition,
+  ExtractorOutput,
+  SqliteStatement,
+} from '../../../../types.js';
+import {
+  classifyNativeDrops,
+  formatDropExtensionSummary,
+  getInstalledWasmExtensions,
+  NATIVE_SUPPORTED_EXTENSIONS,
+  parseFilesWasmForBackfill,
+} from '../../../parser.js';
+import type { PipelineContext } from '../context.js';
+import {
+  batchInsertNodes,
+  collectFiles as collectFilesUtil,
+  fileHash,
+  fileStat,
+  readFileSafe,
+} from '../helpers.js';
+import { NativeDbProxy } from '../native-db-proxy.js';
+import { closeNativeDb } from './native-db-lifecycle.js';
+
+// ── Native orchestrator types ──────────────────────────────────────────
+
+interface NativeOrchestratorResult {
+  phases: Record<string, number>;
+  earlyExit?: boolean;
+  nodeCount?: number;
+  edgeCount?: number;
+  fileCount?: number;
+  changedFiles?: string[];
+  changedCount?: number;
+  removedCount?: number;
+  isFullBuild?: boolean;
+  /** Whether the Rust pipeline handled the structure phase (small-incremental fast path). */
+  structureHandled?: boolean;
+  /** Whether the Rust pipeline wrote AST/complexity/CFG/dataflow to DB. */
+  analysisComplete?: boolean;
+}
+
+/** Files the native orchestrator silently dropped — the working set for backfill. */
+interface DroppedLanguageGap {
+  /** Relative paths (normalized) of files missing from `nodes` or `file_hashes`. */
+  missingRel: string[];
+  /** Absolute paths, aligned by index with `missingRel`. */
+  missingAbs: string[];
+  /**
+   * Relative paths of WASM-only files present in DB but absent from disk (#1073).
+   * Rust's `detect_removed_files` filter (#1070) skips these, so the JS-side
+   * backfill must purge them. Always disjoint from `missingRel`.
+   */
+  staleRel: string[];
+}
+
+/**
+ * Inputs to {@link computeWasmOnlyStaleFiles}. Sets are passed in so the helper
+ * is pure and unit-testable independently of `getInstalledWasmExtensions` and
+ * the `NATIVE_SUPPORTED_EXTENSIONS` global state.
+ */
+export interface WasmOnlyStaleFilesInput {
+  /** Distinct `file` values from the `nodes` table. */
+  existingNodes: ReadonlySet<string>;
+  /** Distinct `file` values from the `file_hashes` table. */
+  existingHashes: ReadonlySet<string>;
+  /** Relative paths currently on disk (from `collectFilesUtil`). */
+  expected: ReadonlySet<string>;
+  /** Lowercased extensions whose WASM grammar is installed. */
+  installedExts: ReadonlySet<string>;
+  /** Extensions covered by the Rust addon — Rust owns deletion for these. */
+  nativeSupported: ReadonlySet<string>;
+}
+
+// ── Native orchestrator helpers ───────────────────────────────────────
+
+/** Determine whether the native orchestrator should be skipped. Returns a reason string, or null if it should run. */
+function shouldSkipNativeOrchestrator(ctx: PipelineContext): string | null {
+  if (ctx.forceFullRebuild) return 'forceFullRebuild';
+  // v3.9.0 addon had buggy incremental purge (wrong SQL on analysis tables,
+  // scoped removal over-detection). Fixed in v3.9.1 by PR #865. Gate on
+  // < 3.9.1 so v3.9.1+ uses the fast Rust orchestrator path; a missing version is not gated.
+  const orchestratorBuggy = !!ctx.engineVersion && semverCompare(ctx.engineVersion, '3.9.1') < 0;
+  if (orchestratorBuggy) return `buggy addon ${ctx.engineVersion}`;
+  if (ctx.engineName !== 'native') return `engine=${ctx.engineName}`;
+  return null; // no skip reason applies — the native orchestrator should run
+}
+
+/** Checkpoint WAL through rusqlite, close nativeDb, and reopen better-sqlite3.
+ *  Returns false if the DB reopen fails (warned, not thrown — caller should return partial result). */
+function handoffWalAfterNativeBuild(ctx: PipelineContext): boolean {
+  closeNativeDb(ctx, 'post-native-build');
+  try {
+    ctx.db.close();
+  } catch (e) {
+    debug(`handoffWal JS db close failed: ${toErrorMessage(e)}`);
+  }
+  try {
+    ctx.db = openDb(ctx.dbPath);
+    return true;
+  } catch (reopenErr) {
+    warn(`Failed to reopen DB after native build: ${toErrorMessage(reopenErr)}`);
+    return false;
+  }
+}
+
+/**
+ * Reconstruct fileSymbols from the DB after a native orchestrator build.
+ * When `scopeFiles` is provided, only loads those files (for analysis-only).
+ * When omitted or empty, loads all files (needed for structure rebuilds).
+ */
+function reconstructFileSymbolsFromDb(
+  ctx: PipelineContext,
+  scopeFiles?: string[],
+): Map<string, ExtractorOutput> {
+  let query =
+    'SELECT file, name, kind, line, end_line as endLine FROM nodes WHERE file IS NOT NULL';
+  const params: string[] = [];
+  if (scopeFiles && scopeFiles.length > 0) {
+    const placeholders = scopeFiles.map(() => '?').join(','); // NOTE(review): one ? per file — may exceed SQLite's bind limit on huge scopes
+    query += ` AND file IN (${placeholders})`;
+    params.push(...scopeFiles);
+  }
+  query += ' ORDER BY file, line';
+
+  const rows = ctx.db.prepare(query).all(...params) as {
+    file: string;
+    name: string;
+    kind: string;
+    line: number;
+    endLine: number | null;
+  }[];
+
+  const fileSymbols = new Map<string, ExtractorOutput>();
+  for (const row of rows) {
+    let entry = fileSymbols.get(row.file);
+    if (!entry) {
+      entry = {
+        definitions: [],
+        calls: [],
+        imports: [],
+        classes: [],
+        exports: [],
+        typeMap: new Map(),
+      };
+      fileSymbols.set(row.file, entry);
+    }
+    entry.definitions.push({
+      name: row.name,
+      kind: row.kind as Definition['kind'],
+      line: row.line,
+      endLine: row.endLine ?? undefined,
+    });
+  }
+
+  // Populate import/export counts from DB edges so buildStructure
+  // computes correct import_count/export_count in node_metrics.
+  // The extractor arrays aren't persisted to the DB, so we derive
+  // counts from edge data instead (#804).
+  const importCountRows = ctx.db
+    .prepare(
+      `SELECT n.file, COUNT(*) AS cnt
+       FROM edges e JOIN nodes n ON e.source_id = n.id
+       WHERE e.kind IN ('imports', 'imports-type', 'dynamic-imports')
+         AND n.file IS NOT NULL
+       GROUP BY n.file`,
+    )
+    .all() as { file: string; cnt: number }[];
+  for (const row of importCountRows) {
+    const entry = fileSymbols.get(row.file);
+    if (entry) entry.imports = new Array(row.cnt) as ExtractorOutput['imports']; // empty slots — only .length carries the count
+  }
+
+  const exportCountRows = ctx.db
+    .prepare(
+      `SELECT n_tgt.file, COUNT(DISTINCT n_tgt.id) AS cnt
+       FROM edges e
+       JOIN nodes n_tgt ON e.target_id = n_tgt.id
+       JOIN nodes n_src ON e.source_id = n_src.id
+       WHERE e.kind IN ('imports', 'imports-type', 'reexports')
+         AND n_tgt.file IS NOT NULL
+         AND n_src.file != n_tgt.file
+       GROUP BY n_tgt.file`,
+    )
+    .all() as { file: string; cnt: number }[];
+  for (const row of exportCountRows) {
+    const entry = fileSymbols.get(row.file);
+    if (entry) entry.exports = new Array(row.cnt) as ExtractorOutput['exports']; // empty slots — only .length carries the count
+  }
+
+  return fileSymbols;
+}
+
+/**
+ * Run JS buildStructure() after native orchestrator to fill directory nodes + contains edges.
+ * For full builds — or when no changed files were reported — passes changedFiles=null (full rebuild).
+ * Otherwise passes the changed file list to scope the update. Returns elapsed ms; errors are warned, not thrown.
+ */
+async function runPostNativeStructure(
+  ctx: PipelineContext,
+  allFileSymbols: Map<string, ExtractorOutput>,
+  isFullBuild: boolean,
+  changedFiles: string[] | undefined,
+): Promise<number> {
+  const structureStart = performance.now();
+  try {
+    const directories = new Set<string>();
+    for (const relPath of allFileSymbols.keys()) {
+      const parts = relPath.split('/');
+      for (let i = 1; i < parts.length; i++) {
+        directories.add(parts.slice(0, i).join('/')); // every ancestor directory prefix of the file
+      }
+    }
+
+    const lineCountMap = new Map<string, number>();
+    const cachedLineCounts = ctx.db
+      .prepare(
+        `SELECT n.name AS file, m.line_count
+         FROM node_metrics m JOIN nodes n ON m.node_id = n.id
+         WHERE n.kind = 'file'`,
+      )
+      .all() as Array<{ file: string; line_count: number }>;
+    for (const row of cachedLineCounts) {
+      lineCountMap.set(row.file, row.line_count);
+    }
+
+    // Full builds need null (rebuild everything). Incremental builds pass the
+    // changed file list so buildStructure only updates those files' metrics
+    // and contains edges — matching the JS pipeline's medium-incremental path.
+    const changedFilePaths = isFullBuild || !changedFiles?.length ? null : changedFiles;
+    const { buildStructure: buildStructureFn } = (await import(
+      '../../../../features/structure.js'
+    )) as {
+      buildStructure: (
+        db: typeof ctx.db,
+        fileSymbols: Map<string, ExtractorOutput>,
+        rootDir: string,
+        lineCountMap: Map<string, number>,
+        directories: Set<string>,
+        changedFiles: string[] | null,
+      ) => void;
+    };
+    buildStructureFn(
+      ctx.db,
+      allFileSymbols,
+      ctx.rootDir,
+      lineCountMap,
+      directories,
+      changedFilePaths,
+    );
+    debug(
+      `Structure phase completed after native orchestrator${changedFilePaths ? ` (${changedFilePaths.length} files)` : ' (full)'}`,
+    );
+  } catch (err) {
+    warn(`Structure phase failed after native build: ${toErrorMessage(err)}`);
+  }
+  return performance.now() - structureStart; // elapsed ms, reported on failure too
+}
+
+/**
+ * JS fallback for AST/complexity/CFG/dataflow analysis after native orchestrator.
+ * Used when the Rust addon doesn't include analysis persistence (older addon
+ * version) or when analysis failed on the Rust side.
+ *
+ * Best-effort: a failed native reopen is debug-logged and a failed analysis
+ * run is warned; neither is rethrown, so the caller still gets timings.
+ *
+ * @param ctx Pipeline context. `ctx.nativeDb` is reopened for the analyses and
+ *   closed again (with a WAL checkpoint) before returning.
+ * @param allFileSymbols Extractor output per file, keyed by file path.
+ * @param changedFiles When non-empty, analysis is scoped to these files.
+ * @returns Per-phase timings in ms; a phase that did not run reports 0.
+ */
+async function runPostNativeAnalysis(
+  ctx: PipelineContext,
+  allFileSymbols: Map<string, ExtractorOutput>,
+  changedFiles: string[] | undefined,
+): Promise<{ astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }> {
+  const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 };
+
+  // Scope analysis fileSymbols to changed files only
+  let analysisFileSymbols: Map<string, ExtractorOutput>;
+  if (changedFiles && changedFiles.length > 0) {
+    analysisFileSymbols = new Map();
+    for (const f of changedFiles) {
+      const entry = allFileSymbols.get(f);
+      if (entry) analysisFileSymbols.set(f, entry);
+    }
+  } else {
+    analysisFileSymbols = allFileSymbols;
+  }
+
+  // Reopen nativeDb for analysis features (suspend/resume WAL pattern).
+  const native = loadNative();
+  if (native?.NativeDatabase) {
+    try {
+      ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
+      if (ctx.engineOpts) ctx.engineOpts.nativeDb = ctx.nativeDb;
+    } catch (e) {
+      debug(`reopen nativeDb for post-native-analysis failed: ${toErrorMessage(e)}`);
+      ctx.nativeDb = undefined;
+      if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined;
+    }
+  }
+
+  // Flush JS WAL pages once so Rust can see them, then no-op callbacks.
+  // Previously each feature called wal_checkpoint(TRUNCATE) individually
+  // (~68ms each × 3-4 features). One FULL checkpoint suffices.
+  if (ctx.nativeDb && ctx.engineOpts) {
+    ctx.db.pragma('wal_checkpoint(FULL)');
+    ctx.engineOpts.suspendJsDb = () => {};
+    ctx.engineOpts.resumeJsDb = () => {};
+  }
+
+  try {
+    const { runAnalyses: runAnalysesFn } = (await import('../../../../ast-analysis/engine.js')) as {
+      runAnalyses: (
+        db: BetterSqlite3Database,
+        fileSymbols: Map<string, ExtractorOutput>,
+        rootDir: string,
+        opts: Record<string, unknown>,
+        engineOpts?: Record<string, unknown>,
+      ) => Promise<{ astMs?: number; complexityMs?: number; cfgMs?: number; dataflowMs?: number }>;
+    };
+    const result = await runAnalysesFn(
+      ctx.db,
+      analysisFileSymbols,
+      ctx.rootDir,
+      ctx.opts as Record<string, unknown>,
+      ctx.engineOpts as unknown as Record<string, unknown> | undefined,
+    );
+    timing.astMs = result.astMs ?? 0;
+    timing.complexityMs = result.complexityMs ?? 0;
+    timing.cfgMs = result.cfgMs ?? 0;
+    timing.dataflowMs = result.dataflowMs ?? 0;
+  } catch (err) {
+    warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`);
+  }
+
+  // Close nativeDb after analyses — TRUNCATE checkpoint flushes all Rust
+  // WAL writes so JS and external readers can see them. Runs once after
+  // all analysis features complete (not per-feature); failures are debug-logged.
+  if (ctx.nativeDb) {
+    closeNativeDb(ctx, 'post-native-analysis');
+    if (ctx.engineOpts) {
+      ctx.engineOpts.nativeDb = undefined;
+      ctx.engineOpts.suspendJsDb = undefined;
+      ctx.engineOpts.resumeJsDb = undefined;
+    }
+  }
+
+  return timing;
+}
+
+/** Merge native phase timings with JS post-processing timings, rounded to 0.1 ms; absent phases report 0. */
+function formatNativeTimingResult(
+  p: Record<string, number>,
+  structurePatchMs: number,
+  analysisTiming: { astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number },
+): BuildResult {
+  return {
+    phases: {
+      setupMs: +(p.setupMs ?? 0).toFixed(1),
+      collectMs: +(p.collectMs ?? 0).toFixed(1),
+      detectMs: +(p.detectMs ?? 0).toFixed(1),
+      parseMs: +(p.parseMs ?? 0).toFixed(1),
+      insertMs: +(p.insertMs ?? 0).toFixed(1),
+      resolveMs: +(p.resolveMs ?? 0).toFixed(1),
+      edgesMs: +(p.edgesMs ?? 0).toFixed(1),
+      structureMs: +((p.structureMs ?? 0) + structurePatchMs).toFixed(1), // native structure time + JS structure pass
+      rolesMs: +(p.rolesMs ?? 0).toFixed(1),
+      astMs: +(analysisTiming.astMs ?? 0).toFixed(1),
+      complexityMs: +(analysisTiming.complexityMs ?? 0).toFixed(1),
+      cfgMs: +(analysisTiming.cfgMs ?? 0).toFixed(1),
+      dataflowMs: +(analysisTiming.dataflowMs ?? 0).toFixed(1),
+      finalizeMs: +(p.finalizeMs ?? 0).toFixed(1),
+    },
+  };
+}
+
+/**
+ * Compute the WASM-only files present in the DB but missing from disk (#1073).
+ *
+ * Returns relative paths that:
+ *   - appear in `existingNodes` or `existingHashes` (in DB),
+ *   - are absent from `expected` (not on disk),
+ *   - have an extension installed for WASM, AND
+ *   - have an extension NOT covered by `nativeSupported` — Rust's
+ *     `purge_changed_files` handles deletion for natively-supported extensions
+ *     via its own `detect_removed_files`, so the caller must not double-purge.
+ *
+ * Extensions are lowercased before lookup to match the registry and Rust's
+ * `LanguageKind::from_extension` (which normalises case for the languages
+ * where both cases are conventional, e.g. R's `.r` / `.R`).
+ *
+ * DB paths are forced to forward slashes before comparison with `expected`
+ * (which is always normalised). The on-disk invariant is that DB rows are
+ * written with forward slashes, but a stale row written by older code on
+ * Windows could carry back-slashes — normalising here makes the comparison
+ * platform-safe and prevents false-positive purges of live rows. We replace
+ * `\\` explicitly (rather than calling `normalizePath`, which only touches
+ * `path.sep`) so the defence works when running on POSIX against a DB that
+ * was migrated from Windows.
+ *
+ * Exported for unit testing.
+ */
+export function computeWasmOnlyStaleFiles(input: WasmOnlyStaleFilesInput): string[] {
+  const { existingNodes, existingHashes, expected, installedExts, nativeSupported } = input;
+  const stale: string[] = [];
+  const seen = new Set<string>();
+  const consider = (rawRel: string): void => {
+    const rel = rawRel.replace(/\\/g, '/'); // comparison key only — rawRel is what gets returned
+    if (expected.has(rel) || seen.has(rel)) return; // still on disk, or already recorded
+    const ext = path.extname(rel).toLowerCase();
+    if (nativeSupported.has(ext)) return; // Rust owns deletion for this extension
+    if (!installedExts.has(ext)) return; // no WASM grammar installed for this extension
+    seen.add(rel);
+    // Push the ORIGINAL raw path (not the normalised form) so the eventual
+    // `DELETE FROM nodes WHERE file = ?` predicate in `purgeFilesData`
+    // matches the actual stored row. The dedup `seen` set keeps the
+    // normalised form so a file written once with `\` and once with `/`
+    // is still treated as one entry — but the value the SQL sees has to
+    // be byte-identical to what's on disk in the DB.
+    stale.push(rawRel);
+  };
+  for (const rel of existingNodes) consider(rel);
+  for (const rel of existingHashes) consider(rel);
+  return stale;
+}
+
+/**
+ * Group relative paths by their lowercased extension. Shape matches the bucket
+ * type that `formatDropExtensionSummary` consumes, so callers can render a
+ * log-friendly per-extension summary without going through `classifyNativeDrops`
+ * when the reason is already known (e.g. the stale-purge path where every path
+ * is guaranteed `unsupported-by-native`).
+ */
+function groupByExtension(relPaths: Iterable<string>): Map<string, string[]> {
+  const buckets = new Map<string, string[]>();
+  for (const rel of relPaths) {
+    const ext = path.extname(rel).toLowerCase(); // '' when the path has no extension
+    let list = buckets.get(ext);
+    if (!list) {
+      list = [];
+      buckets.set(ext, list);
+    }
+    list.push(rel); // paths keep their input order within each bucket
+  }
+  return buckets;
+}
+
+/**
+ * Detect files the native orchestrator silently dropped.
+ *
+ * Walks the filesystem and compares against `nodes` + `file_hashes`. A file
+ * is "missing" if it's absent from EITHER table — both must be present for
+ * the fast-skip pre-flight (#1054) to work, and the two can diverge (e.g.
+ * legacy DBs where `nodes` was populated but `file_hashes` was not).
+ *
+ * Restricted to files with an installed WASM grammar; extensions in
+ * `LANGUAGE_REGISTRY` without a shipped grammar (e.g. groovy on minimal
+ * installs) can't be parsed by either engine, so they're not a native
+ * regression — excluding them keeps the warn count in
+ * `backfillNativeDroppedFiles` meaningful.
+ *
+ * Also detects WASM-only files deleted from disk (#1073). Rust's
+ * `detect_removed_files` filter (#1070) skips files outside its supported
+ * extensions, so deletions of WASM-only languages don't reach the native
+ * purge path; the rest of the backfill only inserts rows, so without this
+ * step stale `nodes`/`file_hashes` rows would linger across incremental
+ * rebuilds until the next full rebuild.
+ *
+ * Cheap (no DB handoff, no parsing): used both to gate the backfill call
+ * and as its working set. NativeDbProxy supports `.prepare().all()`, so
+ * this works whether `ctx.db` is a proxy or a real better-sqlite3
+ * connection — letting us skip the close-native / reopen-better-sqlite3
+ * cost when there's nothing to backfill.
+ */
+function detectDroppedLanguageGap(ctx: PipelineContext): DroppedLanguageGap {
+  const collected = collectFilesUtil(ctx.rootDir, [], ctx.config, new Set<string>());
+  const expected = new Set(
+    collected.files.map((f) => normalizePath(path.relative(ctx.rootDir, f))),
+  );
+
+  const existingNodeRows = ctx.db
+    .prepare("SELECT DISTINCT file FROM nodes WHERE kind = 'file'")
+    .all() as Array<{ file: string }>;
+  const existingNodes = new Set(existingNodeRows.map((r) => r.file));
+
+  let existingHashes = new Set<string>(); // stays empty if the read below fails
+  try {
+    const existingHashRows = ctx.db
+      .prepare('SELECT DISTINCT file FROM file_hashes')
+      .all() as Array<{ file: string }>;
+    existingHashes = new Set(existingHashRows.map((r) => r.file));
+  } catch (e) {
+    // file_hashes table may not exist on legacy DBs; treat as fully missing
+    // so the backfill writes rows on the upsert path below.
+    debug(
+      `detectDroppedLanguageGap: file_hashes read failed (table may not exist): ${toErrorMessage(e)}`,
+    );
+  }
+
+  const installedExts = getInstalledWasmExtensions();
+  const missingRel: string[] = [];
+  const missingAbs: string[] = [];
+  for (const rel of expected) {
+    if (existingNodes.has(rel) && existingHashes.has(rel)) continue; // present in both tables
+    const ext = path.extname(rel).toLowerCase();
+    if (!installedExts.has(ext)) continue; // no installed WASM grammar for this extension
+    missingRel.push(rel);
+    missingAbs.push(path.join(ctx.rootDir, rel)); // pushed in lockstep so indices align with missingRel
+  }
+
+  const staleRel = computeWasmOnlyStaleFiles({
+    existingNodes,
+    existingHashes,
+    expected,
+    installedExts,
+    nativeSupported: NATIVE_SUPPORTED_EXTENSIONS,
+  });
+
+  return { missingRel, missingAbs, staleRel };
+}
+
+/**
+ * Backfill files that the native orchestrator silently dropped during parse.
+ * Falls back to WASM + inserts file/symbol nodes so engine counts match (#967).
+ *
+ * Also purges stale rows for WASM-only files deleted from disk (#1073), which
+ * Rust's `detect_removed_files` filter (#1070) skips.
+ *
+ * Accepts a pre-computed `gap` from `detectDroppedLanguageGap` so the caller
+ * can use the same scan for both gating and the actual backfill — avoiding
+ * a redundant fs walk when the orchestrator's signals already triggered.
+ */
+async function backfillNativeDroppedFiles(
+  ctx: PipelineContext,
+  gap: DroppedLanguageGap,
+): Promise<void> {
+  const { missingRel, missingAbs, staleRel } = gap;
+  if (missingAbs.length === 0 && staleRel.length === 0) return;
+
+  // Now that we know there's work to do, hand off to better-sqlite3 (needed
+  // for the INSERT path below).
+  if (ctx.nativeFirstProxy) {
+    closeNativeDb(ctx, 'pre-parity-backfill');
+    ctx.db = openDb(ctx.dbPath);
+    ctx.nativeFirstProxy = false;
+  }
+
+  const dbConn = ctx.db as unknown as BetterSqlite3Database;
+
+  // Purge WASM-only files that were deleted from disk (#1073). Rust's
+  // detect_removed_files skips them and the insert path below never visits
+  // them, so without this their rows would persist across rebuilds until the
+  // next full rebuild reset the DB.
+  if (staleRel.length > 0) {
+    // `computeWasmOnlyStaleFiles` guarantees every path here has an extension
+    // outside NATIVE_SUPPORTED_EXTENSIONS, so `classifyNativeDrops` would
+    // always bucket 100% into `unsupported-by-native`. Build the extension
+    // summary directly to avoid a redundant classification pass.
+    const staleByExt = groupByExtension(staleRel);
+    info(
+      `Detected ${staleRel.length} deleted WASM-only file(s) across ${staleByExt.size} extension(s) the native orchestrator skipped; purging stale rows:${formatDropExtensionSummary(staleByExt)}`,
+    );
+    purgeFilesData(dbConn, staleRel);
+  }
+
+  if (missingAbs.length === 0) return;
+
+  // Classify drops so users see per-extension reasons instead of just a count
+  // (#1011). `unsupported-by-native` is a legitimate parser limit (no Rust
+  // extractor); `native-extractor-failure` indicates a real native bug since
+  // the language IS supported by the addon yet the file was dropped anyway.
+  const { byReason, totals } = classifyNativeDrops(missingRel);
+  if (totals['unsupported-by-native'] > 0) {
+    const buckets = byReason['unsupported-by-native'];
+    info(
+      `Native orchestrator skipped ${totals['unsupported-by-native']} file(s) across ${buckets.size} extension(s) in languages without a Rust extractor; backfilling via WASM:${formatDropExtensionSummary(buckets)}`,
+    );
+  }
+  if (totals['native-extractor-failure'] > 0) {
+    const buckets = byReason['native-extractor-failure'];
+    warn(
+      `Native orchestrator dropped ${totals['native-extractor-failure']} file(s) across ${buckets.size} extension(s) in natively-supported languages — likely a Rust extractor bug. Backfilling via WASM:${formatDropExtensionSummary(buckets)}`,
+    );
+  }
+  const wasmResults = await parseFilesWasmForBackfill(missingAbs, ctx.rootDir);
+
+  const rows: unknown[][] = [];
+  const exportKeys: unknown[][] = [];
+  for (const [relPath, symbols] of wasmResults) {
+    // File row — mirrors insertDefinitionsAndExports: qualified_name is null.
+    rows.push([relPath, 'file', relPath, 0, null, null, null, null, null]);
+    for (const def of symbols.definitions ?? []) {
+      // Populate qualified_name/scope the same way the JS fallback does so
+      // downstream queries (cross-file references, "go to definition") find
+      // these symbols.
+      const dotIdx = def.name.lastIndexOf('.');
+      const scope = dotIdx !== -1 ? def.name.slice(0, dotIdx) : null;
+      rows.push([
+        def.name,
+        def.kind,
+        relPath,
+        def.line,
+        def.endLine ?? null,
+        null,
+        def.name,
+        scope,
+        def.visibility ?? null,
+      ]);
+    }
+    // Exports: insert the row (INSERT OR IGNORE — a matching definition row
+    // is a no-op) and queue a key for the second-pass exported=1 update, so
+    // queries filtering on exported=1 find backfilled symbols (#970).
+    for (const exp of symbols.exports ?? []) {
+      rows.push([exp.name, exp.kind, relPath, exp.line, null, null, exp.name, null, null]);
+      exportKeys.push([exp.name, exp.kind, relPath, exp.line]);
+    }
+  }
+  const db = dbConn; // alias of the connection cast above
+  batchInsertNodes(db, rows);
+
+  // Mark exported symbols in batches — mirrors insertDefinitionsAndExports.
+  if (exportKeys.length > 0) {
+    const EXPORT_CHUNK = 500; // keys per UPDATE statement; each key binds 4 parameters
+    const exportStmtCache = new Map<number, SqliteStatement>(); // keyed by chunk size
+    for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) {
+      const end = Math.min(i + EXPORT_CHUNK, exportKeys.length);
+      const chunkSize = end - i; // EXPORT_CHUNK, except possibly for the final chunk
+      let updateStmt = exportStmtCache.get(chunkSize);
+      if (!updateStmt) {
+        const conditions = Array.from(
+          { length: chunkSize },
+          () => '(name = ? AND kind = ? AND file = ? AND line = ?)',
+        ).join(' OR ');
+        updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`);
+        exportStmtCache.set(chunkSize, updateStmt);
+      }
+      const vals: unknown[] = [];
+      for (let j = i; j < end; j++) {
+        const k = exportKeys[j] as unknown[];
+        vals.push(k[0], k[1], k[2], k[3]);
+      }
+      updateStmt.run(...vals);
+    }
+  }
+
+  // Persist file_hashes rows for every backfilled file. The Rust orchestrator
+  // only hashes files it parsed itself, so without this step files in
+  // optional-language extensions (e.g. .clj when no Rust extractor exists)
+  // would be missing from `file_hashes` — permanently breaking the JS-side
+  // fast-skip pre-flight (#1054), which rejects on `collected file missing
+  // from file_hashes` and forces every no-op rebuild back through the full
+  // ~2s native pipeline (#1068).
+  //
+  // Iterates `missingRel` (every collected file the Rust orchestrator
+  // dropped), not `wasmResults`, so files that produced zero symbols still
+  // get a row.
+  try {
+    const upsertHash = db.prepare(
+      'INSERT OR REPLACE INTO file_hashes (file, hash, mtime, size) VALUES (?, ?, ?, ?)',
+    );
+    const writeHashes = db.transaction(() => {
+      for (let i = 0; i < missingRel.length; i++) {
+        const relPath = missingRel[i];
+        const absPath = missingAbs[i];
+        if (!relPath || !absPath) continue; // missingRel/missingAbs are read by the same index
+        let code: string | null;
+        try {
+          code = readFileSafe(absPath);
+        } catch (e) {
+          debug(`backfillNativeDroppedFiles: read failed for ${relPath}: ${toErrorMessage(e)}`);
+          continue;
+        }
+        if (code === null) continue; // nothing to hash for this file
+        const stat = fileStat(absPath);
+        const mtime = stat ? stat.mtime : 0; // 0 when no stat result is available
+        const size = stat ? stat.size : 0;
+        upsertHash.run(relPath, fileHash(code), mtime, size);
+      }
+    });
+    writeHashes();
+  } catch (e) {
+    debug(
+      `backfillNativeDroppedFiles: file_hashes write failed (table may not exist): ${toErrorMessage(e)}`,
+    );
+  }
+
+  // Free WASM parse trees from the inline backfill path (#1058).
+  // `parseFilesWasmInline` sets `symbols._tree` (a live web-tree-sitter Tree
+  // backed by WASM linear memory) on every result, but these symbols are
+  // consumed locally for DB row construction and never added to
+  // `ctx.allSymbols`, so the finalize-stage `releaseWasmTrees` sweep never
+  // sees them. Without this, trees leak WASM memory until process exit —
+  // bounded per run but cumulative across in-process integration tests.
+  // Mirrors the cleanup discipline established for #931.
+  for (const [, symbols] of wasmResults) {
+    const tree = (symbols as { _tree?: { delete?: () => void } })._tree;
+    if (tree && typeof tree.delete === 'function') {
+      try {
+        tree.delete();
+      } catch {
+        /* ignore cleanup errors */
+      }
+    }
+    (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined;
+    (symbols as { _tree?: unknown; _langId?: unknown })._langId = undefined;
+  }
+}
+
+/**
+ * Try the native build orchestrator.
+ *
+ * Returns:
+ *   - `BuildResult` on success (caller should return it directly).
+ *   - `'early-exit'` when the orchestrator detected no changes (caller should return undefined).
+ *   - `undefined` when native is unavailable or skipped (caller should fall through to the JS pipeline).
+ *
+ * Encapsulates the orchestrator-selection strategy: open `NativeDatabase`,
+ * invoke `nativeDb.buildGraph()` (the Rust pipeline), and run post-native
+ * structure + analysis fallbacks. Lives in its own file to keep the Rust
+ * orchestrator entry point separated from the JS-side `buildGraph()` driver
+ * in `pipeline.ts`.
+ */
+export async function tryNativeOrchestrator(
+  ctx: PipelineContext,
+): Promise<BuildResult | undefined | 'early-exit'> {
+  const skipReason = shouldSkipNativeOrchestrator(ctx);
+  if (skipReason) {
+    debug(`Skipping native orchestrator: ${skipReason}`);
+    return undefined;
+  }
+
+  // Open NativeDatabase on demand — deferred from setupPipeline to skip the
+  // ~60ms cost on no-op/early-exit builds. Close the better-sqlite3 connection
+  // first to avoid dual-connection WAL corruption.
+  if (!ctx.nativeDb && ctx.nativeAvailable) {
+    const native = loadNative();
+    if (native?.NativeDatabase) {
+      try {
+        // Close better-sqlite3 before opening rusqlite to avoid WAL conflicts.
+        // Uses raw close() instead of closeDb() intentionally — the advisory lock
+        // is kept and transferred to the NativeDbProxy below, not released here.
+        ctx.db.close();
+        acquireAdvisoryLock(ctx.dbPath);
+        ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath);
+        ctx.nativeDb.initSchema();
+        // Replace ctx.db with a NativeDbProxy so post-native JS fallback
+        // (structure, analysis) can use it without reopening better-sqlite3.
+        const proxy = new NativeDbProxy(ctx.nativeDb);
+        proxy.__lockPath = `${ctx.dbPath}.lock`;
+        ctx.db = proxy as unknown as typeof ctx.db;
+        ctx.nativeFirstProxy = true;
+      } catch (err) {
+        warn(`NativeDatabase setup failed, falling back to JS: ${toErrorMessage(err)}`);
+        try {
+          ctx.nativeDb?.close();
+        } catch (e) {
+          debug(`tryNativeOrchestrator: close failed during fallback: ${toErrorMessage(e)}`);
+        }
+        ctx.nativeDb = undefined;
+        ctx.nativeFirstProxy = false; // defensive: reset in case future refactors move the assignment above throwing lines
+        releaseAdvisoryLock(`${ctx.dbPath}.lock`);
+        // Reopen better-sqlite3 for JS pipeline fallback
+        ctx.db = openDb(ctx.dbPath);
+      }
+    }
+  }
+
+  if (!ctx.nativeDb?.buildGraph) return undefined;
+
+  const resultJson = ctx.nativeDb.buildGraph(
+    ctx.rootDir,
+    JSON.stringify(ctx.config),
+    JSON.stringify(ctx.aliases),
+    JSON.stringify(ctx.opts),
+  );
+  const result = JSON.parse(resultJson) as NativeOrchestratorResult; // cast only; shape is not validated here
+
+  if (result.earlyExit) {
+    info('No changes detected');
+    // Even on no-op rebuilds, dropped-language files added since the last
+    // full build are still missing from `nodes`/`file_hashes` (#1083), and
+    // WASM-only files deleted from disk leave stale rows behind (#1073).
+    // The orchestrator's file_collector skipped them, so its earlyExit
+    // doesn't imply DB consistency. Run the gap repair before returning.
+    const gap = detectDroppedLanguageGap(ctx);
+    if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) {
+      await backfillNativeDroppedFiles(ctx, gap);
+    }
+    closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
+    return 'early-exit';
+  }
+
+  // Log incremental status to match JS pipeline output
+  const changed = result.changedCount ?? 0;
+  const removed = result.removedCount ?? 0;
+  if (!result.isFullBuild && (changed > 0 || removed > 0)) {
+    info(`Incremental: ${changed} changed, ${removed} removed`);
+  }
+
+  const p = result.phases;
+
+  // Sync build_meta so JS-side version/engine checks work on next build.
+  // Use the binary's CARGO_PKG_VERSION (ctx.nativeBinaryVersion), not the
+  // platform package.json version (ctx.engineVersion). The Rust side's
+  // check_version_mismatch compares against CARGO_PKG_VERSION; writing
+  // the package.json value would create a permanent mismatch whenever
+  // the binary and platform package.json diverge — e.g., CI hot-swap
+  // via ci-install-native.mjs (#1066) — forcing every subsequent build
+  // to be a full rebuild.
+  //
+  // When the native addon doesn't expose engineVersion() (older addon),
+  // fall back to CODEGRAPH_VERSION — same fallback used by both
+  // checkEngineSchemaMismatch (read path) and persistBuildMetadata
+  // (the JS-pipeline write path in finalize.ts). Using ctx.engineVersion
+  // here would re-introduce the asymmetry this PR fixes for that case.
+  const nativeVersionForMeta = ctx.nativeBinaryVersion || CODEGRAPH_VERSION;
+  setBuildMeta(ctx.db, {
+    engine: ctx.engineName,
+    engine_version: nativeVersionForMeta,
+    codegraph_version: nativeVersionForMeta,
+    schema_version: String(ctx.schemaVersion),
+    built_at: new Date().toISOString(),
+  });
+
+  info(
+    `Native build orchestrator completed: ${result.nodeCount ?? 0} nodes, ${result.edgeCount ?? 0} edges, ${result.fileCount ?? 0} files`,
+  );
+
+  // ── Post-native structure + analysis ──────────────────────────────
+  let analysisTiming = {
+    astMs: +(p.astMs ?? 0),
+    complexityMs: +(p.complexityMs ?? 0),
+    cfgMs: +(p.cfgMs ?? 0),
+    dataflowMs: +(p.dataflowMs ?? 0),
+  };
+  let structurePatchMs = 0;
+  // Skip JS structure when the Rust pipeline's small-incremental fast path
+  // already handled it. For full builds and large incrementals where Rust
+  // skipped structure, we must run the JS fallback.
+  const needsStructure = !result.structureHandled;
+  // When the Rust addon doesn't include analysis persistence (older addon
+  // version or analysis failed), fall back to JS-side analysis.
+  const needsAnalysisFallback =
+    !result.analysisComplete &&
+    (ctx.opts.ast !== false ||
+      ctx.opts.complexity !== false ||
+      ctx.opts.cfg !== false ||
+      ctx.opts.dataflow !== false);
+
+  if (needsStructure || needsAnalysisFallback) {
+    // When analysis fallback is needed, handoff to better-sqlite3 — the
+    // analysis engine uses the suspend/resume WAL pattern that requires a
+    // real better-sqlite3 connection, not the NativeDbProxy.
+    if (needsAnalysisFallback && ctx.nativeFirstProxy) {
+      closeNativeDb(ctx, 'pre-analysis-fallback');
+      ctx.db = openDb(ctx.dbPath);
+      ctx.nativeFirstProxy = false;
+    } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) {
+      // DB reopen failed — return partial result; skips the backfill and closeDbPair below
+      return formatNativeTimingResult(p, 0, analysisTiming);
+    }
+
+    const fileSymbols = reconstructFileSymbolsFromDb(ctx);
+
+    if (needsStructure) {
+      structurePatchMs = await runPostNativeStructure(
+        ctx,
+        fileSymbols,
+        !!result.isFullBuild,
+        result.changedFiles,
+      );
+    }
+
+    if (needsAnalysisFallback) {
+      analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles);
+    }
+  }
+
+  // Engine parity: the native orchestrator silently drops files whose
+  // Rust extractor/grammar is missing or fails (e.g. HCL, Scala, Swift on
+  // stale native binaries). WASM handles those — backfill via WASM so both
+  // engines process the same file set (#967).
+  //
+  // Detect the gap once (fs walk + 2 DB queries, ~20–30ms) and use it for
+  // both gating and the backfill itself. On dirty incrementals/full builds
+  // the orchestrator signals trigger backfill, so the walk happens once
+  // (instead of redundantly inside backfill). On quiet incrementals we
+  // still pay the walk so we can detect brand-new files in dropped-language
+  // extensions — a gap that the orchestrator's `detect_removed_files`
+  // filter (#1070) leaves open (#1083, #1091). The pre-check is cheap
+  // because the expensive part (WASM re-parse of the missing set) is
+  // gated below.
+  const removedCount = result.removedCount ?? 0; // same value as `removed` above
+  const changedCount = result.changedCount ?? 0; // same value as `changed` above
+  const gap = detectDroppedLanguageGap(ctx);
+  if (
+    result.isFullBuild ||
+    removedCount > 0 ||
+    changedCount > 0 ||
+    gap.missingAbs.length > 0 ||
+    gap.staleRel.length > 0
+  ) {
+    await backfillNativeDroppedFiles(ctx, gap);
+  }
+
+  closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb });
+  return formatNativeTimingResult(p, structurePatchMs, analysisTiming);
+}

From 6637066bc6525af444f9e64c75380fab058ea437 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 12:44:34 -0600
Subject: [PATCH 11/27] refactor(builder): decompose builder stages and adopt
 shared helpers

---
 src/domain/graph/builder/helpers.ts           | 161 +++++----
 src/domain/graph/builder/incremental.ts       | 266 +++++++++-----
 .../graph/builder/stages/build-edges.ts       | 338 ++++++++++--------
 .../graph/builder/stages/build-structure.ts   | 197 +++++-----
 .../graph/builder/stages/detect-changes.ts    | 171 +++++----
 src/domain/graph/builder/stages/finalize.ts   | 142 ++++----
 .../graph/builder/stages/insert-nodes.ts      | 274 +++++++-------
 7 files changed, 908 insertions(+), 641 deletions(-)

diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts
index c6cbd4845..4b3665a5d 100644
--- a/src/domain/graph/builder/helpers.ts
+++ b/src/domain/graph/builder/helpers.ts
@@ -76,108 +76,117 @@ export function passesIncludeExclude(
   return true;
 }
 
+/** Per-walk state computed once at the top-level invocation. */
+interface CollectContext {
+  readonly rootDir: string; // base for the relative paths matched against the globs
+  readonly includeRegexes: readonly RegExp[];
+  readonly excludeRegexes: readonly RegExp[];
+  readonly hasGlobFilters: boolean; // false lets the walk skip relative-path matching
+  readonly extraIgnore: Set<string> | null; // forwarded to shouldSkipEntry
+  readonly visited: Set<string>; // real paths of directories already walked
+}
+
+/** True when `dir` must be skipped: unresolvable, or its real path was already walked. */
+function isSymlinkLoop(dir: string, visited: Set<string>): boolean {
+  let resolved: string;
+  try {
+    resolved = fs.realpathSync(dir);
+  } catch {
+    return true; // unresolvable path: skip it silently, as if it were a loop
+  }
+  if (!visited.has(resolved)) {
+    visited.add(resolved); // first sighting: remember the real path
+    return false;
+  }
+  warn(`Symlink loop detected, skipping: ${dir}`);
+  return true;
+}
+
+/** List `dir` as Dirent entries; on failure a warning is logged and null is returned. */
+function readDirSafe(dir: string): fs.Dirent[] | null {
+  try {
+    return fs.readdirSync(dir, { withFileTypes: true });
+  } catch (err: unknown) {
+    // Narrow instead of asserting so a non-Error throw still yields a readable message.
+    warn(`Cannot read directory ${dir}: ${err instanceof Error ? err.message : String(err)}`);
+    return null;
+  }
+}
+
+/** Accept `entry` when its extension is in EXTENSIONS and it passes any configured globs. */
+function isCollectableSourceFile(full: string, entry: fs.Dirent, ctx: CollectContext): boolean {
+  const knownExtension = EXTENSIONS.has(path.extname(entry.name));
+  if (!knownExtension || !ctx.hasGlobFilters) return knownExtension;
+  const relFromRoot = normalizePath(path.relative(ctx.rootDir, full));
+  return passesIncludeExclude(relFromRoot, ctx.includeRegexes, ctx.excludeRegexes);
+}
+
+/**
+ * Depth-first walk of `dir`: append collectable files to `files` and, when
+ * `directories` is given, record every directory that directly holds one.
+ */
+function walkCollect(
+  dir: string,
+  files: string[],
+  directories: Set<string> | null,
+  ctx: CollectContext,
+): void {
+  if (isSymlinkLoop(dir, ctx.visited)) return;
+  const listing = readDirSafe(dir);
+  if (listing === null) return;
+  let directFileCount = 0; // files found in `dir` itself, not in subdirectories
+  for (const entry of listing) {
+    if (shouldSkipEntry(entry, ctx.extraIgnore)) continue;
+    const entryPath = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+      walkCollect(entryPath, files, directories, ctx);
+      continue;
+    }
+    if (!isCollectableSourceFile(entryPath, entry, ctx)) continue;
+    files.push(entryPath);
+    directFileCount++;
+  }
+  if (directFileCount > 0) directories?.add(dir); // after the loop: children are recorded first
+}
+
 /**
  * Recursively collect all source files under `dir`.
  * When `directories` is a Set, also tracks which directories contain files.
  *
- * The first invocation establishes `dir` as the project root against which
- * `config.include` / `config.exclude` globs are matched.
+ * `dir` establishes the project root against which `config.include` /
+ * `config.exclude` globs are matched.
  */
 export function collectFiles(
   dir: string,
   files: string[],
   config: Partial<CodegraphConfig>,
   directories: Set<string>,
-  _visited?: Set<string>,
-  _rootDir?: string,
-  _includeRegexes?: readonly RegExp[],
-  _excludeRegexes?: readonly RegExp[],
 ): { files: string[]; directories: Set<string> };
 export function collectFiles(
   dir: string,
   files?: string[],
   config?: Partial<CodegraphConfig>,
   directories?: null,
-  _visited?: Set<string>,
-  _rootDir?: string,
-  _includeRegexes?: readonly RegExp[],
-  _excludeRegexes?: readonly RegExp[],
 ): string[];
 export function collectFiles(
   dir: string,
   files: string[] = [],
   config: Partial<CodegraphConfig> = {},
   directories: Set<string> | null = null,
-  _visited: Set<string> = new Set(),
-  _rootDir?: string,
-  _includeRegexes?: readonly RegExp[],
-  _excludeRegexes?: readonly RegExp[],
 ): string[] | { files: string[]; directories: Set<string> } {
   const trackDirs = directories instanceof Set;
-  let hasFiles = false;
-
-  // First call: compute root and compile include/exclude patterns once,
-  // then pass them down recursive calls so we don't recompile per directory.
-  const rootDir = _rootDir ?? dir;
-  const includeRegexes = _includeRegexes ?? compileGlobs(config.include);
-  const excludeRegexes = _excludeRegexes ?? compileGlobs(config.exclude);
-  const hasGlobFilters = includeRegexes.length > 0 || excludeRegexes.length > 0;
-
-  // Merge config ignoreDirs with defaults
-  const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
-
-  // Detect symlink loops (before I/O to avoid wasted readdirSync)
-  let realDir: string;
-  try {
-    realDir = fs.realpathSync(dir);
-  } catch {
-    return trackDirs ? { files, directories: directories as Set<string> } : files;
-  }
-  if (_visited.has(realDir)) {
-    warn(`Symlink loop detected, skipping: ${dir}`);
-    return trackDirs ? { files, directories: directories as Set<string> } : files;
-  }
-  _visited.add(realDir);
-
-  let entries: fs.Dirent[];
-  try {
-    entries = fs.readdirSync(dir, { withFileTypes: true });
-  } catch (err: unknown) {
-    warn(`Cannot read directory ${dir}: ${(err as Error).message}`);
-    return trackDirs ? { files, directories: directories as Set<string> } : files;
-  }
+  const includeRegexes = compileGlobs(config.include);
+  const excludeRegexes = compileGlobs(config.exclude);
+  const ctx: CollectContext = {
+    rootDir: dir,
+    includeRegexes,
+    excludeRegexes,
+    hasGlobFilters: includeRegexes.length > 0 || excludeRegexes.length > 0,
+    extraIgnore: config.ignoreDirs ? new Set(config.ignoreDirs) : null,
+    visited: new Set(),
+  };
 
-  for (const entry of entries) {
-    if (shouldSkipEntry(entry, extraIgnore)) continue;
+  walkCollect(dir, files, trackDirs ? (directories as Set<string>) : null, ctx);
 
-    const full = path.join(dir, entry.name);
-    if (entry.isDirectory()) {
-      if (trackDirs) {
-        collectFiles(
-          full,
-          files,
-          config,
-          directories as Set<string>,
-          _visited,
-          rootDir,
-          includeRegexes,
-          excludeRegexes,
-        );
-      } else {
-        collectFiles(full, files, config, null, _visited, rootDir, includeRegexes, excludeRegexes);
-      }
-    } else if (EXTENSIONS.has(path.extname(entry.name))) {
-      if (hasGlobFilters) {
-        const rel = normalizePath(path.relative(rootDir, full));
-        if (!passesIncludeExclude(rel, includeRegexes, excludeRegexes)) continue;
-      }
-      files.push(full);
-      hasFiles = true;
-    }
-  }
-  if (trackDirs && hasFiles) {
-    (directories as Set<string>).add(dir);
-  }
   return trackDirs ? { files, directories: directories as Set<string> } : files;
 }
 
diff --git a/src/domain/graph/builder/incremental.ts b/src/domain/graph/builder/incremental.ts
index 66853983e..d7aa488ed 100644
--- a/src/domain/graph/builder/incremental.ts
+++ b/src/domain/graph/builder/incremental.ts
@@ -307,6 +307,63 @@ function resolveBarrelImportEdges(
   return edgesAdded;
 }
 
+/**
+ * Add one `imports-type` edge per imported name that resolves to a symbol, so
+ * type-only targets get fan-in credit and aren't falsely classified as dead code.
+ */
+function emitTypeOnlySymbolEdges(
+  db: BetterSqlite3Database | null,
+  stmts: IncrementalStmts,
+  imp: ExtractorOutput['imports'][number],
+  resolvedPath: string,
+  fileNodeId: number,
+): number {
+  let emitted = 0;
+  for (const rawName of imp.names) {
+    const ident = rawName.replace(/^\*\s+as\s+/, ''); // drop a leading `* as `
+    // Through a barrel file, look the symbol up in the file the barrel resolves to.
+    let lookupFile = resolvedPath;
+    if (db && isBarrelFile(db, resolvedPath)) {
+      lookupFile = resolveBarrelTarget(db, resolvedPath, ident) || resolvedPath;
+    }
+    const [firstMatch] = stmts.findNodeInFile.all(ident, lookupFile) as Array<{ id: number }>;
+    if (!firstMatch) continue;
+    stmts.insertEdge.run(fileNodeId, firstMatch.id, 'imports-type', 1.0, 0);
+    emitted++;
+  }
+  return emitted;
+}
+
+/**
+ * Emit every edge produced by one import statement: the file→file edge, then
+ * symbol-level type-only edges, then barrel edges. Returns how many were added.
+ */
+function emitEdgesForImport(
+  stmts: IncrementalStmts,
+  imp: ExtractorOutput['imports'][number],
+  fileNodeId: number,
+  relPath: string,
+  rootDir: string,
+  aliases: PathAliases,
+  db: BetterSqlite3Database | null,
+): number {
+  const importerAbs = path.join(rootDir, relPath);
+  const resolvedPath = resolveImportPath(importerAbs, imp.source, rootDir, aliases);
+  const target = stmts.getNodeId.get(resolvedPath, 'file', resolvedPath, 0);
+  if (!target) return 0; // no file node for the resolved path: nothing to link
+
+  let fileEdgeKind: 'imports' | 'reexports' | 'imports-type' = 'imports';
+  if (imp.reexport) fileEdgeKind = 'reexports';
+  else if (imp.typeOnly) fileEdgeKind = 'imports-type';
+  stmts.insertEdge.run(fileNodeId, target.id, fileEdgeKind, 1.0, 0);
+  let total = 1; // counts the file→file edge inserted just above
+  if (imp.typeOnly) total += emitTypeOnlySymbolEdges(db, stmts, imp, resolvedPath, fileNodeId);
+  if (db && !imp.reexport) {
+    total += resolveBarrelImportEdges(db, stmts, fileNodeId, resolvedPath, imp);
+  }
+  return total;
+}
+
 function buildImportEdges(
   stmts: IncrementalStmts,
   relPath: string,
@@ -318,44 +375,7 @@ function buildImportEdges(
 ): number {
   let edgesAdded = 0;
   for (const imp of symbols.imports) {
-    const resolvedPath = resolveImportPath(
-      path.join(rootDir, relPath),
-      imp.source,
-      rootDir,
-      aliases,
-    );
-    const targetRow = stmts.getNodeId.get(resolvedPath, 'file', resolvedPath, 0);
-    if (targetRow) {
-      const edgeKind = imp.reexport ? 'reexports' : imp.typeOnly ? 'imports-type' : 'imports';
-      stmts.insertEdge.run(fileNodeId, targetRow.id, edgeKind, 1.0, 0);
-      edgesAdded++;
-
-      // Type-only imports: create symbol-level edges so the target symbols
-      // get fan-in credit and aren't falsely classified as dead code.
-      if (imp.typeOnly) {
-        for (const name of imp.names) {
-          const cleanName = name.replace(/^\*\s+as\s+/, '');
-          let targetFile = resolvedPath;
-          if (db && isBarrelFile(db, resolvedPath)) {
-            const actual = resolveBarrelTarget(db, resolvedPath, cleanName);
-            if (actual) targetFile = actual;
-          }
-          const candidates = stmts.findNodeInFile.all(cleanName, targetFile) as Array<{
-            id: number;
-            file: string;
-          }>;
-          if (candidates.length > 0) {
-            stmts.insertEdge.run(fileNodeId, candidates[0]!.id, 'imports-type', 1.0, 0);
-            edgesAdded++;
-          }
-        }
-      }
-
-      // Barrel resolution: create edges through re-export chains
-      if (!imp.reexport && db) {
-        edgesAdded += resolveBarrelImportEdges(db, stmts, fileNodeId, resolvedPath, imp);
-      }
-    }
+    edgesAdded += emitEdgesForImport(stmts, imp, fileNodeId, relPath, rootDir, aliases, db);
   }
   return edgesAdded;
 }
@@ -491,6 +511,122 @@ function buildCallEdges(
 
 // ── Main entry point ────────────────────────────────────────────────────
 
+/** Build the "this file was deleted" result returned by `rebuildFile`. */
+function buildDeletionResult(
+  relPath: string,
+  oldNodes: number,
+  oldSymbols: unknown[],
+  diffSymbols: ((old: unknown[], new_: unknown[]) => unknown) | undefined,
+): RebuildResult {
+  const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, []) : null;
+  return {
+    file: relPath,
+    nodesAdded: 0,
+    nodesRemoved: oldNodes,
+    edgesAdded: 0,
+    deleted: true,
+    event: 'deleted',
+    symbolDiff,
+    nodesBefore: oldNodes,
+    nodesAfter: 0,
+  };
+}
+
+/** Rebuild all edges originating in the single (just-parsed) target file. */
+function rebuildEdgesForTargetFile(
+  db: BetterSqlite3Database,
+  stmts: IncrementalStmts,
+  relPath: string,
+  symbols: ExtractorOutput,
+  fileNodeRow: { id: number },
+  rootDir: string,
+): number {
+  const aliases: PathAliases = { baseUrl: null, paths: {} };
+  let edgesAdded = buildContainmentEdges(db, stmts, relPath, symbols);
+  edgesAdded += rebuildDirContainment(db, stmts, relPath);
+  edgesAdded += buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeRow.id, aliases, db);
+  const importedNames = buildImportedNamesMap(symbols, rootDir, relPath, aliases);
+  edgesAdded += buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames);
+  return edgesAdded;
+}
+
+/**
+ * Re-parse each reverse-dep and, for those that parse, delete their outgoing
+ * edges so the cascade can rebuild them.
+ */
+async function parseReverseDeps(
+  db: BetterSqlite3Database,
+  rootDir: string,
+  reverseDeps: string[],
+  engineOpts: EngineOpts,
+  cache: unknown,
+): Promise<Map<string, ExtractorOutput>> {
+  const depSymbols = new Map<string, ExtractorOutput>();
+  for (const depRelPath of reverseDeps) {
+    const symbols_ = await parseReverseDep(rootDir, depRelPath, engineOpts, cache);
+    if (symbols_) {
+      deleteOutgoingEdges(db, depRelPath);
+      depSymbols.set(depRelPath, symbols_);
+    }
+  }
+  return depSymbols;
+}
+
+/**
+ * Pass 2 of the reverse-dep cascade: now that the changed file's `reexports`
+ * edges exist, resolve barrel imports for every reverse-dep so transitive
+ * call edges through the barrel still find their targets.
+ */
+function emitBarrelImportEdgesForReverseDeps(
+  db: BetterSqlite3Database,
+  stmts: IncrementalStmts,
+  depSymbols: Map<string, ExtractorOutput>,
+  rootDir: string,
+): number {
+  let edgesAdded = 0;
+  for (const [depRelPath, symbols_] of depSymbols) {
+    const fileNodeRow_ = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0);
+    if (!fileNodeRow_) continue;
+    const aliases_: PathAliases = { baseUrl: null, paths: {} };
+    for (const imp of symbols_.imports) {
+      if (imp.reexport) continue;
+      const resolvedPath = resolveImportPath(
+        path.join(rootDir, depRelPath),
+        imp.source,
+        rootDir,
+        aliases_,
+      );
+      edgesAdded += resolveBarrelImportEdges(db, stmts, fileNodeRow_.id, resolvedPath, imp);
+    }
+  }
+  return edgesAdded;
+}
+
+/**
+ * Two-pass reverse-dep cascade:
+ *   1. Rebuild direct edges (creating `reexports` edges for barrels).
+ *   2. Add barrel import edges (which need `reexports` edges to exist).
+ */
+async function runReverseDepCascade(
+  db: BetterSqlite3Database,
+  rootDir: string,
+  reverseDeps: string[],
+  stmts: IncrementalStmts,
+  engineOpts: EngineOpts,
+  cache: unknown,
+): Promise<number> {
+  const depSymbols = await parseReverseDeps(db, rootDir, reverseDeps, engineOpts, cache);
+
+  let edgesAdded = 0;
+  // Pass 1: direct edges only (no barrel resolution) — creates reexports edges
+  for (const [depRelPath, symbols_] of depSymbols) {
+    edgesAdded += rebuildReverseDepEdges(db, rootDir, depRelPath, symbols_, stmts, true);
+  }
+  // Pass 2: add barrel import edges (reexports edges now exist)
+  edgesAdded += emitBarrelImportEdgesForReverseDeps(db, stmts, depSymbols, rootDir);
+  return edgesAdded;
+}
+
 /**
  * Parse a single file and update the database incrementally.
  */
@@ -519,18 +655,7 @@ export async function rebuildFile(
 
   if (!fs.existsSync(filePath)) {
     if (cache) (cache as { remove(p: string): void }).remove(filePath);
-    const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, []) : null;
-    return {
-      file: relPath,
-      nodesAdded: 0,
-      nodesRemoved: oldNodes,
-      edgesAdded: 0,
-      deleted: true,
-      event: 'deleted',
-      symbolDiff,
-      nodesBefore: oldNodes,
-      nodesAfter: 0,
-    };
+    return buildDeletionResult(relPath, oldNodes, oldSymbols, diffSymbols);
   }
 
   let code: string;
@@ -553,45 +678,8 @@ export async function rebuildFile(
   if (!fileNodeRow)
     return { file: relPath, nodesAdded: newNodes, nodesRemoved: oldNodes, edgesAdded: 0 };
 
-  const aliases: PathAliases = { baseUrl: null, paths: {} };
-
-  let edgesAdded = buildContainmentEdges(db, stmts, relPath, symbols);
-  edgesAdded += rebuildDirContainment(db, stmts, relPath);
-  edgesAdded += buildImportEdges(stmts, relPath, symbols, rootDir, fileNodeRow.id, aliases, db);
-  const importedNames = buildImportedNamesMap(symbols, rootDir, relPath, aliases);
-  edgesAdded += buildCallEdges(stmts, relPath, symbols, fileNodeRow, importedNames);
-
-  // Cascade: rebuild outgoing edges for reverse-dep files.
-  // Two-pass approach: first rebuild direct edges (creating reexports edges for barrels),
-  // then add barrel import edges (which need reexports edges to exist for resolution).
-  const depSymbols = new Map<string, ExtractorOutput>();
-  for (const depRelPath of reverseDeps) {
-    const symbols_ = await parseReverseDep(rootDir, depRelPath, engineOpts, cache);
-    if (symbols_) {
-      deleteOutgoingEdges(db, depRelPath);
-      depSymbols.set(depRelPath, symbols_);
-    }
-  }
-  // Pass 1: direct edges only (no barrel resolution) — creates reexports edges
-  for (const [depRelPath, symbols_] of depSymbols) {
-    edgesAdded += rebuildReverseDepEdges(db, rootDir, depRelPath, symbols_, stmts, true);
-  }
-  // Pass 2: add barrel import edges (reexports edges now exist)
-  for (const [depRelPath, symbols_] of depSymbols) {
-    const fileNodeRow_ = stmts.getNodeId.get(depRelPath, 'file', depRelPath, 0);
-    if (!fileNodeRow_) continue;
-    const aliases_: PathAliases = { baseUrl: null, paths: {} };
-    for (const imp of symbols_.imports) {
-      if (imp.reexport) continue;
-      const resolvedPath = resolveImportPath(
-        path.join(rootDir, depRelPath),
-        imp.source,
-        rootDir,
-        aliases_,
-      );
-      edgesAdded += resolveBarrelImportEdges(db, stmts, fileNodeRow_.id, resolvedPath, imp);
-    }
-  }
+  let edgesAdded = rebuildEdgesForTargetFile(db, stmts, relPath, symbols, fileNodeRow, rootDir);
+  edgesAdded += await runReverseDepCascade(db, rootDir, reverseDeps, stmts, engineOpts, cache);
 
   const symbolDiff = diffSymbols ? diffSymbols(oldSymbols, newSymbols) : null;
   const event = oldNodes === 0 ? 'added' : 'modified';
diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts
index fc08160b3..9a531ed5c 100644
--- a/src/domain/graph/builder/stages/build-edges.ts
+++ b/src/domain/graph/builder/stages/build-edges.ts
@@ -89,12 +89,74 @@ function setupNodeLookups(ctx: PipelineContext, allNodes: QueryNodeRow[]): void
 
 // ── Import edges ────────────────────────────────────────────────────────
 
+/** Pick the edge kind for an import statement based on its modifiers. */
+function importEdgeKind(imp: Import): string {
+  if (imp.reexport) return 'reexports';
+  if (imp.typeOnly) return 'imports-type';
+  if (imp.dynamicImport) return 'dynamic-imports';
+  return 'imports';
+}
+
+/**
+ * For an `import type` statement, emit symbol-level `imports-type` edges so
+ * the target symbols get fan-in credit and aren't classified as dead code.
+ */
+function emitTypeOnlySymbolEdges(
+  ctx: PipelineContext,
+  imp: Import,
+  resolvedPath: string,
+  fileNodeId: number,
+  allEdgeRows: EdgeRowTuple[],
+): void {
+  if (!ctx.nodesByNameAndFile) return;
+  for (const name of imp.names) {
+    const cleanName = name.replace(/^\*\s+as\s+/, '');
+    let targetFile = resolvedPath;
+    if (isBarrelFile(ctx, resolvedPath)) {
+      const actual = resolveBarrelExport(ctx, resolvedPath, cleanName);
+      if (actual) targetFile = actual;
+    }
+    const candidates = ctx.nodesByNameAndFile.get(`${cleanName}|${targetFile}`);
+    if (candidates && candidates.length > 0) {
+      allEdgeRows.push([fileNodeId, candidates[0]!.id, 'imports-type', 1.0, 0]);
+    }
+  }
+}
+
+/**
+ * Process a single import statement and emit all resulting edges (file→file,
+ * type-only symbol-level, and barrel re-export targets).
+ */
+function emitEdgesForImport(
+  ctx: PipelineContext,
+  imp: Import,
+  fileNodeId: number,
+  relPath: string,
+  getNodeIdStmt: NodeIdStmt,
+  allEdgeRows: EdgeRowTuple[],
+): void {
+  const resolvedPath = getResolved(ctx, path.join(ctx.rootDir, relPath), imp.source);
+  const targetRow = getNodeIdStmt.get(resolvedPath, 'file', resolvedPath, 0);
+  if (!targetRow) return;
+
+  const edgeKind = importEdgeKind(imp);
+  allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]);
+
+  if (imp.typeOnly) {
+    emitTypeOnlySymbolEdges(ctx, imp, resolvedPath, fileNodeId, allEdgeRows);
+  }
+
+  if (!imp.reexport && isBarrelFile(ctx, resolvedPath)) {
+    buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, allEdgeRows);
+  }
+}
+
 function buildImportEdges(
   ctx: PipelineContext,
   getNodeIdStmt: NodeIdStmt,
   allEdgeRows: EdgeRowTuple[],
 ): void {
-  const { fileSymbols, barrelOnlyFiles, rootDir } = ctx;
+  const { fileSymbols, barrelOnlyFiles } = ctx;
 
   for (const [relPath, symbols] of fileSymbols) {
     const isBarrelOnly = barrelOnlyFiles.has(relPath);
@@ -105,40 +167,7 @@ function buildImportEdges(
     for (const imp of symbols.imports) {
       // Barrel-only files: only emit reexport edges, skip regular imports
       if (isBarrelOnly && !imp.reexport) continue;
-
-      const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source);
-      const targetRow = getNodeIdStmt.get(resolvedPath, 'file', resolvedPath, 0);
-      if (!targetRow) continue;
-
-      const edgeKind = imp.reexport
-        ? 'reexports'
-        : imp.typeOnly
-          ? 'imports-type'
-          : imp.dynamicImport
-            ? 'dynamic-imports'
-            : 'imports';
-      allEdgeRows.push([fileNodeId, targetRow.id, edgeKind, 1.0, 0]);
-
-      // Type-only imports: create symbol-level edges so the target symbols
-      // get fan-in credit and aren't falsely classified as dead code.
-      if (imp.typeOnly && ctx.nodesByNameAndFile) {
-        for (const name of imp.names) {
-          const cleanName = name.replace(/^\*\s+as\s+/, '');
-          let targetFile = resolvedPath;
-          if (isBarrelFile(ctx, resolvedPath)) {
-            const actual = resolveBarrelExport(ctx, resolvedPath, cleanName);
-            if (actual) targetFile = actual;
-          }
-          const candidates = ctx.nodesByNameAndFile.get(`${cleanName}|${targetFile}`);
-          if (candidates && candidates.length > 0) {
-            allEdgeRows.push([fileNodeId, candidates[0]!.id, 'imports-type', 1.0, 0]);
-          }
-        }
-      }
-
-      if (!imp.reexport && isBarrelFile(ctx, resolvedPath)) {
-        buildBarrelEdges(ctx, imp, resolvedPath, fileNodeId, edgeKind, getNodeIdStmt, allEdgeRows);
-      }
+      emitEdgesForImport(ctx, imp, fileNodeId, relPath, getNodeIdStmt, allEdgeRows);
     }
   }
 }
@@ -174,83 +203,98 @@ function buildBarrelEdges(
 
 // ── Import edges (native engine) ────────────────────────────────────────
 
-function buildImportEdgesNative(
-  ctx: PipelineContext,
-  getNodeIdStmt: NodeIdStmt,
-  allEdgeRows: EdgeRowTuple[],
-  native: NativeAddon,
-): void {
-  const { fileSymbols, barrelOnlyFiles, rootDir } = ctx;
+/** Native FFI input shape for a single import statement. */
+interface NativeImportInfo {
+  source: string;
+  names: string[];
+  reexport: boolean;
+  typeOnly: boolean;
+  dynamicImport: boolean;
+  wildcardReexport: boolean;
+}
 
-  // 1. Build per-file input data
-  const files: Array<{
-    file: string;
-    fileNodeId: number;
-    isBarrelOnly: boolean;
-    imports: Array<{
-      source: string;
-      names: string[];
-      reexport: boolean;
-      typeOnly: boolean;
-      dynamicImport: boolean;
-      wildcardReexport: boolean;
-    }>;
-    definitionNames: string[];
-  }> = [];
-
-  // Collect all file node IDs we'll need (sources + targets)
-  const fileNodeIds: Array<{ file: string; nodeId: number }> = [];
-  const seenNodeFiles = new Set<string>();
-
-  const addFileNodeId = (relPath: string): { id: number } | undefined => {
-    if (seenNodeFiles.has(relPath)) return fileNodeRowCache.get(relPath);
-    const row = getNodeIdStmt.get(relPath, 'file', relPath, 0);
-    if (row) {
-      seenNodeFiles.add(relPath);
-      fileNodeIds.push({ file: relPath, nodeId: row.id });
-      fileNodeRowCache.set(relPath, row);
-    }
-    return row;
+/** Native FFI input shape for a single file. */
+interface NativeFileInput {
+  file: string;
+  fileNodeId: number;
+  isBarrelOnly: boolean;
+  imports: NativeImportInfo[];
+  definitionNames: string[];
+}
+
+/** Native FFI input shape for re-exports of a single file. */
+interface NativeReexportInput {
+  file: string;
+  reexports: Array<{ source: string; names: string[]; wildcardReexport: boolean }>;
+}
+
+/** Lazily-resolving cache of file-node rows for the native input arrays. */
+interface FileNodeIdRegistry {
+  ids: Array<{ file: string; nodeId: number }>;
+  add(relPath: string): { id: number } | undefined;
+}
+
+function createFileNodeIdRegistry(getNodeIdStmt: NodeIdStmt): FileNodeIdRegistry {
+  const ids: Array<{ file: string; nodeId: number }> = [];
+  const seen = new Set<string>();
+  const cache = new Map<string, { id: number }>();
+  return {
+    ids,
+    add(relPath: string) {
+      if (seen.has(relPath)) return cache.get(relPath);
+      const row = getNodeIdStmt.get(relPath, 'file', relPath, 0);
+      if (row) {
+        seen.add(relPath);
+        ids.push({ file: relPath, nodeId: row.id });
+        cache.set(relPath, row);
+      }
+      return row;
+    },
   };
-  const fileNodeRowCache = new Map<string, { id: number }>();
+}
 
-  // 2. Pre-resolve all imports and build resolved imports array.
-  // Keys use forward-slash-normalized rootDir + "/" + relPath to match the Rust
-  // lookup format (format!("{}/{}", root_dir.replace('\\', "/"), file)).
-  // On Windows, rootDir has backslashes but Rust normalizes them — the JS side
-  // must do the same or every resolve key lookup misses (#750).
-  const resolvedImports: Array<{ key: string; resolvedPath: string }> = [];
+function toNativeImportInfo(imp: Import): NativeImportInfo {
+  return {
+    source: imp.source,
+    names: imp.names,
+    reexport: !!imp.reexport,
+    typeOnly: !!imp.typeOnly,
+    dynamicImport: !!imp.dynamicImport,
+    wildcardReexport: !!imp.wildcardReexport,
+  };
+}
+
+/**
+ * Pre-resolve every import for the given files, registering each resolved
+ * target with the registry so the native side has full node-id coverage.
+ *
+ * Resolved-import keys use forward-slash-normalized rootDir + "/" + relPath to
+ * match the Rust lookup format. On Windows, rootDir has backslashes but Rust
+ * normalizes them — the JS side must do the same or every key lookup misses
+ * (#750).
+ */
+function buildNativeFileInputs(
+  ctx: PipelineContext,
+  registry: FileNodeIdRegistry,
+): {
+  files: NativeFileInput[];
+  resolvedImports: Array<{ key: string; resolvedPath: string }>;
+} {
+  const { fileSymbols, barrelOnlyFiles, rootDir } = ctx;
   const fwdRootDir = rootDir.replace(/\\/g, '/');
+  const files: NativeFileInput[] = [];
+  const resolvedImports: Array<{ key: string; resolvedPath: string }> = [];
 
   for (const [relPath, symbols] of fileSymbols) {
-    const fileNodeRow = addFileNodeId(relPath);
+    const fileNodeRow = registry.add(relPath);
     if (!fileNodeRow) continue;
 
-    const importInfos: Array<{
-      source: string;
-      names: string[];
-      reexport: boolean;
-      typeOnly: boolean;
-      dynamicImport: boolean;
-      wildcardReexport: boolean;
-    }> = [];
-
+    const importInfos: NativeImportInfo[] = [];
     for (const imp of symbols.imports) {
-      // Pre-resolve and register target file node
       const resolvedPath = getResolved(ctx, path.join(rootDir, relPath), imp.source);
-      addFileNodeId(resolvedPath);
-
-      // Key matches Rust's format!("{}/{}", root_dir.replace('\\', "/"), file_input.file)
+      registry.add(resolvedPath);
       resolvedImports.push({ key: `${fwdRootDir}/${relPath}|${imp.source}`, resolvedPath });
-
-      importInfos.push({
-        source: imp.source,
-        names: imp.names,
-        reexport: !!imp.reexport,
-        typeOnly: !!imp.typeOnly,
-        dynamicImport: !!imp.dynamicImport,
-        wildcardReexport: !!imp.wildcardReexport,
-      });
+      importInfos.push(toNativeImportInfo(imp));
     }
 
     files.push({
@@ -261,61 +305,75 @@ function buildImportEdgesNative(
       definitionNames: symbols.definitions.map((d) => d.name),
     });
   }
+  return { files, resolvedImports };
+}
 
-  // 4. Flatten reexportMap
-  const fileReexports: Array<{
-    file: string;
-    reexports: Array<{
-      source: string;
-      names: string[];
-      wildcardReexport: boolean;
-    }>;
-  }> = [];
-  if (ctx.reexportMap) {
-    for (const [file, entries] of ctx.reexportMap) {
-      const reexports = (
-        entries as Array<{ source: string; names: string[]; wildcardReexport: boolean }>
-      ).map((re) => ({
-        source: re.source,
-        names: re.names,
-        wildcardReexport: !!re.wildcardReexport,
-      }));
-      fileReexports.push({ file, reexports });
+/** Flatten `ctx.reexportMap` into the array shape the native side expects. */
+function buildNativeReexports(
+  ctx: PipelineContext,
+  registry: FileNodeIdRegistry,
+): NativeReexportInput[] {
+  const fileReexports: NativeReexportInput[] = [];
+  if (!ctx.reexportMap) return fileReexports;
+
+  for (const [file, entries] of ctx.reexportMap) {
+    const reexports = (
+      entries as Array<{ source: string; names: string[]; wildcardReexport: boolean }>
+    ).map((re) => ({
+      source: re.source,
+      names: re.names,
+      wildcardReexport: !!re.wildcardReexport,
+    }));
+    fileReexports.push({ file, reexports });
 
-      // Register reexport target files for node ID lookup
-      for (const re of reexports) {
-        addFileNodeId(re.source);
-      }
+    for (const re of reexports) {
+      registry.add(re.source);
     }
   }
+  return fileReexports;
+}
 
-  // 5. Compute barrel file list
+function collectBarrelFiles(ctx: PipelineContext): string[] {
   const barrelFiles: string[] = [];
-  for (const [relPath] of fileSymbols) {
-    if (isBarrelFile(ctx, relPath)) {
-      barrelFiles.push(relPath);
-    }
+  for (const [relPath] of ctx.fileSymbols) {
+    if (isBarrelFile(ctx, relPath)) barrelFiles.push(relPath);
   }
+  return barrelFiles;
+}
 
-  // 6. Build symbol node entries for type-only import resolution
+function collectSymbolNodes(
+  ctx: PipelineContext,
+): Array<{ name: string; file: string; nodeId: number }> {
   const symbolNodes: Array<{ name: string; file: string; nodeId: number }> = [];
-  if (ctx.nodesByNameAndFile) {
-    for (const [key, nodes] of ctx.nodesByNameAndFile) {
-      if (nodes.length > 0) {
-        const [name, file] = key.split('|');
-        symbolNodes.push({ name: name!, file: file!, nodeId: nodes[0]!.id });
-      }
-    }
+  if (!ctx.nodesByNameAndFile) return symbolNodes;
+  for (const [key, nodes] of ctx.nodesByNameAndFile) {
+    if (nodes.length === 0) continue;
+    const [name, file] = key.split('|');
+    symbolNodes.push({ name: name!, file: file!, nodeId: nodes[0]!.id });
   }
+  return symbolNodes;
+}
+
+function buildImportEdgesNative(
+  ctx: PipelineContext,
+  getNodeIdStmt: NodeIdStmt,
+  allEdgeRows: EdgeRowTuple[],
+  native: NativeAddon,
+): void {
+  const registry = createFileNodeIdRegistry(getNodeIdStmt);
+
+  const { files, resolvedImports } = buildNativeFileInputs(ctx, registry);
+  const fileReexports = buildNativeReexports(ctx, registry);
+  const barrelFiles = collectBarrelFiles(ctx);
+  const symbolNodes = collectSymbolNodes(ctx);
 
-  // 7. Call native
   const nativeEdges = native.buildImportEdges!(
     files,
     resolvedImports,
     fileReexports,
-    fileNodeIds,
+    registry.ids,
     barrelFiles,
-    rootDir,
+    ctx.rootDir,
     symbolNodes,
   ) as NativeEdge[];
 
diff --git a/src/domain/graph/builder/stages/build-structure.ts b/src/domain/graph/builder/stages/build-structure.ts
index 1a59353be..144537dfe 100644
--- a/src/domain/graph/builder/stages/build-structure.ts
+++ b/src/domain/graph/builder/stages/build-structure.ts
@@ -11,87 +11,104 @@ import type { ExtractorOutput } from '../../../../types.js';
 import type { PipelineContext } from '../context.js';
 import { readFileSafe } from '../helpers.js';
 
-export async function buildStructure(ctx: PipelineContext): Promise<void> {
-  const { db, fileSymbols, rootDir, discoveredDirs, allSymbols, isFullBuild } = ctx;
-
-  // Build line count map (prefer cached _lineCount from parser)
+/** Populate `ctx.lineCountMap` from cached parser results, falling back to disk. */
+function populateLineCountMap(ctx: PipelineContext): void {
+  const { fileSymbols, rootDir } = ctx;
   ctx.lineCountMap = new Map();
   for (const [relPath, symbols] of fileSymbols) {
     const lineCount =
       (symbols as ExtractorOutput & { lineCount?: number }).lineCount ?? symbols._lineCount;
     if (lineCount) {
       ctx.lineCountMap.set(relPath, lineCount);
-    } else {
-      const absPath = path.join(rootDir, relPath);
-      try {
-        const content = readFileSafe(absPath);
-        ctx.lineCountMap.set(relPath, content.split('\n').length);
-      } catch {
-        ctx.lineCountMap.set(relPath, 0);
-      }
+      continue;
+    }
+    const absPath = path.join(rootDir, relPath);
+    try {
+      const content = readFileSafe(absPath);
+      ctx.lineCountMap.set(relPath, content.split('\n').length);
+    } catch {
+      ctx.lineCountMap.set(relPath, 0);
     }
   }
+}
 
-  const changedFileList = isFullBuild ? null : [...allSymbols.keys()];
-
-  // For small incremental builds on large codebases, use a fast path that
-  // updates only the changed files' metrics via targeted SQL instead of
-  // loading ALL definitions from DB (~8ms) and recomputing ALL metrics (~15ms).
-  // Gate: ≤smallFilesThreshold changed files AND significantly more existing files (>20) to
-  // avoid triggering on small test fixtures where directory metrics matter.
+/** Count file-kind nodes already in the DB, preferring the native connection. */
+function countExistingFiles(ctx: PipelineContext): number {
   const useNativeReads = ctx.engineName === 'native' && !!ctx.nativeDb;
-  const existingFileCount = !isFullBuild
-    ? (
-        (useNativeReads
-          ? ctx.nativeDb!.queryGet("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'", [])
-          : db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get()) as {
-          c: number;
-        }
-      ).c
-    : 0;
-  const useSmallIncrementalFastPath =
-    !isFullBuild &&
-    changedFileList != null &&
-    changedFileList.length <= ctx.config.build.smallFilesThreshold &&
-    existingFileCount > 20;
-
-  if (!isFullBuild && !useSmallIncrementalFastPath) {
-    // Medium/large incremental: load unchanged files from DB for complete structure
-    loadUnchangedFilesFromDb(ctx);
-  }
+  const row = (
+    useNativeReads
+      ? ctx.nativeDb!.queryGet("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'", [])
+      : ctx.db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get()
+  ) as { c: number };
+  return row.c;
+}
 
-  // Build directory structure
-  const t0 = performance.now();
+/**
+ * Build directory structure + metrics. Chooses between the fast incremental
+ * path (a handful of files changed on a large codebase) and the full path
+ * (delegated to `features/structure`).
+ */
+async function buildDirectoryStructure(
+  ctx: PipelineContext,
+  changedFileList: string[] | null,
+  useSmallIncrementalFastPath: boolean,
+): Promise<void> {
   if (useSmallIncrementalFastPath) {
     updateChangedFileMetrics(ctx, changedFileList!);
-  } else {
-    const relDirs = new Set<string>();
-    for (const absDir of discoveredDirs) {
-      relDirs.add(normalizePath(path.relative(rootDir, absDir)));
-    }
-    try {
-      const { buildStructure: buildStructureFn } = (await import(
-        '../../../../features/structure.js'
-      )) as {
-        buildStructure: (
-          db: PipelineContext['db'],
-          fileSymbols: Map<string, ExtractorOutput>,
-          rootDir: string,
-          lineCountMap: Map<string, number>,
-          directories: Set<string>,
-          changedFiles: string[] | null,
-        ) => void;
-      };
-      const changedFilePaths = isFullBuild ? null : [...allSymbols.keys()];
-      buildStructureFn(db, fileSymbols, rootDir, ctx.lineCountMap, relDirs, changedFilePaths);
-    } catch (err) {
-      debug(`Structure analysis failed: ${(err as Error).message}`);
-    }
+    return;
   }
-  ctx.timing.structureMs = performance.now() - t0;
 
-  // Classify node roles (incremental: only reclassify changed files' nodes)
-  const t1 = performance.now();
+  const { db, fileSymbols, rootDir, discoveredDirs, allSymbols, isFullBuild } = ctx;
+  const relDirs = new Set<string>();
+  for (const absDir of discoveredDirs) {
+    relDirs.add(normalizePath(path.relative(rootDir, absDir)));
+  }
+  try {
+    const { buildStructure: buildStructureFn } = (await import(
+      '../../../../features/structure.js'
+    )) as {
+      buildStructure: (
+        db: PipelineContext['db'],
+        fileSymbols: Map<string, ExtractorOutput>,
+        rootDir: string,
+        lineCountMap: Map<string, number>,
+        directories: Set<string>,
+        changedFiles: string[] | null,
+      ) => void;
+    };
+    const changedFilePaths = isFullBuild ? null : [...allSymbols.keys()];
+    buildStructureFn(db, fileSymbols, rootDir, ctx.lineCountMap, relDirs, changedFilePaths);
+  } catch (err) {
+    debug(`Structure analysis failed: ${(err as Error).message}`);
+  }
+}
+
+/** Convert a `NativeDatabase.classifyRoles*` result into the JS summary shape. */
+function nativeRoleSummaryToRecord(
+  nativeResult: NonNullable<
+    ReturnType<NonNullable<PipelineContext['nativeDb']>['classifyRolesFull']>
+  >,
+): Record<string, number> {
+  return {
+    entry: nativeResult.entry,
+    core: nativeResult.core,
+    utility: nativeResult.utility,
+    adapter: nativeResult.adapter,
+    dead: nativeResult.dead,
+    'dead-leaf': nativeResult.deadLeaf,
+    'dead-entry': nativeResult.deadEntry,
+    'dead-ffi': nativeResult.deadFfi,
+    'dead-unresolved': nativeResult.deadUnresolved,
+    'test-only': nativeResult.testOnly,
+    leaf: nativeResult.leaf,
+  };
+}
+
+async function classifyRoles(
+  ctx: PipelineContext,
+  changedFileList: string[] | null,
+): Promise<void> {
+  const useNativeReads = ctx.engineName === 'native' && !!ctx.nativeDb;
   try {
     let roleSummary: Record<string, number> | null = null;
 
@@ -103,24 +120,9 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
         changedFileList && changedFileList.length > 0
           ? ctx.nativeDb.classifyRolesIncremental(changedFileList)
           : ctx.nativeDb.classifyRolesFull();
-      if (nativeResult) {
-        roleSummary = {
-          entry: nativeResult.entry,
-          core: nativeResult.core,
-          utility: nativeResult.utility,
-          adapter: nativeResult.adapter,
-          dead: nativeResult.dead,
-          'dead-leaf': nativeResult.deadLeaf,
-          'dead-entry': nativeResult.deadEntry,
-          'dead-ffi': nativeResult.deadFfi,
-          'dead-unresolved': nativeResult.deadUnresolved,
-          'test-only': nativeResult.testOnly,
-          leaf: nativeResult.leaf,
-        };
-      }
+      if (nativeResult) roleSummary = nativeRoleSummaryToRecord(nativeResult);
     }
 
-    // Fall back to JS path
     if (!roleSummary) {
       const { classifyNodeRoles } = (await import('../../../../features/structure.js')) as {
         classifyNodeRoles: (
@@ -141,6 +143,37 @@ export async function buildStructure(ctx: PipelineContext): Promise<void> {
   } catch (err) {
     debug(`Role classification failed: ${(err as Error).message}`);
   }
+}
+
+export async function buildStructure(ctx: PipelineContext): Promise<void> {
+  const { allSymbols, isFullBuild } = ctx;
+
+  populateLineCountMap(ctx);
+
+  const changedFileList = isFullBuild ? null : [...allSymbols.keys()];
+
+  // For small incremental builds on large codebases, use a fast path that
+  // updates only the changed files' metrics via targeted SQL instead of
+  // loading ALL definitions from DB (~8ms) and recomputing ALL metrics (~15ms).
+  // Gate: ≤smallFilesThreshold changed files AND significantly more existing files (>20) to
+  // avoid triggering on small test fixtures where directory metrics matter.
+  const existingFileCount = !isFullBuild ? countExistingFiles(ctx) : 0;
+  const useSmallIncrementalFastPath =
+    !isFullBuild &&
+    changedFileList != null &&
+    changedFileList.length <= ctx.config.build.smallFilesThreshold &&
+    existingFileCount > 20;
+
+  if (!isFullBuild && !useSmallIncrementalFastPath) {
+    loadUnchangedFilesFromDb(ctx);
+  }
+
+  const t0 = performance.now();
+  await buildDirectoryStructure(ctx, changedFileList, useSmallIncrementalFastPath);
+  ctx.timing.structureMs = performance.now() - t0;
+
+  const t1 = performance.now();
+  await classifyRoles(ctx, changedFileList);
   ctx.timing.rolesMs = performance.now() - t1;
 }
 
diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts
index cc51155dc..222d92e42 100644
--- a/src/domain/graph/builder/stages/detect-changes.ts
+++ b/src/domain/graph/builder/stages/detect-changes.ts
@@ -162,14 +162,14 @@ function tryJournalTier(
   return { changed, removed: [...removedSet], isFullBuild: false };
 }
 
-function mtimeAndHashTiers(
+/** Tier 1: mtime+size triage. Returns files that still need hashing and the skipped count. */
+function tierMtimeSize(
   existing: Map<string, FileHashRow>,
   allFiles: string[],
   rootDir: string,
-  removed: string[],
-): ChangeResult {
+): { needsHash: NeedsHashItem[]; skipped: number } {
   const needsHash: NeedsHashItem[] = [];
-  const skipped: string[] = [];
+  let skipped = 0;
 
   for (const file of allFiles) {
     const relPath = normalizePath(path.relative(rootDir, file));
@@ -183,16 +183,17 @@ function mtimeAndHashTiers(
     const storedMtime = record.mtime || 0;
     const storedSize = record.size || 0;
     if (storedSize > 0 && stat.mtime === storedMtime && stat.size === storedSize) {
-      skipped.push(relPath);
+      skipped++;
       continue;
     }
     needsHash.push({ file, relPath, stat });
   }
 
-  if (needsHash.length > 0) {
-    debug(`Tier 1: ${skipped.length} skipped by mtime+size, ${needsHash.length} need hash check`);
-  }
+  return { needsHash, skipped };
+}
 
+/** Tier 2: hash candidates from tier 1, classifying changed vs metadata-only. */
+function tierHash(existing: Map<string, FileHashRow>, needsHash: NeedsHashItem[]): ChangedFile[] {
   const changed: ChangedFile[] = [];
   for (const item of needsHash) {
     let content: string | undefined;
@@ -217,11 +218,26 @@ function mtimeAndHashTiers(
       });
     }
   }
+  return changed;
+}
+
+function mtimeAndHashTiers(
+  existing: Map<string, FileHashRow>,
+  allFiles: string[],
+  rootDir: string,
+  removed: string[],
+): ChangeResult {
+  const { needsHash, skipped } = tierMtimeSize(existing, allFiles, rootDir);
+  if (needsHash.length > 0) {
+    debug(`Tier 1: ${skipped} skipped by mtime+size, ${needsHash.length} need hash check`);
+  }
+
+  const changed = tierHash(existing, needsHash);
 
-  const parseChanged = changed.filter((c) => !c.metadataOnly);
   if (needsHash.length > 0) {
+    const parseChangedLen = changed.filter((c) => !c.metadataOnly).length;
     debug(
-      `Tier 2: ${parseChanged.length} actually changed, ${changed.length - parseChanged.length} metadata-only`,
+      `Tier 2: ${parseChangedLen} actually changed, ${changed.length - parseChangedLen} metadata-only`,
     );
   }
 
@@ -512,61 +528,43 @@ function handleIncrementalBuild(ctx: PipelineContext): void {
   purgeAndAddReverseDeps(ctx, changePaths, reverseDeps);
 }
 
-/**
- * Read-only pre-flight check for the native orchestrator.
- *
- * Returns true iff every collected source file has matching mtime+size in
- * `file_hashes` and no DB-tracked file has been removed. When true, the
- * caller can short-circuit before invoking the native orchestrator —
- * matching WASM's ~20 ms early-exit path and avoiding the ~2s flat
- * per-call native rebuild overhead seen in CI (#1054).
- *
- * Intentionally Tier-0/Tier-1 only (journal + mtime/size). Tier-2 content
- * hashing is left to the native side: when this returns false the caller
- * falls through to the orchestrator, which performs its own complete
- * detection and is the source of truth.
- *
- * Conservatively returns false when CFG or dataflow analysis is enabled
- * but the corresponding tables are empty — otherwise the fast-skip would
- * silently suppress the pending-analysis pass that the JS path runs via
- * `runPendingAnalysis`, and CFG/dataflow data would never populate on
- * repos where source files don't change between builds.
- *
- * Pure read of `db` and the filesystem — never mutates either.
- */
-export function detectNoChanges(
-  db: BetterSqlite3Database,
-  allFiles: string[],
-  rootDir: string,
-  opts?: Record<string, unknown>,
-): boolean {
-  // Diagnostic logging gated by env var — used by the bench gate to surface
-  // why the fast-skip is not firing on CI runners (#1066). Off by default to
-  // avoid noise on every regular incremental build.
+/** Build the `[fast-skip]` diagnostic logger: silent unless CODEGRAPH_FAST_SKIP_DIAG is '1'; shared by `detectNoChanges` and its helpers. */
+function makeFastSkipLogger(): (reason: string) => void {
   const diag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1';
-  const log = (reason: string): void => {
+  return (reason: string): void => {
     if (diag) info(`[fast-skip] ${reason}`);
   };
+}
 
-  let hasTable = false;
+/**
+ * Load `file_hashes` into a map keyed by file path for the no-change pre-flight.
+ * Returns null (after logging why) if the table is missing or empty — the caller must fall through.
+ */
+function loadFileHashesForPreflight(
+  db: BetterSqlite3Database,
+  log: (reason: string) => void,
+): Map<string, FileHashRow> | null {
   try {
     db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get();
-    hasTable = true;
   } catch {
-    /* table missing — first build */
-  }
-  if (!hasTable) {
     log('false: file_hashes table missing');
-    return false;
+    return null; // probe query threw — treated as "table missing"
   }
-
   const rows = db.prepare('SELECT file, hash, mtime, size FROM file_hashes').all() as FileHashRow[];
   if (rows.length === 0) {
     log('false: file_hashes table empty');
-    return false;
+    return null; // table exists but holds no rows
   }
-  const existing = new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));
+  return new Map<string, FileHashRow>(rows.map((r) => [r.file, r]));
+}
 
+/** Returns true iff every file in `allFiles` matches a stored mtime+size record. */
+function allFilesMatchStoredStat(
+  existing: Map<string, FileHashRow>,
+  allFiles: string[],
+  rootDir: string,
+  log: (reason: string) => void,
+): boolean {
   const currentFiles = new Set<string>();
   for (const file of allFiles) {
     currentFiles.add(normalizePath(path.relative(rootDir, file)));
@@ -603,21 +601,66 @@ export function detectNoChanges(
       return false;
     }
   }
+  return true;
+}
 
-  // Pending-analysis guard: if CFG/dataflow is enabled but the corresponding
-  // table is empty (analysis newly enabled, or tables wiped between builds),
-  // fall through so the orchestrator / JS pipeline can run runPendingAnalysis.
-  // Mirrors the check at the top of runPendingAnalysis (see line ~244).
-  if (opts) {
-    if (opts.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) {
-      log('false: pending-analysis guard — cfg_blocks is empty');
-      return false;
-    }
-    if (opts.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) {
-      log('false: pending-analysis guard — dataflow is empty');
-      return false;
-    }
+/**
+ * Pending-analysis guard: returns false when CFG/dataflow is not disabled but its
+ * table is empty (analysis newly enabled, or tables wiped between builds), so the
+ * orchestrator / JS pipeline can run runPendingAnalysis. Returns true when `opts`
+ * is undefined. Mirrors the check at the top of runPendingAnalysis.
+ */
+function passesPendingAnalysisGuard(
+  db: BetterSqlite3Database,
+  opts: Record<string, unknown> | undefined,
+  log: (reason: string) => void,
+): boolean {
+  if (!opts) return true;
+  if (opts.cfg !== false && hasEmptyAnalysisTable(db, 'cfg_blocks')) {
+    log('false: pending-analysis guard — cfg_blocks is empty');
+    return false;
   }
+  if (opts.dataflow !== false && hasEmptyAnalysisTable(db, 'dataflow')) {
+    log('false: pending-analysis guard — dataflow is empty');
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Read-only pre-flight check for the native orchestrator.
+ *
+ * Returns true iff every collected source file has matching mtime+size in
+ * `file_hashes` and no DB-tracked file has been removed. When true, the
+ * caller can short-circuit before invoking the native orchestrator —
+ * matching WASM's ~20 ms early-exit path and avoiding the ~2s flat
+ * per-call native rebuild overhead seen in CI (#1054).
+ *
+ * Intentionally Tier-0/Tier-1 only (journal + mtime/size). Tier-2 content
+ * hashing is left to the native side: when this returns false the caller
+ * falls through to the orchestrator, which performs its own complete
+ * detection and is the source of truth.
+ *
+ * Conservatively returns false when CFG or dataflow analysis is enabled
+ * but the corresponding tables are empty — otherwise the fast-skip would
+ * silently suppress the pending-analysis pass that the JS path runs via
+ * `runPendingAnalysis`, and CFG/dataflow data would never populate on
+ * repos where source files don't change between builds.
+ *
+ * Pure read of `db` and the filesystem — never mutates either.
+ */
+export function detectNoChanges(
+  db: BetterSqlite3Database,
+  allFiles: string[],
+  rootDir: string,
+  opts?: Record<string, unknown>,
+): boolean {
+  const log = makeFastSkipLogger();
+  const existing = loadFileHashesForPreflight(db, log);
+  if (!existing) return false;
+
+  if (!allFilesMatchStoredStat(existing, allFiles, rootDir, log)) return false;
+  if (!passesPendingAnalysisGuard(db, opts, log)) return false;
 
   log(`true: all checks passed (${allFiles.length} files)`);
   return true;
diff --git a/src/domain/graph/builder/stages/finalize.ts b/src/domain/graph/builder/stages/finalize.ts
index d59fe016a..ab2e1d429 100644
--- a/src/domain/graph/builder/stages/finalize.ts
+++ b/src/domain/graph/builder/stages/finalize.ts
@@ -136,82 +136,72 @@ function persistBuildMetadata(
   }
 }
 
-/**
- * Run advisory checks on full builds: orphaned embeddings, stale embeddings,
- * and unused exports. Informational only — does not affect correctness.
- */
-function runAdvisoryChecks(ctx: PipelineContext, hasEmbeddings: boolean, buildNow: Date): void {
-  // Batched native path: single napi call for all 3 advisory checks
-  if (ctx.engineName === 'native' && ctx.nativeDb?.runAdvisoryChecks) {
-    const result = ctx.nativeDb.runAdvisoryChecks(hasEmbeddings);
-    if (result.orphanedEmbeddings > 0) {
-      warn(
-        `${result.orphanedEmbeddings} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
-      );
-    }
-    if (result.embedBuiltAt) {
-      const embedTime = new Date(result.embedBuiltAt).getTime();
-      if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
-        warn(
-          'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.',
-        );
-      }
-    }
-    if (result.unusedExports > 0) {
-      warn(
-        `${result.unusedExports} exported symbol${result.unusedExports > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports <file> --unused" to inspect.`,
-      );
+/** Format the unused-exports advisory: "symbols have" when count > 1, "symbol has" otherwise. */
+function unusedExportsMessage(count: number): string {
+  return `${count} exported symbol${count > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports <file> --unused" to inspect.`;
+}
+
+/** Run all three advisory checks via one batched native call; the caller must have verified `ctx.nativeDb.runAdvisoryChecks` exists (asserted non-null below). */
+function runAdvisoryChecksNative(
+  ctx: PipelineContext,
+  hasEmbeddings: boolean,
+  buildNow: Date,
+): void {
+  const result = ctx.nativeDb!.runAdvisoryChecks!(hasEmbeddings);
+  if (result.orphanedEmbeddings > 0) {
+    warn(
+      `${result.orphanedEmbeddings} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
+    );
+  }
+  if (result.embedBuiltAt) {
+    const embedTime = new Date(result.embedBuiltAt).getTime();
+    if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) { // unparseable timestamps are ignored
+      warn('Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.');
     }
-    return;
   }
+  if (result.unusedExports > 0) {
+    warn(unusedExportsMessage(result.unusedExports));
+  }
+}
 
-  const { db } = ctx;
-
-  // Orphaned embeddings warning
-  if (hasEmbeddings) {
-    try {
-      const orphaned = (
-        db
-          .prepare(
-            'SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)',
-          )
-          .get() as { c: number }
-      ).c;
-      if (orphaned > 0) {
-        warn(
-          `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
-        );
-      }
-    } catch {
-      /* ignore - embeddings table may have been dropped */
+function checkOrphanedEmbeddings(ctx: PipelineContext): void { // warns when embeddings reference node ids absent from `nodes`
+  try {
+    const orphaned = (
+      ctx.db
+        .prepare('SELECT COUNT(*) as c FROM embeddings WHERE node_id NOT IN (SELECT id FROM nodes)')
+        .get() as { c: number }
+    ).c;
+    if (orphaned > 0) {
+      warn(
+        `${orphaned} embeddings are orphaned (nodes changed). Run "codegraph embed" to refresh.`,
+      );
     }
+  } catch {
+    /* ignore - embeddings table may have been dropped */
   }
+}
 
-  // Stale embeddings warning (built before current graph rebuild)
-  if (hasEmbeddings) {
-    try {
-      const embedBuiltAt = (
-        db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as
-          | { value: string }
-          | undefined
-      )?.value;
-      if (embedBuiltAt) {
-        const embedTime = new Date(embedBuiltAt).getTime();
-        if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) {
-          warn(
-            'Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.',
-          );
-        }
-      }
-    } catch {
-      /* ignore - embedding_meta table may not exist */
+function checkStaleEmbeddings(ctx: PipelineContext, buildNow: Date): void { // warns when embedding_meta.built_at predates `buildNow`
+  try {
+    const embedBuiltAt = (
+      ctx.db.prepare("SELECT value FROM embedding_meta WHERE key = 'built_at'").get() as
+        | { value: string }
+        | undefined
+    )?.value;
+    if (!embedBuiltAt) return; // no row (or empty value) — nothing to compare
+    const embedTime = new Date(embedBuiltAt).getTime();
+    if (!Number.isNaN(embedTime) && embedTime < buildNow.getTime()) { // unparseable timestamps are ignored
+      warn('Embeddings were built before the last graph rebuild. Run "codegraph embed" to update.');
     }
+  } catch {
+    /* ignore - embedding_meta table may not exist */
   }
+}
 
-  // Unused exports warning
+function checkUnusedExports(ctx: PipelineContext): void {
   try {
     const unusedCount = (
-      db
+      ctx.db
         .prepare(
           `SELECT COUNT(*) as c FROM nodes
          WHERE exported = 1 AND kind != 'file'
@@ -224,16 +214,28 @@ function runAdvisoryChecks(ctx: PipelineContext, hasEmbeddings: boolean, buildNo
         )
         .get() as { c: number }
     ).c;
-    if (unusedCount > 0) {
-      warn(
-        `${unusedCount} exported symbol${unusedCount > 1 ? 's have' : ' has'} zero cross-file consumers. Run "codegraph exports <file> --unused" to inspect.`,
-      );
-    }
+    if (unusedCount > 0) warn(unusedExportsMessage(unusedCount));
   } catch {
     /* exported column may not exist on older DBs */
   }
 }
 
+/**
+ * Run advisory checks on full builds: orphaned embeddings, stale embeddings,
+ * and unused exports. Informational only — does not affect correctness.
+ */
+function runAdvisoryChecks(ctx: PipelineContext, hasEmbeddings: boolean, buildNow: Date): void {
+  if (ctx.engineName === 'native' && ctx.nativeDb?.runAdvisoryChecks) { // batched path: one native call covers all three checks
+    runAdvisoryChecksNative(ctx, hasEmbeddings, buildNow);
+    return;
+  }
+  if (hasEmbeddings) { // JS fallback: the two embedding checks only run when embeddings exist
+    checkOrphanedEmbeddings(ctx);
+    checkStaleEmbeddings(ctx, buildNow);
+  }
+  checkUnusedExports(ctx);
+}
+
 export async function finalize(ctx: PipelineContext): Promise<void> {
   const { allSymbols, rootDir, isFullBuild, hasEmbeddings, opts } = ctx;
 
diff --git a/src/domain/graph/builder/stages/insert-nodes.ts b/src/domain/graph/builder/stages/insert-nodes.ts
index 88e403ec9..09aad25d8 100644
--- a/src/domain/graph/builder/stages/insert-nodes.ts
+++ b/src/domain/graph/builder/stages/insert-nodes.ts
@@ -92,23 +92,69 @@ function marshalSymbolBatches(allSymbols: Map<string, ExtractorOutput>): InsertN
   return batches;
 }
 
+/** A single file_hashes row. */
+interface FileHashRecord {
+  file: string; // the `relPath` the hash helpers were given
+  hash: string; // precomputed hash, or `fileHash(code)` of the file content
+  mtime: number; // 0 when no stat is available
+  size: number; // 0 when no stat is available
+}
+
+/** Resolve the (hash, mtime, size) record for a relPath, reading from disk if needed; returns null when the file cannot be read. */
+function resolveHashFromPrecomputed(
+  relPath: string,
+  precomputed: PrecomputedFileData,
+  rootDir: string,
+  caller: string,
+): FileHashRecord | null {
+  if (precomputed.hash) { // hash already known — the file content is not re-read
+    let mtime: number;
+    let size: number;
+    if (precomputed.stat) {
+      mtime = precomputed.stat.mtime;
+      size = precomputed.stat.size;
+    } else { // no cached stat — stat the file now, falling back to 0/0
+      const rawStat = fileStat(path.join(rootDir, relPath));
+      mtime = rawStat ? rawStat.mtime : 0;
+      size = rawStat ? rawStat.size : 0;
+    }
+    return { file: relPath, hash: precomputed.hash, mtime, size };
+  }
+
+  const absPath = path.join(rootDir, relPath);
+  let code: string | null;
+  try {
+    code = readFileSafe(absPath);
+  } catch (e) {
+    debug(`${caller}: readFileSafe failed for ${relPath}: ${toErrorMessage(e)}`); // `caller` only labels this message
+    code = null;
+  }
+  if (code === null) return null; // read threw or returned null → no record for this file
+  const stat = fileStat(absPath);
+  return {
+    file: relPath,
+    hash: fileHash(code),
+    mtime: stat ? stat.mtime : 0,
+    size: stat ? stat.size : 0,
+  };
+}
+
 /**
- * Build file hash entries for every collected file, including those that
- * produced zero symbols (empty files, parsers that silently no-op'd, or
- * optional-language extensions whose grammar wasn't installed). Iterating the
- * symbol map instead would skip such files and leave them missing from
- * `file_hashes`, which permanently breaks the JS-side fast-skip pre-flight on
- * any subsequent no-op rebuild (#1068).
+ * Walk each distinct collected file once, yielding a `FileHashRecord` unless it
+ * cannot be read, then one record per metadata-only update. Shared by
+ * `buildFileHashes` (native path) and `updateFileHashes` (JS fallback) so the
+ * iteration and hash-resolution logic stays in one place.
  *
- * Exported for unit testing.
+ * Files marked `_reverseDepOnly` are skipped — their hashes are already
+ * correct in the DB.
  */
-export function buildFileHashes(
+function* iterFileHashRecords(
   filesToParse: FileToParse[],
   precomputedData: Map<string, PrecomputedFileData>,
   metadataUpdates: MetadataUpdate[],
   rootDir: string,
-): Array<{ file: string; hash: string; mtime: number; size: number }> {
-  const fileHashes: Array<{ file: string; hash: string; mtime: number; size: number }> = [];
+  caller: string,
+): Generator<FileHashRecord> {
   const seen = new Set<string>();
 
   for (const item of filesToParse) {
@@ -117,47 +163,53 @@ export function buildFileHashes(
     seen.add(relPath);
 
     const precomputed = precomputedData.get(relPath);
-    if (precomputed?._reverseDepOnly) {
-      continue; // file unchanged, hash already correct
-    }
-    if (precomputed?.hash) {
-      let mtime: number;
-      let size: number;
-      if (precomputed.stat) {
-        mtime = precomputed.stat.mtime;
-        size = precomputed.stat.size;
-      } else {
-        const rawStat = fileStat(path.join(rootDir, relPath));
-        mtime = rawStat ? rawStat.mtime : 0;
-        size = rawStat ? rawStat.size : 0;
-      }
-      fileHashes.push({ file: relPath, hash: precomputed.hash, mtime, size });
-    } else {
-      const absPath = path.join(rootDir, relPath);
-      let code: string | null;
-      try {
-        code = readFileSafe(absPath);
-      } catch (e) {
-        debug(`buildFileHashes: readFileSafe failed for ${relPath}: ${toErrorMessage(e)}`);
-        code = null;
-      }
-      if (code !== null) {
-        const stat = fileStat(absPath);
-        const mtime = stat ? stat.mtime : 0;
-        const size = stat ? stat.size : 0;
-        fileHashes.push({ file: relPath, hash: fileHash(code), mtime, size });
-      }
-    }
+    if (precomputed?._reverseDepOnly) continue;
+
+    const record = resolveHashFromPrecomputed(
+      relPath,
+      precomputed ?? ({} as PrecomputedFileData),
+      rootDir,
+      caller,
+    );
+    if (record) yield record;
   }
 
-  // Also include metadata-only updates (self-heal mtime/size without re-parse)
+  // Metadata-only updates (self-heal mtime/size without re-parse)
   for (const item of metadataUpdates) {
-    const mtime = item.stat ? item.stat.mtime : 0;
-    const size = item.stat ? item.stat.size : 0;
-    fileHashes.push({ file: item.relPath, hash: item.hash, mtime, size });
+    yield {
+      file: item.relPath,
+      hash: item.hash,
+      mtime: item.stat ? item.stat.mtime : 0,
+      size: item.stat ? item.stat.size : 0,
+    };
   }
+}
 
-  return fileHashes;
+/**
+ * Build file hash entries for every collected file, including those that
+ * produced zero symbols (empty files, parsers that silently no-op'd, or
+ * optional-language extensions whose grammar wasn't installed). Iterating the
+ * symbol map instead would skip such files and leave them missing from
+ * `file_hashes`, which permanently breaks the JS-side fast-skip pre-flight on
+ * any subsequent no-op rebuild (#1068).
+ *
+ * Materializes `iterFileHashRecords` into an array. Exported for unit testing.
+ */
+export function buildFileHashes(
+  filesToParse: FileToParse[],
+  precomputedData: Map<string, PrecomputedFileData>,
+  metadataUpdates: MetadataUpdate[],
+  rootDir: string,
+): FileHashRecord[] {
+  return [
+    ...iterFileHashRecords(
+      filesToParse,
+      precomputedData,
+      metadataUpdates,
+      rootDir,
+      'buildFileHashes', // caller label passed through to the iterator
+    ),
+  ];
 }
 
 // ── Native fast-path ─────────────────────────────────────────────────
@@ -260,36 +312,38 @@ function insertDefinitionsAndExports(
 
 // ── JS fallback: Phase 2+3 ──────────────────────────────────────────
 
-function insertChildrenAndEdges(
+/** Build the in-memory `name|kind|line` → node-id map for a single file from the rows of `bulkNodeIdsByFile`. */
+function loadFileNodeIdMap(db: BetterSqlite3Database, relPath: string): Map<string, number> {
+  const map = new Map<string, number>();
+  for (const row of bulkNodeIdsByFile(db, relPath)) {
+    map.set(`${row.name}|${row.kind}|${row.line}`, row.id); // a later row with the same key overwrites an earlier one
+  }
+  return map;
+}
+
+/**
+ * First pass: for every file, emit file→def containment edges and collect
+ * the child-node insertion rows.
+ */
+function collectChildRowsAndFileEdges(
   db: BetterSqlite3Database,
   allSymbols: Map<string, ExtractorOutput>,
+  childRows: unknown[][],
+  edgeRows: unknown[][],
 ): void {
-  const childRows: unknown[][] = [];
-  const edgeRows: unknown[][] = [];
-
   for (const [relPath, symbols] of allSymbols) {
-    // First pass: collect file→def edges and child rows
-    const nodeIdMap = new Map<string, number>();
-    for (const row of bulkNodeIdsByFile(db, relPath)) {
-      nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
-    }
-
+    const nodeIdMap = loadFileNodeIdMap(db, relPath);
     const fileId = nodeIdMap.get(`${relPath}|file|0`);
 
     for (const def of symbols.definitions) {
       const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
 
-      // Containment edge: file -> definition
       if (fileId && defId) {
         edgeRows.push([fileId, defId, 'contains', 1.0, 0]);
       }
-
-      if (!def.children?.length) continue;
-      if (!defId) continue;
+      if (!def.children?.length || !defId) continue;
 
       for (const child of def.children) {
-        // Child node
-        const qualifiedName = `${def.name}.${child.name}`;
         childRows.push([
           child.name,
           child.kind,
@@ -297,39 +351,55 @@ function insertChildrenAndEdges(
           child.line,
           child.endLine || null,
           defId,
-          qualifiedName,
+          `${def.name}.${child.name}`,
           def.name,
           child.visibility || null,
         ]);
       }
     }
   }
+}
 
-  // Insert children first (so they exist for edge lookup)
-  batchInsertNodes(db, childRows);
-
-  // Now re-fetch IDs to include newly-inserted children, then add child edges
+/**
+ * Second pass (after child nodes have been inserted): append def→child `contains`
+ * edges and child→def `parameter_of` edges to `edgeRows`, re-reading node ids per file.
+ */
+function collectChildEdges(
+  db: BetterSqlite3Database,
+  allSymbols: Map<string, ExtractorOutput>,
+  edgeRows: unknown[][],
+): void {
   for (const [relPath, symbols] of allSymbols) {
-    const nodeIdMap = new Map<string, number>();
-    for (const row of bulkNodeIdsByFile(db, relPath)) {
-      nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id);
-    }
+    const nodeIdMap = loadFileNodeIdMap(db, relPath);
     for (const def of symbols.definitions) {
       if (!def.children?.length) continue;
       const defId = nodeIdMap.get(`${def.name}|${def.kind}|${def.line}`);
       if (!defId) continue;
       for (const child of def.children) {
         const childId = nodeIdMap.get(`${child.name}|${child.kind}|${child.line}`);
-        if (childId) {
-          edgeRows.push([defId, childId, 'contains', 1.0, 0]);
-          if (child.kind === 'parameter') {
-            edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
-          }
+        if (!childId) continue; // no node id found for this child — emit no edges for it
+        edgeRows.push([defId, childId, 'contains', 1.0, 0]);
+        if (child.kind === 'parameter') {
+          edgeRows.push([childId, defId, 'parameter_of', 1.0, 0]);
         }
       }
     }
   }
+}
+
+function insertChildrenAndEdges(
+  db: BetterSqlite3Database,
+  allSymbols: Map<string, ExtractorOutput>,
+): void {
+  const childRows: unknown[][] = [];
+  const edgeRows: unknown[][] = [];
 
+  collectChildRowsAndFileEdges(db, allSymbols, childRows, edgeRows); // pass 1: fills both arrays in place
+
+  // Insert children first (so they exist for edge lookup)
+  batchInsertNodes(db, childRows);
+
+  collectChildEdges(db, allSymbols, edgeRows); // pass 2: re-reads node ids so the new children resolve
   batchInsertEdges(db, edgeRows);
 }
 
@@ -348,50 +418,14 @@ function updateFileHashes(
   // Iterate every collected file (#1068): files that produced zero symbols
   // (empty, parser no-op, or grammar-missing optional language) still need a
   // hash row, otherwise the next no-op rebuild's fast-skip pre-flight rejects.
-  const seen = new Set<string>();
-  for (const item of filesToParse) {
-    const relPath = item.relPath ?? normalizePath(path.relative(rootDir, item.file));
-    if (seen.has(relPath)) continue;
-    seen.add(relPath);
-
-    const precomputed = precomputedData.get(relPath);
-    if (precomputed?._reverseDepOnly) {
-      // no-op: file unchanged, hash already correct
-    } else if (precomputed?.hash) {
-      let mtime: number;
-      let size: number;
-      if (precomputed.stat) {
-        mtime = precomputed.stat.mtime;
-        size = precomputed.stat.size;
-      } else {
-        const rawStat = fileStat(path.join(rootDir, relPath));
-        mtime = rawStat ? rawStat.mtime : 0;
-        size = rawStat ? rawStat.size : 0;
-      }
-      upsertHash.run(relPath, precomputed.hash, mtime, size);
-    } else {
-      const absPath = path.join(rootDir, relPath);
-      let code: string | null;
-      try {
-        code = readFileSafe(absPath);
-      } catch (e) {
-        debug(`updateFileHashes: readFileSafe failed for ${relPath}: ${toErrorMessage(e)}`);
-        code = null;
-      }
-      if (code !== null) {
-        const stat = fileStat(absPath);
-        const mtime = stat ? stat.mtime : 0;
-        const size = stat ? stat.size : 0;
-        upsertHash.run(relPath, fileHash(code), mtime, size);
-      }
-    }
-  }
-
-  // Also update metadata-only entries (self-heal mtime/size without re-parse)
-  for (const item of metadataUpdates) {
-    const mtime = item.stat ? item.stat.mtime : 0;
-    const size = item.stat ? item.stat.size : 0;
-    upsertHash.run(item.relPath, item.hash, mtime, size);
+  for (const record of iterFileHashRecords(
+    filesToParse,
+    precomputedData,
+    metadataUpdates,
+    rootDir,
+    'updateFileHashes',
+  )) {
+    upsertHash.run(record.file, record.hash, record.mtime, record.size);
   }
 }
 

From 40d418d149d06c127d7ce430f4863d5b72b93edc Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 12:50:10 -0600
Subject: [PATCH 12/27] refactor(graph): extract helpers in cycles and journal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

docs check acknowledged — no doc-relevant changes (internal helper extraction).
---
 src/domain/graph/cycles.ts  | 100 +++++++++++------------
 src/domain/graph/journal.ts | 153 ++++++++++++++++++++----------------
 2 files changed, 135 insertions(+), 118 deletions(-)

diff --git a/src/domain/graph/cycles.ts b/src/domain/graph/cycles.ts
index 4ccc872f2..bb4d61168 100644
--- a/src/domain/graph/cycles.ts
+++ b/src/domain/graph/cycles.ts
@@ -3,6 +3,45 @@ import { loadNative } from '../../infrastructure/native.js';
 import { isTestFile } from '../../infrastructure/test-filter.js';
 import type { BetterSqlite3Database } from '../../types.js';
 
+type Edge = { source: string; target: string }; // label-based edge handed to cycle detection
+type DbEdge = { source_id: number; target_id: number }; // raw edge row, endpoints are node ids
+
+/**
+ * Map DB edges (node ids) to label edges, dropping edges with an endpoint missing from
+ * `idToLabel`, id-level self-loops (trivial SCCs for Tarjan), and duplicate label pairs.
+ */
+function buildLabelEdges(dbEdges: DbEdge[], idToLabel: Map<number, string>): Edge[] {
+  const edges: Edge[] = [];
+  const seen = new Set<string>();
+  for (const e of dbEdges) {
+    if (e.source_id === e.target_id) continue;
+    const src = idToLabel.get(e.source_id);
+    const tgt = idToLabel.get(e.target_id);
+    if (src === undefined || tgt === undefined) continue;
+    const key = `${src}\0${tgt}`; // NUL-separated dedup key — assumes labels never contain \0
+    if (seen.has(key)) continue;
+    seen.add(key);
+    edges.push({ source: src, target: tgt });
+  }
+  return edges;
+}
+
+function buildFileLevelEdges(db: BetterSqlite3Database, noTests: boolean): Edge[] { // import edges between file nodes, labelled by file path
+  let nodes = getFileNodesAll(db);
+  if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file));
+  const idToLabel = new Map<number, string>();
+  for (const n of nodes) idToLabel.set(n.id, n.file);
+  return buildLabelEdges(getImportEdges(db), idToLabel); // edges touching filtered-out nodes are dropped there
+}
+
+function buildCallableEdges(db: BetterSqlite3Database, noTests: boolean): Edge[] { // call edges between callable nodes, labelled `name|file`
+  let nodes = getCallableNodes(db);
+  if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file));
+  const idToLabel = new Map<number, string>();
+  for (const n of nodes) idToLabel.set(n.id, `${n.name}|${n.file}`);
+  return buildLabelEdges(getCallEdges(db), idToLabel); // edges touching filtered-out nodes are dropped there
+}
+
 /**
  * Find cycles using Tarjan's SCC algorithm.
  *
@@ -16,66 +55,20 @@ export function findCycles(
   const fileLevel = opts.fileLevel !== false;
   const noTests = opts.noTests || false;
 
-  const edges: Array<{ source: string; target: string }> = [];
-  const seen = new Set<string>();
-
-  if (fileLevel) {
-    let nodes = getFileNodesAll(db);
-    if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file));
-    const nodeIds = new Set<number>();
-    const idToFile = new Map<number, string>();
-    for (const n of nodes) {
-      nodeIds.add(n.id);
-      idToFile.set(n.id, n.file);
-    }
-    for (const e of getImportEdges(db)) {
-      if (!nodeIds.has(e.source_id) || !nodeIds.has(e.target_id)) continue;
-      if (e.source_id === e.target_id) continue;
-      const src = idToFile.get(e.source_id)!;
-      const tgt = idToFile.get(e.target_id)!;
-      const key = `${src}\0${tgt}`;
-      if (seen.has(key)) continue;
-      seen.add(key);
-      edges.push({ source: src, target: tgt });
-    }
-  } else {
-    let nodes = getCallableNodes(db);
-    if (noTests) nodes = nodes.filter((n) => !isTestFile(n.file));
-    const nodeIds = new Set<number>();
-    const idToLabel = new Map<number, string>();
-    for (const n of nodes) {
-      nodeIds.add(n.id);
-      idToLabel.set(n.id, `${n.name}|${n.file}`);
-    }
-    for (const e of getCallEdges(db)) {
-      if (!nodeIds.has(e.source_id) || !nodeIds.has(e.target_id)) continue;
-      if (e.source_id === e.target_id) continue;
-      const src = idToLabel.get(e.source_id)!;
-      const tgt = idToLabel.get(e.target_id)!;
-      const key = `${src}\0${tgt}`;
-      if (seen.has(key)) continue;
-      seen.add(key);
-      edges.push({ source: src, target: tgt });
-    }
-  }
+  const edges = fileLevel ? buildFileLevelEdges(db, noTests) : buildCallableEdges(db, noTests);
 
   const native = loadNative();
   if (native) {
     return native.detectCycles(edges) as string[][];
   }
-
   return tarjanFromEdges(edges);
 }
 
-export function findCyclesJS(edges: Array<{ source: string; target: string }>): string[][] {
+export function findCyclesJS(edges: Edge[]): string[][] { // always runs the JS Tarjan implementation, never the native addon
   return tarjanFromEdges(edges);
 }
 
-/**
- * Run Tarjan's SCC on a flat edge list. Returns SCCs with length > 1 (cycles).
- * Uses a simple adjacency-list Map instead of a full CodeGraph.
- */
-function tarjanFromEdges(edges: Array<{ source: string; target: string }>): string[][] {
+function buildAdjacency(edges: Edge[]): { adj: Map<string, string[]>; allNodes: Set<string> } {
   const adj = new Map<string, string[]>();
   const allNodes = new Set<string>();
   for (const { source, target } of edges) {
@@ -88,6 +81,15 @@ function tarjanFromEdges(edges: Array<{ source: string; target: string }>): stri
     }
     list.push(target);
   }
+  return { adj, allNodes };
+}
+
+/**
+ * Run Tarjan's SCC on a flat edge list. Returns SCCs with length > 1 (cycles).
+ * Uses a simple adjacency-list Map instead of a full CodeGraph.
+ */
+function tarjanFromEdges(edges: Edge[]): string[][] {
+  const { adj, allNodes } = buildAdjacency(edges);
 
   let index = 0;
   const stack: string[] = [];
diff --git a/src/domain/graph/journal.ts b/src/domain/graph/journal.ts
index 900e33546..d20c7dab9 100644
--- a/src/domain/graph/journal.ts
+++ b/src/domain/graph/journal.ts
@@ -91,62 +91,69 @@ function trySteal(lockPath: string): AcquiredLock | null {
   return { fd, nonce };
 }
 
-function acquireJournalLock(lockPath: string): AcquiredLock {
-  const start = Date.now();
-  for (;;) {
-    const nonce = `${process.pid}-${crypto.randomBytes(8).toString('hex')}`;
+/**
+ * Try to create the lockfile fresh via `wx`. Returns the acquired lock on
+ * success, `null` if the file already exists (EEXIST); other open errors are rethrown.
+ *
+ * If the stamp write fails (ENOSPC, I/O error) we close and unlink the empty file and
+ * return `null` — leaving it would look stale to waiters and admit double-acquire.
+ */
+function tryFreshAcquire(lockPath: string): AcquiredLock | null {
+  const nonce = `${process.pid}-${crypto.randomBytes(8).toString('hex')}`;
+  let fd: number;
+  try {
+    fd = fs.openSync(lockPath, 'wx');
+  } catch (e) {
+    if ((e as NodeJS.ErrnoException).code === 'EEXIST') return null;
+    throw e;
+  }
+  try {
+    fs.writeSync(fd, `${process.pid}\n${nonce}\n`);
+  } catch {
     try {
-      const fd = fs.openSync(lockPath, 'wx');
-      try {
-        fs.writeSync(fd, `${process.pid}\n${nonce}\n`);
-      } catch {
-        // Stamp write failed (ENOSPC, I/O error). An empty lockfile would
-        // look stale to concurrent waiters (Number('') === 0, isPidAlive(0)
-        // returns false), so they'd steal our live lock. Release and retry.
-        try {
-          fs.closeSync(fd);
-        } catch {
-          /* ignore */
-        }
-        try {
-          fs.unlinkSync(lockPath);
-        } catch {
-          /* ignore */
-        }
-        if (Date.now() - start > LOCK_TIMEOUT_MS) {
-          throw new Error(
-            `Failed to acquire journal lock at ${lockPath} within ${LOCK_TIMEOUT_MS}ms`,
-          );
-        }
-        sleepSync(LOCK_RETRY_MS);
-        continue;
-      }
-      return { fd, nonce };
-    } catch (e) {
-      if ((e as NodeJS.ErrnoException).code !== 'EEXIST') throw e;
+      fs.closeSync(fd);
+    } catch {
+      /* ignore */
     }
-
-    let holderAlive = true;
     try {
-      const pidContent = fs.readFileSync(lockPath, 'utf-8').split('\n')[0]!.trim();
-      holderAlive = isPidAlive(Number(pidContent));
+      fs.unlinkSync(lockPath);
     } catch {
-      /* unreadable — fall through to age check */
+      /* ignore */
     }
+    return null;
+  }
+  return { fd, nonce };
+}
 
-    let shouldSteal = !holderAlive;
-    if (holderAlive) {
-      try {
-        const stat = fs.statSync(lockPath);
-        if (Date.now() - stat.mtimeMs > LOCK_STALE_MS) {
-          shouldSteal = true;
-        }
-      } catch {
-        /* stat failed — keep retrying */
-      }
-    }
+/**
+ * Decide whether the current lock holder is stale and should be stolen.
+ * Returns true if the recorded PID is not alive, or if the lockfile mtime is
+ * more than LOCK_STALE_MS old. Unreadable file: age check only; failed stat: false.
+ */
+function isLockStale(lockPath: string): boolean {
+  let holderAlive = true;
+  try {
+    const pidContent = fs.readFileSync(lockPath, 'utf-8').split('\n')[0]!.trim();
+    holderAlive = isPidAlive(Number(pidContent));
+  } catch {
+    /* unreadable — fall through to age check */
+  }
+  if (!holderAlive) return true;
+  try {
+    const stat = fs.statSync(lockPath);
+    return Date.now() - stat.mtimeMs > LOCK_STALE_MS;
+  } catch {
+    return false; // stat failed — not stale; caller keeps retrying
+  }
+}
 
-    if (shouldSteal) {
+function acquireJournalLock(lockPath: string): AcquiredLock {
+  const start = Date.now();
+  for (;;) {
+    const fresh = tryFreshAcquire(lockPath);
+    if (fresh) return fresh;
+
+    if (isLockStale(lockPath)) {
       const stolen = trySteal(lockPath);
       if (stolen) return stolen;
       // Steal failed or lost the race — fall through to timeout check & retry.
@@ -227,27 +234,20 @@ interface JournalResult {
   removed?: string[];
 }
 
-export function readJournal(rootDir: string): JournalResult {
-  const journalPath = path.join(rootDir, '.codegraph', JOURNAL_FILENAME);
-  let content: string;
-  try {
-    content = fs.readFileSync(journalPath, 'utf-8');
-  } catch {
-    return { valid: false };
-  }
-
-  const lines = content.split('\n');
-  if (lines.length === 0 || !lines[0]!.startsWith(HEADER_PREFIX)) {
+function parseJournalHeader(firstLine: string | undefined): number | null {
+  if (!firstLine || !firstLine.startsWith(HEADER_PREFIX)) {
     debug('Journal has malformed or missing header');
-    return { valid: false };
+    return null;
   }
-
-  const timestamp = Number(lines[0]!.slice(HEADER_PREFIX.length).trim());
+  const timestamp = Number(firstLine.slice(HEADER_PREFIX.length).trim());
   if (!Number.isFinite(timestamp) || timestamp <= 0) {
     debug('Journal has invalid timestamp');
-    return { valid: false };
+    return null;
   }
+  return timestamp;
+}
 
+function parseJournalBody(lines: string[]): { changed: string[]; removed: string[] } {
   const changed: string[] = [];
   const removed: string[] = [];
   const seenChanged = new Set<string>();
@@ -263,14 +263,29 @@ export function readJournal(rootDir: string): JournalResult {
         seenRemoved.add(filePath);
         removed.push(filePath);
       }
-    } else {
-      if (!seenChanged.has(line)) {
-        seenChanged.add(line);
-        changed.push(line);
-      }
+    } else if (!seenChanged.has(line)) {
+      seenChanged.add(line);
+      changed.push(line);
     }
   }
 
+  return { changed, removed };
+}
+
+export function readJournal(rootDir: string): JournalResult {
+  const journalPath = path.join(rootDir, '.codegraph', JOURNAL_FILENAME);
+  let content: string;
+  try {
+    content = fs.readFileSync(journalPath, 'utf-8');
+  } catch {
+    return { valid: false };
+  }
+
+  const lines = content.split('\n');
+  const timestamp = parseJournalHeader(lines[0]);
+  if (timestamp === null) return { valid: false };
+
+  const { changed, removed } = parseJournalBody(lines);
   return { valid: true, timestamp, changed, removed };
 }
 

From b3c36f41e0d410eb23022fd4197c7bf4ff82cc57 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 12:54:24 -0600
Subject: [PATCH 13/27] refactor(core-rs): collapse walker mutual recursion
 into single-entry traversal

---
 crates/codegraph-core/src/dataflow.rs | 73 ++++++++++-----------------
 1 file changed, 28 insertions(+), 45 deletions(-)

diff --git a/crates/codegraph-core/src/dataflow.rs b/crates/codegraph-core/src/dataflow.rs
index 26ea2d21c..091b44dd0 100644
--- a/crates/codegraph-core/src/dataflow.rs
+++ b/crates/codegraph-core/src/dataflow.rs
@@ -971,6 +971,12 @@ struct DataflowOutput {
     mutations: Vec<DataflowMutation>,
 }
 
+/// Single-entry DFS traversal for dataflow extraction.
+///
+/// Dispatches the current node to the appropriate handler, then recurses into
+/// named children by calling `visit` directly. Children are walked inline to
+/// avoid a `visit` <-> `visit_children` mutual-recursion cycle (single entry
+/// point, single recursive call site).
 #[allow(clippy::too_many_arguments)]
 fn visit(
     node: &Node,
@@ -985,66 +991,43 @@ fn visit(
     }
 
     let t = node.kind();
+    let mut entered_scope = false;
 
-    // Enter function scope
+    // Dispatch to handler for this node kind. Children are always visited
+    // afterwards via the loop below — handlers must not recurse themselves.
     if is_function_node(rules, t) {
         enter_scope(node, rules, source, scope_stack, &mut out.parameters);
-        visit_children(node, rules, source, scope_stack, out, depth);
-        scope_stack.pop();
-        return;
-    }
-
-    // Return statements
-    if rules.return_node.is_some_and(|r| r == t) {
+        entered_scope = true;
+    } else if rules.return_node.is_some_and(|r| r == t) {
         handle_return_stmt(node, rules, source, scope_stack, &mut out.returns, depth);
-        visit_children(node, rules, source, scope_stack, out, depth);
-        return;
-    }
-
-    // Variable declarations (single or multi-type)
-    if rules.var_declarator_node.is_some_and(|v| v == t)
+    } else if rules.var_declarator_node.is_some_and(|v| v == t)
         || (!rules.var_declarator_nodes.is_empty() && rules.var_declarator_nodes.contains(&t))
     {
         handle_var_declarator(node, rules, source, scope_stack, &mut out.assignments);
-        visit_children(node, rules, source, scope_stack, out, depth);
-        return;
-    }
-
-    // Call expressions
-    if is_call_node(rules, t) {
+    } else if is_call_node(rules, t) {
         handle_call_expr(node, rules, source, scope_stack, &mut out.arg_flows);
-        visit_children(node, rules, source, scope_stack, out, depth);
-        return;
-    }
-
-    // Assignment expressions
-    if rules.assignment_node.is_some_and(|a| a == t) {
-        handle_assignment(node, rules, source, scope_stack, &mut out.assignments, &mut out.mutations);
-        visit_children(node, rules, source, scope_stack, out, depth);
-        return;
-    }
-
-    // Mutation detection via expression_statement
-    if t == rules.expression_stmt_node {
+    } else if rules.assignment_node.is_some_and(|a| a == t) {
+        handle_assignment(
+            node,
+            rules,
+            source,
+            scope_stack,
+            &mut out.assignments,
+            &mut out.mutations,
+        );
+    } else if t == rules.expression_stmt_node {
         handle_expr_stmt_mutation(node, rules, source, scope_stack, &mut out.mutations);
     }
 
-    visit_children(node, rules, source, scope_stack, out, depth);
-}
-
-/// Visit all named children of a node (shared DFS recursion helper).
-fn visit_children(
-    node: &Node,
-    rules: &DataflowRules,
-    source: &[u8],
-    scope_stack: &mut Vec<ScopeFrame>,
-    out: &mut DataflowOutput,
-    depth: usize,
-) {
+    // Recurse into named children inline — no helper indirection, no cycle.
     let cursor = &mut node.walk();
     for child in node.named_children(cursor) {
         visit(&child, rules, source, scope_stack, out, depth + 1);
     }
+
+    if entered_scope {
+        scope_stack.pop();
+    }
 }
 
 /// Handle a return statement: extract expression and referenced names.

From b49cab5dd94e96bc6b7b1e278df6f179f662800f Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 13:15:47 -0600
Subject: [PATCH 14/27] refactor(core-rs): decompose pipeline, read queries,
 and edge builders

docs check acknowledged - Rust internal helper extraction, no user-facing changes
---
 crates/codegraph-core/src/build_pipeline.rs   | 1026 +++++++------
 crates/codegraph-core/src/edge_builder.rs     |  243 +--
 crates/codegraph-core/src/graph_algorithms.rs |  109 +-
 crates/codegraph-core/src/import_edges.rs     |  236 +--
 .../codegraph-core/src/import_resolution.rs   |  125 +-
 crates/codegraph-core/src/read_queries.rs     | 1347 ++++++++++-------
 crates/codegraph-core/src/structure.rs        |  401 +++--
 7 files changed, 2001 insertions(+), 1486 deletions(-)

diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/build_pipeline.rs
index dba6e7f28..8691611d6 100644
--- a/crates/codegraph-core/src/build_pipeline.rs
+++ b/crates/codegraph-core/src/build_pipeline.rs
@@ -96,21 +96,23 @@ fn relative_path(root_dir: &str, abs_path: &str) -> String {
     }
 }
 
-/// Run the full build pipeline in Rust.
-///
-/// Called from `NativeDatabase.build_graph()` via napi.
-pub fn run_pipeline(
+/// Deserialized pipeline inputs assembled in Stage 1.
+struct PipelineSetup {
+    config: BuildConfig,
+    napi_aliases: crate::types::PathAliases,
+    opts: BuildOpts,
+    incremental: bool,
+    include_dataflow: bool,
+    include_ast: bool,
+    force_full_rebuild: bool,
+}
+
+fn pipeline_setup(
     conn: &Connection,
-    root_dir: &str,
     config_json: &str,
     aliases_json: &str,
     opts_json: &str,
-) -> Result<BuildPipelineResult, String> {
-    let total_start = Instant::now();
-    let mut timing = PipelineTiming::default();
-
-    // ── Stage 1: Deserialize config ────────────────────────────────────
-    let t0 = Instant::now();
+) -> Result<PipelineSetup, String> {
     let config: BuildConfig =
         serde_json::from_str(config_json).map_err(|e| format!("config parse error: {e}"))?;
     let aliases: BuildPathAliases =
@@ -122,9 +124,315 @@ pub fn run_pipeline(
     let incremental = opts.incremental.unwrap_or(config.build.incremental);
     let include_dataflow = opts.dataflow.unwrap_or(true);
     let include_ast = opts.ast.unwrap_or(true);
-
-    // Check engine/schema/version mismatch for forced full rebuild
     let force_full_rebuild = check_version_mismatch(conn);
+
+    Ok(PipelineSetup {
+        config,
+        napi_aliases,
+        opts,
+        incremental,
+        include_dataflow,
+        include_ast,
+        force_full_rebuild,
+    })
+}
+
+/// Finish a no-op incremental build (nothing changed, nothing removed). Side
+/// effects: heals `metadata_updates` and writes the journal header with
+/// `now_ms()`. Returns an `early_exit` result with zeroed node/edge/change counts.
+fn early_exit_result(
+    file_count: usize,
+    timing: PipelineTiming,
+    conn: &Connection,
+    root_dir: &str,
+    metadata_updates: &[change_detection::MetadataUpdate],
+) -> BuildPipelineResult {
+    change_detection::heal_metadata(conn, metadata_updates);
+    journal::write_journal_header(root_dir, now_ms());
+    BuildPipelineResult {
+        phases: timing,
+        node_count: 0,
+        edge_count: 0,
+        file_count,
+        early_exit: true,
+        changed_files: Some(vec![]),
+        changed_count: 0,
+        removed_count: 0,
+        is_full_build: false,
+        structure_handled: true,
+        analysis_complete: true,
+    }
+}
+
+/// Full build: clear all graph data, return two empty vecs. Incremental: save reverse-dep
+/// edges and importers of removed files (both skipped if `opts.no_reverse_deps`), then
+/// purge removed + changed files — the JS `build-edges.ts` save-then-purge order (#1012).
+/// Returns `(saved_reverse_dep_edges, removal_reverse_deps)` for reconnect and Stage 8 roles.
+fn save_and_purge_changed(
+    conn: &Connection,
+    parse_changes: &[&change_detection::ChangedFile],
+    change_result: &change_detection::ChangeResult,
+    opts: &BuildOpts,
+    root_dir: &str,
+) -> (Vec<change_detection::SavedReverseDepEdge>, Vec<String>) {
+    let mut saved_reverse_dep_edges: Vec<change_detection::SavedReverseDepEdge> = Vec::new();
+    let mut removal_reverse_deps: Vec<String> = Vec::new();
+
+    if change_result.is_full_build {
+        let has_embeddings = change_detection::has_embeddings(conn);
+        change_detection::clear_all_graph_data(conn, has_embeddings);
+        return (saved_reverse_dep_edges, removal_reverse_deps);
+    }
+
+    let changed_paths: Vec<String> = parse_changes.iter().map(|c| c.rel_path.clone()).collect();
+
+    if !opts.no_reverse_deps.unwrap_or(false) {
+        saved_reverse_dep_edges = change_detection::save_reverse_dep_edges(conn, &changed_paths);
+
+        if !change_result.removed.is_empty() {
+            let removed_set: HashSet<String> = change_result.removed.iter().cloned().collect();
+            removal_reverse_deps =
+                change_detection::find_reverse_dependencies(conn, &removed_set, root_dir)
+                    .into_iter()
+                    .collect();
+        }
+    }
+
+    let files_to_purge: Vec<String> = change_result
+        .removed
+        .iter()
+        .chain(parse_changes.iter().map(|c| &c.rel_path))
+        .cloned()
+        .collect();
+    change_detection::purge_changed_files(conn, &files_to_purge, &[]);
+
+    (saved_reverse_dep_edges, removal_reverse_deps)
+}
+
+/// Parse the changed files in parallel, keyed by (and `sym.file` rewritten to) the relative path.
+fn parse_and_index_files(
+    parse_changes: &[&change_detection::ChangedFile],
+    root_dir: &str,
+    include_dataflow: bool,
+    include_ast: bool,
+) -> HashMap<String, FileSymbols> {
+    let files_to_parse: Vec<String> =
+        parse_changes.iter().map(|c| c.abs_path.clone()).collect();
+    let parsed =
+        parallel::parse_files_parallel(&files_to_parse, root_dir, include_dataflow, include_ast);
+    let mut file_symbols: HashMap<String, FileSymbols> = HashMap::new();
+    for mut sym in parsed {
+        let rel = relative_path(root_dir, &sym.file);
+        sym.file = rel.clone();
+        file_symbols.insert(rel, sym);
+    }
+    file_symbols
+}
+
+/// Stage 6: batch-resolve every import of the parsed files (`from_file` is sent with `\`
+/// replaced by `/`). Returns `(batch_resolved, known_files)`, keyed `"from_file|import_source"`.
+fn resolve_pipeline_imports(
+    file_symbols: &HashMap<String, FileSymbols>,
+    collect_files: &[String],
+    root_dir: &str,
+    napi_aliases: &crate::types::PathAliases,
+) -> (HashMap<String, String>, HashSet<String>) {
+    let mut batch_inputs: Vec<ImportResolutionInput> = Vec::new();
+    for (rel_path, symbols) in file_symbols {
+        let abs_file = Path::new(root_dir).join(rel_path);
+        let abs_str = abs_file.to_str().unwrap_or("").replace('\\', "/");
+        for imp in &symbols.imports {
+            batch_inputs.push(ImportResolutionInput {
+                from_file: abs_str.clone(),
+                import_source: imp.source.clone(),
+            });
+        }
+    }
+    let known_files: HashSet<String> =
+        collect_files.iter().map(|f| relative_path(root_dir, f)).collect();
+    let resolved =
+        import_resolution::resolve_imports_batch(&batch_inputs, root_dir, napi_aliases, Some(&known_files));
+    let mut batch_resolved: HashMap<String, String> = HashMap::new();
+    for r in &resolved {
+        let key = format!("{}|{}", r.from_file, r.import_source);
+        batch_resolved.insert(key, r.resolved_path.clone());
+    }
+    (batch_resolved, known_files)
+}
+
+/// Reconnect saved reverse-dep edges to new target node IDs (#1012); report drops on stderr.
+fn reconnect_saved_reverse_dep_edges(
+    conn: &Connection,
+    saved: &[change_detection::SavedReverseDepEdge],
+) {
+    if saved.is_empty() {
+        return;
+    }
+    let (reconnected, dropped) = change_detection::reconnect_reverse_dep_edges(conn, saved);
+    if dropped > 0 {
+        eprintln!(
+            "[codegraph] reconnect_reverse_dep_edges: {reconnected} reconnected, {dropped} dropped (target nodes not found)"
+        );
+    }
+}
+
+/// Stage 8 (structure): take the fast changed-file-metrics path when the build is
+/// incremental, at most FAST_PATH_MAX_CHANGED_FILES files changed, and the DB holds more
+/// than FAST_PATH_MIN_EXISTING_FILES files; otherwise rebuild the full structure. The
+/// change set is `file_symbols.keys()` (reverse-deps are reconnected, not re-parsed).
+fn run_structure_phase(
+    conn: &Connection,
+    file_symbols: &HashMap<String, FileSymbols>,
+    collect_directories: &HashSet<String>,
+    root_dir: &str,
+    line_count_map: &HashMap<String, i64>,
+    parse_changes_len: usize,
+    is_full_build: bool,
+) {
+    let changed_files: Vec<String> = file_symbols.keys().cloned().collect();
+    let existing_file_count = structure::get_existing_file_count(conn);
+    let use_fast_path = !is_full_build
+        && parse_changes_len <= FAST_PATH_MAX_CHANGED_FILES
+        && existing_file_count > FAST_PATH_MIN_EXISTING_FILES;
+
+    if use_fast_path {
+        structure::update_changed_file_metrics(conn, &changed_files, line_count_map, file_symbols);
+    } else {
+        let changed_for_structure: Option<Vec<String>> = if is_full_build {
+            None
+        } else {
+            Some(changed_files.clone())
+        };
+        structure::build_full_structure(
+            conn,
+            file_symbols,
+            collect_directories,
+            root_dir,
+            line_count_map,
+            changed_for_structure.as_deref(),
+        );
+    }
+}
+
+/// Stage 8 (roles): full builds reclassify everything; incremental builds classify
+/// the changed files plus `removal_reverse_deps`, which must be seeded explicitly
+/// because neighbour expansion cannot find them once the deleted file's nodes are
+/// gone (#1027). Errors returned by the classifiers are discarded.
+fn run_role_classification(
+    conn: &Connection,
+    file_symbols: &HashMap<String, FileSymbols>,
+    removal_reverse_deps: Vec<String>,
+    is_full_build: bool,
+) {
+    let changed_files: Vec<String> = file_symbols.keys().cloned().collect();
+    let changed_file_list: Option<Vec<String>> = if is_full_build {
+        None
+    } else {
+        let mut files = changed_files;
+        if !removal_reverse_deps.is_empty() {
+            let existing: HashSet<String> = files.iter().cloned().collect();
+            for f in removal_reverse_deps {
+                if !existing.contains(&f) {
+                    files.push(f);
+                }
+            }
+        }
+        Some(files)
+    };
+    if let Some(ref files) = changed_file_list {
+        if !files.is_empty() {
+            let _ = roles_db::do_classify_incremental(conn, files);
+        }
+    } else {
+        let _ = roles_db::do_classify_full(conn);
+    }
+}
+
+/// Stage 8b: persist AST, complexity, CFG, and dataflow data for `analysis_scope`
+/// (`None` = every file in `file_symbols`), timing each enabled pass. Returns
+/// `(do_analysis, analysis_ok)` for `analysis_complete`; `(false, true)` if all passes are off.
+fn run_analysis_persistence(
+    conn: &Connection,
+    file_symbols: &HashMap<String, FileSymbols>,
+    analysis_scope: Option<&Vec<String>>,
+    opts: &BuildOpts,
+    include_ast: bool,
+    include_dataflow: bool,
+    timing: &mut PipelineTiming,
+) -> (bool, bool) {
+    let include_complexity = opts.complexity.unwrap_or(true);
+    let include_cfg = opts.cfg.unwrap_or(true);
+    let do_analysis = include_ast || include_dataflow || include_cfg || include_complexity;
+    if !do_analysis {
+        return (false, true);
+    }
+
+    let analysis_file_set: HashSet<&str> = match analysis_scope {
+        Some(files) => files.iter().map(|s| s.as_str()).collect(),
+        None => file_symbols.keys().map(|s| s.as_str()).collect(),
+    };
+
+    let node_id_map = build_analysis_node_map(conn, &analysis_file_set);
+    let mut analysis_ok = true;
+
+    if include_ast {
+        let t0 = Instant::now();
+        let ast_batches = build_ast_batches(file_symbols, &analysis_file_set);
+        if ast_db::do_insert_ast_nodes(conn, &ast_batches).is_err() {
+            analysis_ok = false;
+        }
+        timing.ast_ms = t0.elapsed().as_secs_f64() * 1000.0;
+    }
+    if include_complexity {
+        let t0 = Instant::now();
+        if !write_complexity(conn, file_symbols, &analysis_file_set, &node_id_map) {
+            analysis_ok = false;
+        }
+        timing.complexity_ms = t0.elapsed().as_secs_f64() * 1000.0;
+    }
+    if include_cfg {
+        let t0 = Instant::now();
+        if !write_cfg(conn, file_symbols, &analysis_file_set, &node_id_map) {
+            analysis_ok = false;
+        }
+        timing.cfg_ms = t0.elapsed().as_secs_f64() * 1000.0;
+    }
+    if include_dataflow {
+        let t0 = Instant::now();
+        if !write_dataflow(conn, file_symbols, &analysis_file_set) {
+            analysis_ok = false;
+        }
+        timing.dataflow_ms = t0.elapsed().as_secs_f64() * 1000.0;
+    }
+
+    (do_analysis, analysis_ok)
+}
+
+/// Run the full build pipeline in Rust.
+///
+/// Called from `NativeDatabase.build_graph()` via napi.
+pub fn run_pipeline(
+    conn: &Connection,
+    root_dir: &str,
+    config_json: &str,
+    aliases_json: &str,
+    opts_json: &str,
+) -> Result<BuildPipelineResult, String> {
+    let total_start = Instant::now();
+    let mut timing = PipelineTiming::default();
+
+    // ── Stage 1: Deserialize config ────────────────────────────────────
+    let t0 = Instant::now();
+    let setup = pipeline_setup(conn, config_json, aliases_json, opts_json)?;
+    let PipelineSetup {
+        config,
+        napi_aliases,
+        opts,
+        incremental,
+        include_dataflow,
+        include_ast,
+        force_full_rebuild,
+    } = setup;
     timing.setup_ms = t0.elapsed().as_secs_f64() * 1000.0;
 
     // ── Stage 2: Collect files ─────────────────────────────────────────
@@ -162,89 +470,27 @@ pub fn run_pipeline(
     // Early exit: no changes
     if !change_result.is_full_build && parse_changes.is_empty() && change_result.removed.is_empty()
     {
-        // Heal metadata if needed
-        change_detection::heal_metadata(conn, &change_result.metadata_updates);
-        journal::write_journal_header(root_dir, now_ms());
-        return Ok(BuildPipelineResult {
-            phases: timing,
-            node_count: 0,
-            edge_count: 0,
-            file_count: collect_result.files.len(),
-            early_exit: true,
-            changed_files: Some(vec![]),
-            changed_count: 0,
-            removed_count: 0,
-            is_full_build: false,
-            structure_handled: true,
-            analysis_complete: true,
-        });
+        return Ok(early_exit_result(
+            collect_result.files.len(),
+            timing,
+            conn,
+            root_dir,
+            &change_result.metadata_updates,
+        ));
     }
 
-    // Save reverse-dep → changed-file edges before purge so we can reconnect
-    // them to new node IDs after Stage 5 (#1012). This matches the WASM/JS
-    // strategy and lets us skip re-parsing reverse-dep files entirely:
-    // parse/insert/structure/roles/analysis all scope to truly-changed files.
-    let mut saved_reverse_dep_edges: Vec<change_detection::SavedReverseDepEdge> = Vec::new();
-    // Files that import a removed file. Save+reconnect doesn't apply (the
-    // target node is gone for good), but their role records go stale because
-    // edges to the deleted file's nodes get purged in Stage 3. Reclassify them
-    // in Stage 8 so fan-out reflects reality. (#1027 review)
-    let mut removal_reverse_deps: Vec<String> = Vec::new();
-
-    // Handle full build: clear all graph data
-    if change_result.is_full_build {
-        let has_embeddings = change_detection::has_embeddings(conn);
-        change_detection::clear_all_graph_data(conn, has_embeddings);
-    } else {
-        // Incremental: save reverse-dep edges (if reverse-dep tracking is enabled),
-        // then purge changed files only.
-        let changed_paths: Vec<String> =
-            parse_changes.iter().map(|c| c.rel_path.clone()).collect();
-
-        if !opts.no_reverse_deps.unwrap_or(false) {
-            saved_reverse_dep_edges =
-                change_detection::save_reverse_dep_edges(conn, &changed_paths);
-
-            if !change_result.removed.is_empty() {
-                let removed_set: HashSet<String> =
-                    change_result.removed.iter().cloned().collect();
-                removal_reverse_deps =
-                    change_detection::find_reverse_dependencies(conn, &removed_set, root_dir)
-                        .into_iter()
-                        .collect();
-            }
-        }
-
-        let files_to_purge: Vec<String> = change_result
-            .removed
-            .iter()
-            .chain(parse_changes.iter().map(|c| &c.rel_path))
-            .cloned()
-            .collect();
-        // Pass empty reverse_dep_files: purge already deletes both directions
-        // for changed files (which removes the saved reverse-dep → changed-file
-        // edges from the live table), and other outgoing edges from reverse-dep
-        // files remain valid and must NOT be deleted — they will be reconnected
-        // to new target IDs after insert.
-        change_detection::purge_changed_files(conn, &files_to_purge, &[]);
-    }
+    // Stage 3b: save reverse-dep edges (incremental) or clear all (full),
+    // then purge changed files. Returns the saved edges for Stage 7
+    // reconnect and the removal reverse-dep set for Stage 8 reclassification.
+    let (saved_reverse_dep_edges, removal_reverse_deps) =
+        save_and_purge_changed(conn, &parse_changes, &change_result, &opts, root_dir);
 
     // ── Stage 4: Parse files ───────────────────────────────────────────
     // Only truly-changed files are parsed. Reverse-dep files are not re-parsed —
     // their edges to changed files are reconstructed via save+reconnect (#1012).
     let t0 = Instant::now();
-    let files_to_parse: Vec<String> =
-        parse_changes.iter().map(|c| c.abs_path.clone()).collect();
-    let parsed =
-        parallel::parse_files_parallel(&files_to_parse, root_dir, include_dataflow, include_ast);
-
-    // Build file symbols map (relative path → FileSymbols)
-    let mut file_symbols: HashMap<String, FileSymbols> = HashMap::new();
-    for mut sym in parsed {
-        let rel = relative_path(root_dir, &sym.file);
-        sym.file = rel.clone();
-        file_symbols.insert(rel, sym);
-    }
+    let mut file_symbols =
+        parse_and_index_files(&parse_changes, root_dir, include_dataflow, include_ast);
     timing.parse_ms = t0.elapsed().as_secs_f64() * 1000.0;
 
     // ── Stage 5: Insert nodes ──────────────────────────────────────────
@@ -257,44 +503,13 @@ pub fn run_pipeline(
         &file_hashes,
         &change_result.removed,
     );
-    // Also heal metadata-only updates
     change_detection::heal_metadata(conn, &change_result.metadata_updates);
     timing.insert_ms = t0.elapsed().as_secs_f64() * 1000.0;
 
     // ── Stage 6: Resolve imports ───────────────────────────────────────
     let t0 = Instant::now();
-    let mut batch_inputs: Vec<ImportResolutionInput> = Vec::new();
-    for (rel_path, symbols) in &file_symbols {
-        let abs_file = Path::new(root_dir).join(rel_path);
-        // Normalize to forward slashes so batch_resolved keys match Stage 6b lookups on Windows.
-        let abs_str = abs_file.to_str().unwrap_or("").replace('\\', "/");
-        for imp in &symbols.imports {
-            batch_inputs.push(ImportResolutionInput {
-                from_file: abs_str.clone(),
-                import_source: imp.source.clone(),
-            });
-        }
-    }
-
-    let known_files: HashSet<String> = collect_result
-        .files
-        .iter()
-        .map(|f| relative_path(root_dir, f))
-        .collect();
-
-    let resolved = import_resolution::resolve_imports_batch(
-        &batch_inputs,
-        root_dir,
-        &napi_aliases,
-        Some(&known_files),
-    );
-
-    // Build batch_resolved map: "absFile|importSource" -> resolved path
-    let mut batch_resolved: HashMap<String, String> = HashMap::new();
-    for r in &resolved {
-        let key = format!("{}|{}", r.from_file, r.import_source);
-        batch_resolved.insert(key, r.resolved_path.clone());
-    }
+    let (mut batch_resolved, known_files) =
+        resolve_pipeline_imports(&file_symbols, &collect_result.files, root_dir, &napi_aliases);
     timing.resolve_ms = t0.elapsed().as_secs_f64() * 1000.0;
 
     // ── Stage 6b: Re-parse barrel candidates (incremental only) ─────────
@@ -332,20 +547,7 @@ pub fn run_pipeline(
     // internal logic. We load nodes from DB and pass to the edge builder.
     build_and_insert_call_edges(conn, &file_symbols, &import_ctx, !change_result.is_full_build);
 
-    // Reconnect saved reverse-dep edges to new node IDs (#1012). Mirrors
-    // `reconnectReverseDepEdges` in build-edges.ts — for each saved edge,
-    // look up the new target node and recreate the edge with the original
-    // source_id (still valid; reverse-dep nodes were never purged).
-    if !saved_reverse_dep_edges.is_empty() {
-        let (reconnected, dropped) =
-            change_detection::reconnect_reverse_dep_edges(conn, &saved_reverse_dep_edges);
-        if dropped > 0 {
-            eprintln!(
-                "[codegraph] reconnect_reverse_dep_edges: {reconnected} reconnected, {dropped} dropped (target nodes not found)"
-            );
-        }
-    }
-
+    reconnect_saved_reverse_dep_edges(conn, &saved_reverse_dep_edges);
     timing.edges_ms = t0.elapsed().as_secs_f64() * 1000.0;
 
     // ── Stage 8: Structure + roles ─────────────────────────────────────
@@ -354,128 +556,41 @@ pub fn run_pipeline(
     // file_symbols only contains truly-changed files (reverse-deps are not
     // re-parsed; their edges are reconnected via save+reconnect — #1012), so
     // analysis_scope == changed_files.
-    let changed_files: Vec<String> = file_symbols.keys().cloned().collect();
     let analysis_scope: Option<Vec<String>> = if change_result.is_full_build {
         None
     } else {
-        Some(changed_files.clone())
+        Some(file_symbols.keys().cloned().collect())
     };
-
-    let existing_file_count = structure::get_existing_file_count(conn);
-    let use_fast_path =
-        !change_result.is_full_build && parse_changes.len() <= FAST_PATH_MAX_CHANGED_FILES && existing_file_count > FAST_PATH_MIN_EXISTING_FILES;
-
-    if use_fast_path {
-        structure::update_changed_file_metrics(
-            conn,
-            &changed_files,
-            &line_count_map,
-            &file_symbols,
-        );
-    } else {
-        // Full structure: directory nodes, contains edges, file + directory metrics.
-        let changed_for_structure: Option<Vec<String>> = if change_result.is_full_build {
-            None
-        } else {
-            Some(changed_files.clone())
-        };
-        structure::build_full_structure(
-            conn,
-            &file_symbols,
-            &collect_result.directories,
-            root_dir,
-            &line_count_map,
-            changed_for_structure.as_deref(),
-        );
-    }
+    run_structure_phase(
+        conn,
+        &file_symbols,
+        &collect_result.directories,
+        root_dir,
+        &line_count_map,
+        parse_changes.len(),
+        change_result.is_full_build,
+    );
     timing.structure_ms = t0.elapsed().as_secs_f64() * 1000.0;
 
     let t0 = Instant::now();
-    // Role classification needs the truly-changed files plus reverse-deps of
-    // any removed files. `do_classify_incremental` expands to neighbours via
-    // the edges table, so reverse-deps of *changed* files are picked up
-    // automatically when their fan-in/fan-out is affected. Reverse-deps of
-    // *removed* files have to be added explicitly — the deleted file's nodes
-    // are gone, so neighbour expansion can't reach the importer. Without this
-    // seed, removal-only builds skip role classification entirely. (#1027)
-    let changed_file_list: Option<Vec<String>> = if change_result.is_full_build {
-        None
-    } else {
-        let mut files = changed_files;
-        if !removal_reverse_deps.is_empty() {
-            let existing: HashSet<String> = files.iter().cloned().collect();
-            for f in removal_reverse_deps {
-                if !existing.contains(&f) {
-                    files.push(f);
-                }
-            }
-        }
-        Some(files)
-    };
-    if let Some(ref files) = changed_file_list {
-        if !files.is_empty() {
-            let _ = roles_db::do_classify_incremental(conn, files);
-        }
-    } else {
-        let _ = roles_db::do_classify_full(conn);
-    }
+    run_role_classification(
+        conn,
+        &file_symbols,
+        removal_reverse_deps,
+        change_result.is_full_build,
+    );
     timing.roles_ms = t0.elapsed().as_secs_f64() * 1000.0;
 
     // ── Stage 8b: Analysis persistence (AST, complexity, CFG, dataflow) ──
-    // Write analysis data from parsed file_symbols directly to DB tables,
-    // eliminating the JS runPostNativeAnalysis step and its WASM re-parse.
-    let include_complexity = opts.complexity.unwrap_or(true);
-    let include_cfg = opts.cfg.unwrap_or(true);
-    let do_analysis = include_ast || include_dataflow || include_cfg || include_complexity;
-
-    let mut analysis_ok = true;
-    if do_analysis {
-        // Determine which files to analyze (excludes reverse-dep files)
-        let analysis_file_set: HashSet<&str> = match &analysis_scope {
-            Some(files) => files.iter().map(|s| s.as_str()).collect(),
-            None => file_symbols.keys().map(|s| s.as_str()).collect(),
-        };
-
-        // Build node ID lookup: (file, name, line) -> node_id
-        let node_id_map = build_analysis_node_map(conn, &analysis_file_set);
-
-        // AST nodes
-        if include_ast {
-            let t0 = Instant::now();
-            let ast_batches = build_ast_batches(&file_symbols, &analysis_file_set);
-            if ast_db::do_insert_ast_nodes(conn, &ast_batches).is_err() {
-                analysis_ok = false;
-            }
-            timing.ast_ms = t0.elapsed().as_secs_f64() * 1000.0;
-        }
-
-        // Complexity metrics
-        if include_complexity {
-            let t0 = Instant::now();
-            if !write_complexity(conn, &file_symbols, &analysis_file_set, &node_id_map) {
-                analysis_ok = false;
-            }
-            timing.complexity_ms = t0.elapsed().as_secs_f64() * 1000.0;
-        }
-
-        // CFG blocks + edges
-        if include_cfg {
-            let t0 = Instant::now();
-            if !write_cfg(conn, &file_symbols, &analysis_file_set, &node_id_map) {
-                analysis_ok = false;
-            }
-            timing.cfg_ms = t0.elapsed().as_secs_f64() * 1000.0;
-        }
-
-        // Dataflow edges
-        if include_dataflow {
-            let t0 = Instant::now();
-            if !write_dataflow(conn, &file_symbols, &analysis_file_set) {
-                analysis_ok = false;
-            }
-            timing.dataflow_ms = t0.elapsed().as_secs_f64() * 1000.0;
-        }
-    }
+    let (do_analysis, analysis_ok) = run_analysis_persistence(
+        conn,
+        &file_symbols,
+        analysis_scope.as_ref(),
+        &opts,
+        include_ast,
+        include_dataflow,
+        &mut timing,
+    );
 
     // ── Stage 9: Finalize ──────────────────────────────────────────────
     let t0 = Instant::now();
@@ -971,24 +1086,73 @@ fn build_file_hash_entries(
 /// miss transitively-required nodes (e.g. a call site whose receiver type
 /// is declared in a file that isn't a direct import target).
 ///
-/// Full builds always load every node — there is no smaller set anyway.
-fn build_and_insert_call_edges(
+/// Constant list of builtin JS receivers excluded from method-resolution
+/// (callers of `console.log` etc. shouldn't get linked to a user-defined
+/// `log` somewhere else). Mirrors `BUILTIN_RECEIVERS` in `build-edges.ts`.
+fn builtin_call_receivers() -> Vec<String> {
+    [
+        "console", "Math", "JSON", "Object", "Array", "String", "Number",
+        "Boolean", "Date", "RegExp", "Map", "Set", "WeakMap", "WeakSet",
+        "Promise", "Symbol", "Error", "TypeError", "RangeError", "Proxy",
+        "Reflect", "Intl", "globalThis", "window", "document", "process",
+        "Buffer", "require",
+    ]
+    .into_iter()
+    .map(String::from)
+    .collect()
+}
+
+const EDGE_NODE_KIND_FILTER: &str = "kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')";
+
+/// For the scoped (incremental, small-batch) path of the edge builder,
+/// compute the set of files that must be loaded: changed/reverse-dep files
+/// plus their direct import targets plus barrel-only files plus the
+/// ultimate definition files barrel chains resolve to. Mirrors the JS
+/// `relevantFiles` accumulation in `loadNodes` (#976, greptile P1).
+fn compute_edge_relevant_files(
+    file_symbols: &HashMap<String, FileSymbols>,
+    import_ctx: &crate::import_edges::ImportEdgeContext,
+) -> HashSet<String> {
+    let mut relevant_files: HashSet<String> = file_symbols.keys().cloned().collect();
+    for (rel_path, symbols) in file_symbols {
+        let abs_file = Path::new(&import_ctx.root_dir).join(rel_path);
+        let abs_str = abs_file.to_str().unwrap_or("");
+        for imp in &symbols.imports {
+            let resolved = import_ctx.get_resolved(abs_str, &imp.source);
+            if resolved.is_empty() {
+                continue;
+            }
+            relevant_files.insert(resolved.clone());
+            if import_ctx.is_barrel_file(&resolved) {
+                for name in &imp.names {
+                    let clean_name = name.strip_prefix("* as ").unwrap_or(name);
+                    let mut visited = HashSet::new();
+                    if let Some(ultimate) =
+                        import_ctx.resolve_barrel_export(&resolved, clean_name, &mut visited)
+                    {
+                        relevant_files.insert(ultimate);
+                    }
+                }
+            }
+        }
+    }
+    for barrel_path in &import_ctx.barrel_only_files {
+        relevant_files.insert(barrel_path.clone());
+    }
+    relevant_files
+}
+
+/// Load candidate edge nodes: scoped via a temp `_edge_files` table when the
+/// incremental small-batch gate passes, otherwise every candidate node in the
+/// DB. Returns a flat `Vec<NodeInfo>` suitable for the native edge builder.
+fn load_edge_node_set(
     conn: &Connection,
     file_symbols: &HashMap<String, FileSymbols>,
-    import_ctx: &ImportEdgeContext,
+    import_ctx: &crate::import_edges::ImportEdgeContext,
     is_incremental: bool,
-) {
-    use crate::edge_builder::*;
+) -> Vec<crate::edge_builder::NodeInfo> {
+    use crate::edge_builder::NodeInfo;
 
-    let node_kind_filter = "kind IN ('function','method','class','interface','struct','type','module','enum','trait','record','constant')";
-
-    // Gate parity with `loadNodes` in `src/domain/graph/builder/stages/build-edges.ts`:
-    //   isFullBuild = false
-    //   && fileSymbols.size <= smallFilesThreshold (5)
-    //   && existingFileCount > FAST_PATH_MIN_EXISTING_FILES (20)
-    // Small fixtures skip the scoped path entirely — the savings are
-    // negligible at that scale and the scoped set can miss nodes that the
-    // edge builder needs for receiver-type resolution (#976).
     let existing_file_count: i64 = conn
         .query_row(
             "SELECT COUNT(*) FROM nodes WHERE kind = 'file'",
@@ -1000,174 +1164,156 @@ fn build_and_insert_call_edges(
         && file_symbols.len() <= crate::constants::FAST_PATH_MAX_CHANGED_FILES
         && existing_file_count > crate::constants::FAST_PATH_MIN_EXISTING_FILES;
 
-    let all_nodes: Vec<NodeInfo> = if scope_eligible {
-        // Build the scoped set: changed/reverse-dep files + their resolved
-        // import targets + any barrel files on the path + the **ultimate**
-        // source files that barrel chains resolve to. The FileEdgeInput
-        // construction below (see `imported_names` at ~L1035) rewrites
-        // `target_file` to the ultimate definition file via
-        // `resolve_barrel_export`; if that file isn't in `relevant_files`
-        // the edge builder's `nodes_by_name_and_file` lookup returns
-        // nothing and the call edge is silently dropped (greptile P1).
-        let mut relevant_files: HashSet<String> = file_symbols.keys().cloned().collect();
-        for (rel_path, symbols) in file_symbols {
-            let abs_file = Path::new(&import_ctx.root_dir).join(rel_path);
-            let abs_str = abs_file.to_str().unwrap_or("");
-            for imp in &symbols.imports {
-                let resolved = import_ctx.get_resolved(abs_str, &imp.source);
-                if resolved.is_empty() {
-                    continue;
-                }
-                relevant_files.insert(resolved.clone());
-                // If the resolved target is a barrel, walk the re-export
-                // chain and add every ultimate definition file that a
-                // named import could resolve to.
-                if import_ctx.is_barrel_file(&resolved) {
-                    for name in &imp.names {
-                        let clean_name = name.strip_prefix("* as ").unwrap_or(name);
-                        let mut visited = HashSet::new();
-                        if let Some(ultimate) = import_ctx.resolve_barrel_export(
-                            &resolved,
-                            clean_name,
-                            &mut visited,
-                        ) {
-                            relevant_files.insert(ultimate);
-                        }
-                    }
-                }
-            }
+    if !scope_eligible {
+        return load_all_edge_nodes(conn);
+    }
+
+    let relevant_files = compute_edge_relevant_files(file_symbols, import_ctx);
+    if relevant_files.is_empty() {
+        return Vec::new();
+    }
+
+    let _ = conn.execute_batch(
+        "CREATE TEMP TABLE IF NOT EXISTS _edge_files (file TEXT NOT NULL);\n         CREATE INDEX IF NOT EXISTS _edge_files_file_idx ON _edge_files (file);",
+    );
+    let _ = conn.execute("DELETE FROM temp._edge_files", []);
+    {
+        let mut ins = match conn.prepare("INSERT INTO temp._edge_files (file) VALUES (?1)") {
+            Ok(s) => s,
+            Err(_) => return Vec::new(),
+        };
+        for f in &relevant_files {
+            let _ = ins.execute(rusqlite::params![f]);
         }
-        for barrel_path in &import_ctx.barrel_only_files {
-            relevant_files.insert(barrel_path.clone());
+    }
+
+    let sql = format!(
+        "SELECT n.id, n.name, n.kind, n.file, n.line FROM nodes n \
+         INNER JOIN temp._edge_files ef ON n.file = ef.file \
+         WHERE n.{EDGE_NODE_KIND_FILTER}",
+    );
+    let nodes: Vec<NodeInfo> = match conn.prepare(&sql) {
+        Ok(mut stmt) => stmt
+            .query_map([], read_edge_node_info)
+            .map(|rows| rows.filter_map(|r| r.ok()).collect())
+            .unwrap_or_default(),
+        Err(_) => Vec::new(),
+    };
+    let _ = conn.execute("DROP TABLE IF EXISTS temp._edge_files", []);
+    nodes
+}
+
+/// Load every candidate edge node from the DB (full builds and scope-ineligible incremental builds).
+fn load_all_edge_nodes(conn: &Connection) -> Vec<crate::edge_builder::NodeInfo> {
+    let sql = format!(
+        "SELECT id, name, kind, file, line FROM nodes WHERE {EDGE_NODE_KIND_FILTER}",
+    );
+    match conn.prepare(&sql) {
+        Ok(mut stmt) => stmt
+            .query_map([], read_edge_node_info)
+            .map(|rows| rows.filter_map(|r| r.ok()).collect())
+            .unwrap_or_default(),
+        Err(_) => Vec::new(),
+    }
+}
+
+/// Row-mapper for the `SELECT id, name, kind, file, line FROM nodes ...`
+/// shape used by both scoped and full edge-node loads.
+fn read_edge_node_info(row: &rusqlite::Row) -> rusqlite::Result<crate::edge_builder::NodeInfo> {
+    Ok(crate::edge_builder::NodeInfo {
+        id: row.get::<_, i64>(0)? as u32,
+        name: row.get(1)?,
+        kind: row.get(2)?,
+        file: row.get(3)?,
+        line: row.get::<_, i64>(4)? as u32,
+    })
+}
+
+/// Load all `file`-kind node IDs into a flat map (one query instead of one
+/// per file). The `name = file` guard avoids accidentally overwriting the
+/// map entry when an unrelated row happens to share the file path (#1028).
+fn load_file_node_id_map(conn: &Connection) -> HashMap<String, u32> {
+    let mut map = HashMap::new();
+    if let Ok(mut stmt) = conn.prepare(
+        "SELECT file, id FROM nodes WHERE kind = 'file' AND line = 0 AND name = file",
+    ) {
+        if let Ok(rows) =
+            stmt.query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)? as u32)))
+        {
+            for r in rows.flatten() {
+                map.insert(r.0, r.1);
+            }
         }
+    }
+    map
+}
 
-        if relevant_files.is_empty() {
-            Vec::new()
-        } else {
-            // Schema qualification matches the existing `_analysis_files`
-            // pattern below: unqualified CREATE (temp schema is the
-            // default for TEMP tables), qualified `temp.` for every
-            // subsequent op. Index the file column so the INNER JOIN is
-            // a lookup rather than a table scan (greptile P2).
-            let _ = conn.execute_batch(
-                "CREATE TEMP TABLE IF NOT EXISTS _edge_files (file TEXT NOT NULL);\n                 CREATE INDEX IF NOT EXISTS _edge_files_file_idx ON _edge_files (file);",
-            );
-            let _ = conn.execute("DELETE FROM temp._edge_files", []);
-            {
-                let mut ins =
-                    match conn.prepare("INSERT INTO temp._edge_files (file) VALUES (?1)") {
-                        Ok(s) => s,
-                        Err(_) => return,
-                    };
-                for f in &relevant_files {
-                    let _ = ins.execute(rusqlite::params![f]);
+/// Resolve a file's imports to the list of `ImportedName` entries the edge
+/// builder consumes. Walks barrel chains to the ultimate definition file so
+/// the edge builder's name-lookup can find the right target (#976 P1).
+fn collect_imported_names_for_file(
+    abs_str: &str,
+    symbols: &FileSymbols,
+    import_ctx: &crate::import_edges::ImportEdgeContext,
+) -> Vec<crate::edge_builder::ImportedName> {
+    use crate::edge_builder::ImportedName;
+    let mut imported_names: Vec<ImportedName> = Vec::new();
+    for imp in &symbols.imports {
+        let resolved_path = import_ctx.get_resolved(abs_str, &imp.source);
+        for name in &imp.names {
+            let clean_name = name.strip_prefix("* as ").unwrap_or(name).to_string();
+            let mut target_file = resolved_path.clone();
+            if import_ctx.is_barrel_file(&resolved_path) {
+                let mut visited = HashSet::new();
+                if let Some(actual) =
+                    import_ctx.resolve_barrel_export(&resolved_path, &clean_name, &mut visited)
+                {
+                    target_file = actual;
                 }
             }
-
-            let sql = format!(
-                "SELECT n.id, n.name, n.kind, n.file, n.line FROM nodes n \
-                 INNER JOIN temp._edge_files ef ON n.file = ef.file \
-                 WHERE n.{node_kind_filter}",
-            );
-            let nodes: Vec<NodeInfo> = match conn.prepare(&sql) {
-                Ok(mut stmt) => stmt
-                    .query_map([], |row| {
-                        Ok(NodeInfo {
-                            id: row.get::<_, i64>(0)? as u32,
-                            name: row.get(1)?,
-                            kind: row.get(2)?,
-                            file: row.get(3)?,
-                            line: row.get::<_, i64>(4)? as u32,
-                        })
-                    })
-                    .map(|rows| rows.filter_map(|r| r.ok()).collect())
-                    .unwrap_or_default(),
-                Err(_) => Vec::new(),
-            };
-            let _ = conn.execute("DROP TABLE IF EXISTS temp._edge_files", []);
-            nodes
-        }
-    } else {
-        let sql = format!("SELECT id, name, kind, file, line FROM nodes WHERE {node_kind_filter}");
-        match conn.prepare(&sql) {
-            Ok(mut stmt) => stmt
-                .query_map([], |row| {
-                    Ok(NodeInfo {
-                        id: row.get::<_, i64>(0)? as u32,
-                        name: row.get(1)?,
-                        kind: row.get(2)?,
-                        file: row.get(3)?,
-                        line: row.get::<_, i64>(4)? as u32,
-                    })
-                })
-                .map(|rows| rows.filter_map(|r| r.ok()).collect())
-                .unwrap_or_default(),
-            Err(_) => Vec::new(),
+            imported_names.push(ImportedName {
+                name: clean_name,
+                file: target_file,
+            });
         }
-    };
+    }
+    imported_names
+}
+
+/// Insert the edges produced by the native edge builder into the edges table.
+fn insert_call_edge_rows(conn: &Connection, edges: &[crate::edge_builder::ComputedEdge]) {
+    if edges.is_empty() {
+        return;
+    }
+    let edge_rows: Vec<crate::edges_db::EdgeRow> = edges
+        .iter()
+        .map(|e| crate::edges_db::EdgeRow {
+            source_id: e.source_id,
+            target_id: e.target_id,
+            kind: e.kind.clone(),
+            confidence: e.confidence,
+            dynamic: e.dynamic,
+        })
+        .collect();
+    let _ = crate::edges_db::do_insert_edges(conn, &edge_rows);
+}
+
+/// Builds and inserts call edges; full builds load every node — there is no smaller set anyway.
+fn build_and_insert_call_edges(
+    conn: &Connection,
+    file_symbols: &HashMap<String, FileSymbols>,
+    import_ctx: &ImportEdgeContext,
+    is_incremental: bool,
+) {
+    use crate::edge_builder::*;
 
+    let all_nodes = load_edge_node_set(conn, file_symbols, import_ctx, is_incremental);
     if all_nodes.is_empty() {
         return;
     }
 
-    let builtin_receivers: Vec<String> = vec![
-        "console",
-        "Math",
-        "JSON",
-        "Object",
-        "Array",
-        "String",
-        "Number",
-        "Boolean",
-        "Date",
-        "RegExp",
-        "Map",
-        "Set",
-        "WeakMap",
-        "WeakSet",
-        "Promise",
-        "Symbol",
-        "Error",
-        "TypeError",
-        "RangeError",
-        "Proxy",
-        "Reflect",
-        "Intl",
-        "globalThis",
-        "window",
-        "document",
-        "process",
-        "Buffer",
-        "require",
-    ]
-    .into_iter()
-    .map(String::from)
-    .collect();
-
-    // Pre-load every file node ID into a HashMap with one query, replacing
-    // the per-file `query_row` cycle that paid a fresh sqlite3_prepare for
-    // each entry in `file_symbols` (#1013).
-    //
-    // The `name = file` predicate matches the legacy per-row lookup
-    // (`WHERE name = ? AND file = ?` with both binds set to `rel_path`).
-    // For file-kind nodes `name` and `file` are conventionally identical,
-    // but keeping the guard prevents an unrelated row from silently
-    // overwriting the map entry for `file` (#1028 review).
-    let file_node_ids: HashMap<String, u32> = {
-        let mut map = HashMap::new();
-        if let Ok(mut stmt) = conn.prepare(
-            "SELECT file, id FROM nodes WHERE kind = 'file' AND line = 0 AND name = file",
-        ) {
-            if let Ok(rows) = stmt.query_map([], |row| {
-                Ok((row.get::<_, String>(0)?, row.get::<_, i64>(1)? as u32))
-            }) {
-                for r in rows.flatten() {
-                    map.insert(r.0, r.1);
-                }
-            }
-        }
-        map
-    };
+    let builtin_receivers = builtin_call_receivers();
+    let file_node_ids = load_file_node_id_map(conn);
 
     // Build FileEdgeInput entries for the native edge builder
     let mut file_entries: Vec<FileEdgeInput> = Vec::new();
@@ -1175,35 +1321,14 @@ fn build_and_insert_call_edges(
         if import_ctx.barrel_only_files.contains(rel_path) {
             continue;
         }
-
         let file_node_id: u32 = match file_node_ids.get(rel_path) {
             Some(&id) => id,
             None => continue,
         };
 
-        // Build imported names from resolved imports
-        let mut imported_names: Vec<ImportedName> = Vec::new();
         let abs_file = Path::new(&import_ctx.root_dir).join(rel_path);
         let abs_str = abs_file.to_str().unwrap_or("");
-        for imp in &symbols.imports {
-            let resolved_path = import_ctx.get_resolved(abs_str, &imp.source);
-            for name in &imp.names {
-                let clean_name = name.strip_prefix("* as ").unwrap_or(name).to_string();
-                let mut target_file = resolved_path.clone();
-                if import_ctx.is_barrel_file(&resolved_path) {
-                    let mut visited = HashSet::new();
-                    if let Some(actual) =
-                        import_ctx.resolve_barrel_export(&resolved_path, &clean_name, &mut visited)
-                    {
-                        target_file = actual;
-                    }
-                }
-                imported_names.push(ImportedName {
-                    name: clean_name,
-                    file: target_file,
-                });
-            }
-        }
+        let imported_names = collect_imported_names_for_file(abs_str, symbols, import_ctx);
 
         let type_map: Vec<TypeMapInput> = symbols
             .type_map
@@ -1217,7 +1342,7 @@ fn build_and_insert_call_edges(
 
         file_entries.push(FileEdgeInput {
             file: rel_path.clone(),
-            file_node_id: file_node_id,
+            file_node_id,
             definitions: symbols
                 .definitions
                 .iter()
@@ -1252,23 +1377,8 @@ fn build_and_insert_call_edges(
         });
     }
 
-    // Call the native edge builder
     let computed_edges = build_call_edges(file_entries, all_nodes, builtin_receivers);
-
-    // Insert edges
-    if !computed_edges.is_empty() {
-        let edge_rows: Vec<crate::edges_db::EdgeRow> = computed_edges
-            .iter()
-            .map(|e| crate::edges_db::EdgeRow {
-                source_id: e.source_id,
-                target_id: e.target_id,
-                kind: e.kind.clone(),
-                confidence: e.confidence,
-                dynamic: e.dynamic,
-            })
-            .collect();
-        let _ = crate::edges_db::do_insert_edges(conn, &edge_rows);
-    }
+    insert_call_edge_rows(conn, &computed_edges);
 }
 
 // ── Analysis persistence helpers ─────────────────────────────────────────
diff --git a/crates/codegraph-core/src/edge_builder.rs b/crates/codegraph-core/src/edge_builder.rs
index 7fb3beac6..2d0499f73 100644
--- a/crates/codegraph-core/src/edge_builder.rs
+++ b/crates/codegraph-core/src/edge_builder.rs
@@ -549,120 +549,161 @@ pub fn build_import_edges(
     );
 
     let mut edges = Vec::new();
-
+    let normalized_root = root_dir.replace('\\', "/");
     for file_input in &files {
-        let abs_file = format!("{}/{}", root_dir.replace('\\', "/"), file_input.file);
-
+        let abs_file = format!("{normalized_root}/{}", file_input.file);
         for imp in &file_input.imports {
-            // Barrel-only files: only emit reexport edges
-            if file_input.is_barrel_only && !imp.reexport {
-                continue;
-            }
+            process_single_import(&mut edges, file_input, imp, &abs_file, &ctx);
+        }
+    }
+    edges
+}
+
+// ── build_import_edges helpers ──────────────────────────────────────────
+
+/// Strip a literal `"* as "` / `"*\tas "` prefix (both exactly 5 bytes) from
+/// an import name so the bare symbol can be looked up against the target's
+/// exports. Narrower than the JS `name.replace(/^\*\s+as\s+/, '')`.
+fn strip_star_as_prefix(name: &str) -> &str {
+    if name.starts_with("* as ") || name.starts_with("*\tas ") {
+        &name[5..]
+    } else {
+        name
+    }
+}
 
-            // Look up resolved path
-            let resolve_key = format!("{}|{}", abs_file, imp.source);
-            let resolved_path = match ctx.resolved.get(resolve_key.as_str()) {
-                Some(p) => *p,
-                None => continue,
-            };
-
-            // Look up target file node ID
-            let target_node_id = match ctx.file_node_map.get(resolved_path) {
-                Some(id) => *id,
-                None => continue,
-            };
-
-            // Determine edge kind
-            let edge_kind = if imp.reexport {
-                "reexports"
-            } else if imp.type_only {
-                "imports-type"
-            } else if imp.dynamic_import {
-                "dynamic-imports"
-            } else {
-                "imports"
-            };
+/// Classify an import into its edge kind: reexports / imports-type /
+/// dynamic-imports / imports. Mirrors the JS classifier in `build-edges.ts`.
+fn classify_import_edge_kind(imp: &ImportInfo) -> &'static str {
+    if imp.reexport {
+        "reexports"
+    } else if imp.type_only {
+        "imports-type"
+    } else if imp.dynamic_import {
+        "dynamic-imports"
+    } else {
+        "imports"
+    }
+}
 
+/// For a `type` import targeting a barrel or resolved file, emit one
+/// symbol-level `imports-type` edge per named symbol so the target symbols
+/// receive fan-in credit and aren't misclassified as dead code.
+fn emit_type_only_symbol_edges(
+    edges: &mut Vec<ComputedEdge>,
+    file_input: &ImportEdgeFileInput,
+    imp: &ImportInfo,
+    resolved_path: &str,
+    ctx: &ImportEdgeContext,
+) {
+    if !imp.type_only || ctx.symbol_node_map.is_empty() {
+        return;
+    }
+    for name in &imp.names {
+        let clean_name = strip_star_as_prefix(name);
+        let barrel_target = if ctx.barrel_set.contains(resolved_path) {
+            let mut visited = HashSet::new();
+            barrel_resolution::resolve_barrel_export(ctx, resolved_path, clean_name, &mut visited)
+        } else {
+            None
+        };
+        let sym_id = barrel_target
+            .as_deref()
+            .and_then(|f| ctx.symbol_node_map.get(&(clean_name, f)))
+            .or_else(|| ctx.symbol_node_map.get(&(clean_name, resolved_path)));
+        if let Some(&id) = sym_id {
             edges.push(ComputedEdge {
                 source_id: file_input.file_node_id,
-                target_id: target_node_id,
-                kind: edge_kind.to_string(),
+                target_id: id,
+                kind: "imports-type".to_string(),
                 confidence: 1.0,
                 dynamic: 0,
             });
+        }
+    }
+}
 
-            // Type-only imports: create symbol-level edges so the target symbols
-            // get fan-in credit and aren't falsely classified as dead code.
-            if imp.type_only && !ctx.symbol_node_map.is_empty() {
-                for name in &imp.names {
-                    let clean_name = if name.starts_with("* as ") || name.starts_with("*\tas ") {
-                        &name[5..]
-                    } else {
-                        name.as_str()
-                    };
-                    // Try barrel resolution first, then fall back to the resolved path
-                    let barrel_target = if ctx.barrel_set.contains(resolved_path) {
-                        let mut visited = HashSet::new();
-                        barrel_resolution::resolve_barrel_export(&ctx, resolved_path, clean_name, &mut visited)
-                    } else {
-                        None
-                    };
-                    let sym_id = barrel_target
-                        .as_deref()
-                        .and_then(|f| ctx.symbol_node_map.get(&(clean_name, f)))
-                        .or_else(|| ctx.symbol_node_map.get(&(clean_name, resolved_path)));
-                    if let Some(&id) = sym_id {
-                        edges.push(ComputedEdge {
-                            source_id: file_input.file_node_id,
-                            target_id: id,
-                            kind: "imports-type".to_string(),
-                            confidence: 1.0,
-                            dynamic: 0,
-                        });
-                    }
-                }
-            }
-
-            // Barrel resolution: if not reexport and target is a barrel file
-            if !imp.reexport && ctx.barrel_set.contains(resolved_path) {
-                let mut resolved_sources: HashSet<String> = HashSet::new();
-                for name in &imp.names {
-                    let clean_name = if name.starts_with("* as ") || name.starts_with("*\tas ") {
-                        // Strip "* as " or "*\tas " prefix (both exactly 5 bytes)
-                        // JS equivalent: name.replace(/^\*\s+as\s+/, '')
-                        &name[5..]
-                    } else {
-                        name.as_str()
-                    };
-
-                    let mut visited = HashSet::new();
-                    let actual = barrel_resolution::resolve_barrel_export(&ctx, resolved_path, clean_name, &mut visited);
-
-                    if let Some(actual_source) = actual {
-                        if actual_source != resolved_path && !resolved_sources.contains(&actual_source) {
-                            if let Some(&actual_node_id) = ctx.file_node_map.get(actual_source.as_str()) {
-                                let barrel_kind = match edge_kind {
-                                    "imports-type" => "imports-type",
-                                    "dynamic-imports" => "dynamic-imports",
-                                    _ => "imports",
-                                };
-                                edges.push(ComputedEdge {
-                                    source_id: file_input.file_node_id,
-                                    target_id: actual_node_id,
-                                    kind: barrel_kind.to_string(),
-                                    confidence: 0.9,
-                                    dynamic: 0,
-                                });
-                            }
-                            resolved_sources.insert(actual_source);
-                        }
-                    }
-                }
-            }
+/// Emit barrel-through edges for a single import. When a non-reexport
+/// import resolves to a barrel file, every imported name is chased through
+/// `resolve_barrel_export` and an edge with confidence 0.9 is added to the
+/// file it reports; `seen_targets` keeps each file to one visit per import.
+fn emit_barrel_through_edges(
+    edges: &mut Vec<ComputedEdge>,
+    file_input: &ImportEdgeFileInput,
+    imp: &ImportInfo,
+    resolved_path: &str,
+    edge_kind: &str,
+    ctx: &ImportEdgeContext,
+) {
+    if imp.reexport || !ctx.barrel_set.contains(resolved_path) {
+        return;
+    }
+    let barrel_kind = if matches!(edge_kind, "imports-type" | "dynamic-imports") {
+        edge_kind
+    } else {
+        "imports"
+    };
+    let mut seen_targets: HashSet<String> = HashSet::new();
+    for raw_name in &imp.names {
+        let symbol = strip_star_as_prefix(raw_name);
+        let mut visited = HashSet::new();
+        let target_file = match barrel_resolution::resolve_barrel_export(
+            ctx,
+            resolved_path,
+            symbol,
+            &mut visited,
+        ) {
+            Some(path) => path,
+            None => continue,
+        };
+        // Skip the barrel itself and targets already handled for this import.
+        if target_file == resolved_path || seen_targets.contains(&target_file) {
+            continue;
+        }
+        if let Some(&node_id) = ctx.file_node_map.get(target_file.as_str()) {
+            edges.push(ComputedEdge {
+                source_id: file_input.file_node_id,
+                target_id: node_id,
+                kind: barrel_kind.to_string(),
+                confidence: 0.9,
+                dynamic: 0,
+            });
         }
+        seen_targets.insert(target_file);
     }
+}
 
-    edges
+/// Handle one import of `file_input`: skip non-reexports of barrel-only files,
+/// push the primary file edge, then emit type-symbol and barrel-through edges.
+fn process_single_import(
+    edges: &mut Vec<ComputedEdge>,
+    file_input: &ImportEdgeFileInput,
+    imp: &ImportInfo,
+    abs_file: &str,
+    ctx: &ImportEdgeContext,
+) {
+    if !imp.reexport && file_input.is_barrel_only {
+        return;
+    }
+    let lookup_key = format!("{abs_file}|{}", imp.source);
+    let resolved_path = match ctx.resolved.get(lookup_key.as_str()) {
+        Some(&path) => path,
+        None => return,
+    };
+    let target_node_id = match ctx.file_node_map.get(resolved_path) {
+        Some(&node_id) => node_id,
+        None => return,
+    };
+    let edge_kind = classify_import_edge_kind(imp);
+    edges.push(ComputedEdge {
+        source_id: file_input.file_node_id,
+        target_id: target_node_id,
+        kind: edge_kind.to_string(),
+        confidence: 1.0,
+        dynamic: 0,
+    });
+    emit_type_only_symbol_edges(edges, file_input, imp, resolved_path, ctx);
+    emit_barrel_through_edges(edges, file_input, imp, resolved_path, edge_kind, ctx);
 }
 
 #[cfg(test)]
diff --git a/crates/codegraph-core/src/graph_algorithms.rs b/crates/codegraph-core/src/graph_algorithms.rs
index a30c269ff..4d08a4d67 100644
--- a/crates/codegraph-core/src/graph_algorithms.rs
+++ b/crates/codegraph-core/src/graph_algorithms.rs
@@ -70,6 +70,58 @@ impl<'a> DirectedGraph<'a> {
     }
 }
 
+// ─── Traversal helpers ───────────────────────────────────────────────
+
+/// Collect the nodes `bfs_traversal` should visit next from `current`.
+///
+/// * `"backward"` – predecessors of `current`
+/// * `"both"` – successors of `current`, then its predecessors
+/// * anything else – successors of `current`
+///
+/// A node absent from an adjacency map contributes no neighbors.
+///
+/// Mirrors the JS direction enum.
+fn bfs_neighbors_for_direction<'a>(
+    graph: &'a DirectedGraph<'a>,
+    current: &str,
+    direction: &str,
+) -> Vec<&'a str> {
+    // Which adjacency maps take part; successors are always emitted first.
+    let (use_successors, use_predecessors) = match direction {
+        "backward" => (false, true),
+        "both" => (true, true),
+        _ => (true, false),
+    };
+    let mut neighbors: Vec<&'a str> = Vec::new();
+    if use_successors {
+        if let Some(succ) = graph.successors.get(current) {
+            neighbors.extend(succ.iter());
+        }
+    }
+    if use_predecessors {
+        if let Some(pred) = graph.predecessors.get(current) {
+            neighbors.extend(pred.iter());
+        }
+    }
+    neighbors
+}
+
+/// Rebuild the start → `terminal` path from BFS parent pointers. Links in
+/// `parent` are followed from `terminal` until a node has no recorded
+/// parent, and the collected nodes are then reversed into forward order.
+fn reconstruct_bfs_path<'a>(
+    parent: &HashMap<&'a str, Option<&'a str>>,
+    terminal: &'a str,
+) -> Vec<String> {
+    // `successors` stops as soon as the closure yields `None`.
+    let mut path: Vec<String> =
+        std::iter::successors(Some(terminal), |&n| parent.get(n).copied().flatten())
+            .map(str::to_string)
+            .collect();
+    path.reverse();
+    path
+}
+
 // ─── BFS ─────────────────────────────────────────────────────────────
 
 /// BFS traversal on a directed graph built from edges.
@@ -102,33 +154,7 @@ pub fn bfs_traversal(
         if depth >= max_depth {
             continue;
         }
-
-        let neighbors: Vec<&str> = match dir {
-            "backward" => graph
-                .predecessors
-                .get(current)
-                .map(|v| v.as_slice())
-                .unwrap_or(&[])
-                .to_vec(),
-            "both" => {
-                let mut all: Vec<&str> = Vec::new();
-                if let Some(succ) = graph.successors.get(current) {
-                    all.extend(succ.iter());
-                }
-                if let Some(pred) = graph.predecessors.get(current) {
-                    all.extend(pred.iter());
-                }
-                all
-            }
-            _ => graph
-                .successors
-                .get(current)
-                .map(|v| v.as_slice())
-                .unwrap_or(&[])
-                .to_vec(),
-        };
-
-        for n in neighbors {
+        for n in bfs_neighbors_for_direction(&graph, current, dir) {
             if !depths.contains_key(n) {
                 depths.insert(n, depth + 1);
                 queue.push_back(n);
@@ -166,24 +192,19 @@ pub fn shortest_path(edges: Vec<GraphEdge>, from_id: String, to_id: String) -> V
     queue.push_back(from_id.as_str());
 
     while let Some(current) = queue.pop_front() {
-        if let Some(neighbors) = graph.successors.get(current) {
-            for &neighbor in neighbors {
-                if parent.contains_key(neighbor) {
-                    continue;
-                }
-                parent.insert(neighbor, Some(current));
-                if neighbor == to_id.as_str() {
-                    let mut path: Vec<String> = Vec::new();
-                    let mut node: Option<&str> = Some(neighbor);
-                    while let Some(n) = node {
-                        path.push(n.to_string());
-                        node = parent.get(n).copied().flatten();
-                    }
-                    path.reverse();
-                    return path;
-                }
-                queue.push_back(neighbor);
+        let neighbors = match graph.successors.get(current) {
+            Some(n) => n,
+            None => continue,
+        };
+        for &neighbor in neighbors {
+            if parent.contains_key(neighbor) {
+                continue;
+            }
+            parent.insert(neighbor, Some(current));
+            if neighbor == to_id.as_str() {
+                return reconstruct_bfs_path(&parent, neighbor);
             }
+            queue.push_back(neighbor);
         }
     }
 
diff --git a/crates/codegraph-core/src/import_edges.rs b/crates/codegraph-core/src/import_edges.rs
index 458476923..f000a808c 100644
--- a/crates/codegraph-core/src/import_edges.rs
+++ b/crates/codegraph-core/src/import_edges.rs
@@ -276,16 +276,144 @@ fn collect_type_only_lookup_pairs(ctx: &ImportEdgeContext) -> HashSet<(String, S
 /// - `reexports` for re-exports
 ///
 /// Also creates barrel-through edges (confidence 0.9) for imports targeting barrel files.
+/// Edge kind for an import, by priority: reexports, imports-type, dynamic-imports, imports.
+fn classify_import_kind(imp: &crate::types::Import) -> &'static str {
+    match (
+        imp.reexport.unwrap_or(false),
+        imp.type_only.unwrap_or(false),
+        imp.dynamic_import.unwrap_or(false),
+    ) {
+        (true, _, _) => "reexports",
+        (_, true, _) => "imports-type",
+        (_, _, true) => "dynamic-imports",
+        _ => "imports",
+    }
+}
+
+/// For a `type` import, add one symbol-level `imports-type` edge per name so
+/// the target symbols receive fan-in credit and aren't classified dead.
+fn emit_type_only_symbol_rows(
+    edges: &mut Vec<EdgeRow>,
+    file_node_id: i64,
+    imp: &crate::types::Import,
+    resolved_path: &str,
+    ctx: &ImportEdgeContext,
+    symbol_node_ids: &HashMap<(String, String), i64>,
+) {
+    if !imp.type_only.unwrap_or(false) {
+        return;
+    }
+    for raw_name in &imp.names {
+        let symbol = raw_name.strip_prefix("* as ").unwrap_or(raw_name);
+        // Through a barrel, use the file `resolve_barrel_export` reports, if any.
+        let lookup_file = if ctx.is_barrel_file(resolved_path) {
+            let mut visited = HashSet::new();
+            ctx.resolve_barrel_export(resolved_path, symbol, &mut visited)
+                .unwrap_or_else(|| resolved_path.to_string())
+        } else {
+            resolved_path.to_string()
+        };
+        let lookup_key = (symbol.to_string(), lookup_file);
+        if let Some(&symbol_id) = symbol_node_ids.get(&lookup_key) {
+            edges.push(EdgeRow {
+                source_id: file_node_id,
+                target_id: symbol_id,
+                kind: "imports-type".to_string(),
+                confidence: 1.0,
+                dynamic: 0,
+            });
+        }
+    }
+}
+
+/// For a non-reexport import that resolves to a barrel file, follow each name
+/// through the barrel chain and add at most one 0.9-confidence edge per file.
+fn emit_barrel_through_rows(
+    edges: &mut Vec<EdgeRow>,
+    file_node_id: i64,
+    imp: &crate::types::Import,
+    resolved_path: &str,
+    edge_kind: &str,
+    ctx: &ImportEdgeContext,
+    file_node_ids: &HashMap<String, i64>,
+) {
+    if imp.reexport.unwrap_or(false) || !ctx.is_barrel_file(resolved_path) {
+        return;
+    }
+    let through_kind = if matches!(edge_kind, "imports-type" | "dynamic-imports") {
+        edge_kind
+    } else {
+        "imports"
+    };
+    let mut seen_files: HashSet<String> = HashSet::new();
+    for raw_name in &imp.names {
+        let symbol = raw_name.strip_prefix("* as ").unwrap_or(raw_name);
+        let mut visited = HashSet::new();
+        let definition_file =
+            match ctx.resolve_barrel_export(resolved_path, symbol, &mut visited) {
+                Some(file) => file,
+                None => continue,
+            };
+        // `insert` returns false when this import already reached the file.
+        if definition_file == resolved_path || !seen_files.insert(definition_file.clone()) {
+            continue;
+        }
+        if let Some(&definition_id) = file_node_ids.get(&definition_file) {
+            edges.push(EdgeRow {
+                source_id: file_node_id,
+                target_id: definition_id,
+                kind: through_kind.to_string(),
+                confidence: 0.9,
+                dynamic: 0,
+            });
+        }
+    }
+}
+
+/// Emit every edge one import contributes for one source file.
+fn emit_edges_for_import(
+    edges: &mut Vec<EdgeRow>,
+    file_node_id: i64,
+    abs_str: &str,
+    imp: &crate::types::Import,
+    is_barrel_only: bool,
+    ctx: &ImportEdgeContext,
+    file_node_ids: &HashMap<String, i64>,
+    symbol_node_ids: &HashMap<(String, String), i64>,
+) {
+    // Barrel-only files contribute reexport edges only.
+    if is_barrel_only && !imp.reexport.unwrap_or(false) {
+        return;
+    }
+    let resolved_path = ctx.get_resolved(abs_str, &imp.source);
+    let target_id = match file_node_ids.get(&resolved_path).copied() {
+        Some(id) => id,
+        None => return,
+    };
+    let edge_kind = classify_import_kind(imp);
+    edges.push(EdgeRow {
+        source_id: file_node_id,
+        target_id,
+        kind: edge_kind.to_string(),
+        confidence: 1.0,
+        dynamic: 0,
+    });
+    emit_type_only_symbol_rows(edges, file_node_id, imp, &resolved_path, ctx, symbol_node_ids);
+    emit_barrel_through_rows(
+        edges,
+        file_node_id,
+        imp,
+        &resolved_path,
+        edge_kind,
+        ctx,
+        file_node_ids,
+    );
+}
+
 pub fn build_import_edges(conn: &Connection, ctx: &ImportEdgeContext) -> Vec<EdgeRow> {
     let mut edges = Vec::new();
 
-    // Pre-load all file node IDs once. Previously this was N x query_row,
-    // each of which ran a fresh sqlite3_prepare/step/finalize cycle (#1013).
     let file_node_ids = load_file_node_ids(conn);
-    // Only the symbols actually referenced by type-only imports are needed —
-    // skip the lookup entirely when no type-only imports exist (the common
-    // case), and otherwise issue a chunked `(name, file) IN (...)` query so
-    // memory stays bounded even on large monorepos (#1028 review).
     let needed_symbol_pairs = collect_type_only_lookup_pairs(ctx);
     let symbol_node_ids = if needed_symbol_pairs.is_empty() {
         HashMap::new()
@@ -304,92 +432,16 @@ pub fn build_import_edges(conn: &Connection, ctx: &ImportEdgeContext) -> Vec<Edg
         let abs_str = abs_file.to_str().unwrap_or("");
 
         for imp in &symbols.imports {
-            let is_reexport = imp.reexport.unwrap_or(false);
-            // Barrel-only files: only emit reexport edges, skip regular imports
-            if is_barrel_only && !is_reexport {
-                continue;
-            }
-
-            let resolved_path = ctx.get_resolved(abs_str, &imp.source);
-            let target_id = match file_node_ids.get(&resolved_path) {
-                Some(&id) => id,
-                None => continue,
-            };
-
-            let edge_kind = if is_reexport {
-                "reexports"
-            } else if imp.type_only.unwrap_or(false) {
-                "imports-type"
-            } else if imp.dynamic_import.unwrap_or(false) {
-                "dynamic-imports"
-            } else {
-                "imports"
-            };
-
-            edges.push(EdgeRow {
-                source_id: file_node_id,
-                target_id,
-                kind: edge_kind.to_string(),
-                confidence: 1.0,
-                dynamic: 0,
-            });
-
-            // Type-only imports: create symbol-level edges so the target symbols
-            // get fan-in credit and aren't falsely classified as dead code.
-            if imp.type_only.unwrap_or(false) {
-                for name in &imp.names {
-                    let clean_name = name.strip_prefix("* as ").unwrap_or(name);
-                    let mut target_file = resolved_path.clone();
-                    if ctx.is_barrel_file(&resolved_path) {
-                        let mut visited = HashSet::new();
-                        if let Some(actual) = ctx.resolve_barrel_export(&resolved_path, clean_name, &mut visited) {
-                            target_file = actual;
-                        }
-                    }
-                    if let Some(&sym_id) =
-                        symbol_node_ids.get(&(clean_name.to_string(), target_file))
-                    {
-                        edges.push(EdgeRow {
-                            source_id: file_node_id,
-                            target_id: sym_id,
-                            kind: "imports-type".to_string(),
-                            confidence: 1.0,
-                            dynamic: 0,
-                        });
-                    }
-                }
-            }
-
-            // Build barrel-through edges if the target is a barrel file
-            if !is_reexport && ctx.is_barrel_file(&resolved_path) {
-                let mut resolved_sources = HashSet::new();
-                for name in &imp.names {
-                    let clean_name = name.strip_prefix("* as ").unwrap_or(name);
-                    let mut visited = HashSet::new();
-                    if let Some(actual_source) =
-                        ctx.resolve_barrel_export(&resolved_path, clean_name, &mut visited)
-                    {
-                        if actual_source != resolved_path
-                            && resolved_sources.insert(actual_source.clone())
-                        {
-                            if let Some(&actual_id) = file_node_ids.get(&actual_source) {
-                                let through_kind = match edge_kind {
-                                    "imports-type" => "imports-type",
-                                    "dynamic-imports" => "dynamic-imports",
-                                    _ => "imports",
-                                };
-                                edges.push(EdgeRow {
-                                    source_id: file_node_id,
-                                    target_id: actual_id,
-                                    kind: through_kind.to_string(),
-                                    confidence: 0.9,
-                                    dynamic: 0,
-                                });
-                            }
-                        }
-                    }
-                }
-            }
+            emit_edges_for_import(
+                &mut edges,
+                file_node_id,
+                abs_str,
+                imp,
+                is_barrel_only,
+                ctx,
+                &file_node_ids,
+                &symbol_node_ids,
+            );
         }
     }
 
diff --git a/crates/codegraph-core/src/import_resolution.rs b/crates/codegraph-core/src/import_resolution.rs
index 67d63137d..c701d9c4a 100644
--- a/crates/codegraph-core/src/import_resolution.rs
+++ b/crates/codegraph-core/src/import_resolution.rs
@@ -134,53 +134,59 @@ pub fn resolve_import_path(
 }
 
 /// Inner implementation with optional known_files cache.
-fn resolve_import_path_inner(
-    from_file: &str,
+/// Express `candidate` relative to `root_dir` and normalize the result.
+/// A candidate that does not sit under `root_dir` is normalized as-is.
+/// Used as the success exit of every probe in `resolve_import_path_inner`.
+fn relativize_to_root(candidate: &str, root_dir: &str) -> String {
+    // `strip_prefix` fails when `candidate` is not under the root.
+    let relative = Path::new(candidate).strip_prefix(Path::new(root_dir));
+    match relative {
+        Ok(rel) => normalize_path(&rel.display().to_string()),
+        Err(_) => normalize_path(candidate),
+    }
+}
+
+/// Resolve a non-relative import source (path alias or bare specifier).
+/// Returns the alias target via `relativize_to_root`, or `import_source` unchanged if no alias matches.
+fn resolve_non_relative_import(
     import_source: &str,
     root_dir: &str,
     aliases: &PathAliases,
     known_files: Option<&HashSet<String>>,
 ) -> String {
-    // Try alias resolution for non-relative imports
-    if !import_source.starts_with('.') {
-        if let Some(alias_resolved) =
-            resolve_via_alias(import_source, aliases, root_dir, known_files)
-        {
-            let root = Path::new(root_dir);
-            if let Ok(rel) = Path::new(&alias_resolved).strip_prefix(root) {
-                return normalize_path(&rel.display().to_string());
-            }
-            return normalize_path(&alias_resolved);
-        }
-        // Bare specifier (e.g., "lodash") — return as-is
-        return import_source.to_string();
+    if let Some(alias_resolved) = resolve_via_alias(import_source, aliases, root_dir, known_files) {
+        return relativize_to_root(&alias_resolved, root_dir);
     }
+    import_source.to_string()
+}
 
-    // Relative import — normalize immediately to remove `.` / `..` segments
-    let dir = Path::new(from_file).parent().unwrap_or(Path::new(""));
-    let resolved = clean_path(&dir.join(import_source));
-    let resolved_str = resolved.display().to_string().replace('\\', "/");
-
-    // .js → .ts remap
-    if resolved_str.ends_with(".js") {
-        let ts_candidate = resolved_str.replace(".js", ".ts");
-        if file_exists(&ts_candidate, known_files, root_dir) {
-            let root = Path::new(root_dir);
-            if let Ok(rel) = Path::new(&ts_candidate).strip_prefix(root) {
-                return normalize_path(&rel.display().to_string());
-            }
-        }
-        let tsx_candidate = resolved_str.replace(".js", ".tsx");
-        if file_exists(&tsx_candidate, known_files, root_dir) {
-            let root = Path::new(root_dir);
-            if let Ok(rel) = Path::new(&tsx_candidate).strip_prefix(root) {
-                return normalize_path(&rel.display().to_string());
-            }
+/// Probe the `.js → .ts/.tsx` remap candidates and return the first existing
+/// file's relativized path; `None` if there is no `.js` suffix or no candidate exists.
+fn probe_js_to_ts_remap(
+    resolved_str: &str,
+    root_dir: &str,
+    known_files: Option<&HashSet<String>>,
+) -> Option<String> {
+    // Swap only the trailing `.js`: `str::replace` would also rewrite a `.js`
+    // that occurs earlier in the path (e.g. a directory named `vendor.js`).
+    let stem = resolved_str.strip_suffix(".js")?;
+    for replacement in [".ts", ".tsx"] {
+        let candidate = format!("{stem}{replacement}");
+        if file_exists(&candidate, known_files, root_dir) {
+            return Some(relativize_to_root(&candidate, root_dir));
         }
     }
+    None
+}
 
-    // Extension probing
-    let extensions = [
+/// Probe known extensions (TS/JS/Python plus index files) for an existing
+/// match against the normalized relative path stem.
+fn probe_known_extensions(
+    resolved_str: &str,
+    root_dir: &str,
+    known_files: Option<&HashSet<String>>,
+) -> Option<String> {
+    const EXTENSIONS: &[&str] = &[
         ".ts",
         ".tsx",
         ".js",
@@ -193,31 +199,40 @@ fn resolve_import_path_inner(
         "/index.js",
         "/__init__.py",
     ];
-    for ext in &extensions {
-        let candidate = format!("{}{}", resolved_str, ext);
+    for ext in EXTENSIONS {
+        let candidate = format!("{resolved_str}{ext}");
         if file_exists(&candidate, known_files, root_dir) {
-            let root = Path::new(root_dir);
-            if let Ok(rel) = Path::new(&candidate).strip_prefix(root) {
-                return normalize_path(&rel.display().to_string());
-            }
+            return Some(relativize_to_root(&candidate, root_dir));
         }
     }
+    None
+}
 
-    // Exact match
-    if file_exists(&resolved_str, known_files, root_dir) {
-        let root = Path::new(root_dir);
-        if let Ok(rel) = Path::new(&resolved_str).strip_prefix(root) {
-            return normalize_path(&rel.display().to_string());
-        }
+fn resolve_import_path_inner(
+    from_file: &str,
+    import_source: &str,
+    root_dir: &str,
+    aliases: &PathAliases,
+    known_files: Option<&HashSet<String>>,
+) -> String {
+    if !import_source.starts_with('.') {
+        return resolve_non_relative_import(import_source, root_dir, aliases, known_files);
     }
 
-    // Fallback: return relative path
-    let root = Path::new(root_dir);
-    if let Ok(rel) = resolved.strip_prefix(root) {
-        normalize_path(&rel.display().to_string())
-    } else {
-        normalize_path(&resolved_str)
+    let dir = Path::new(from_file).parent().unwrap_or(Path::new(""));
+    let resolved = clean_path(&dir.join(import_source));
+    let resolved_str = resolved.display().to_string().replace('\\', "/");
+    // Probe order: .js → .ts/.tsx remap, known extensions, then the exact path.
+    if let Some(p) = probe_js_to_ts_remap(&resolved_str, root_dir, known_files) {
+        return p;
+    }
+    if let Some(p) = probe_known_extensions(&resolved_str, root_dir, known_files) {
+        return p;
+    }
+    if file_exists(&resolved_str, known_files, root_dir) {
+        return relativize_to_root(&resolved_str, root_dir);
     }
+    relativize_to_root(&resolved_str, root_dir) // no probe matched: fall back to the cleaned path
 }
 
 /// Compute proximity-based confidence for call resolution.
diff --git a/crates/codegraph-core/src/read_queries.rs b/crates/codegraph-core/src/read_queries.rs
index 405feacc2..11d03b2c1 100644
--- a/crates/codegraph-core/src/read_queries.rs
+++ b/crates/codegraph-core/src/read_queries.rs
@@ -112,6 +112,706 @@ const VALID_ROLES: &[&str] = &[
     "dead-unresolved",
 ];
 
+// ── fn_deps internal types ──────────────────────────────────────────────
+
+/// Matched candidate node from the initial relevance ranking step of `fn_deps`.
+struct FnDepsMatchedNode {
+    id: i32,
+    name: String, // may be dotted (`Cls.method`); scoring also compares the part after the last '.'
+    kind: String,
+    file: String,
+    line: Option<i32>,
+    end_line: Option<i32>,
+    role: Option<String>,
+    fan_in: i32, // feeds the relevance bonus; presumably the match query's `fan_in` column (confirm)
+}
+
+/// Caller node with id retained for BFS reuse. Differs from the public
+/// `FnDepsCallerNode` which strips the id from the output.
+struct FnDepsCallerWithId {
+    id: i32, // node id of the calling node
+    name: String,
+    kind: String,
+    file: String,
+    line: Option<i32>,
+    via_hierarchy: Option<String>, // name of the peer method the caller was found through; `None` for direct callers
+}
+
+// ── fn_deps helpers ─────────────────────────────────────────────────────
+
+/// Build the SQL text and positional parameters for fn_deps' initial
+/// candidate-node lookup: a substring match on the node name, restricted to
+/// `kind` (or the four default kinds) and optionally to a file substring.
+fn build_fn_deps_match_query(
+    name: &str,
+    kind: Option<&str>,
+    file: Option<&str>,
+) -> (String, Vec<Box<dyn rusqlite::types::ToSql>>) {
+    // Kinds searched when the caller does not pin one.
+    const DEFAULT_KINDS: [&str; 4] = ["function", "method", "class", "constant"];
+    let kinds: Vec<String> = match kind {
+        Some(k) => vec![k.to_string()],
+        None => DEFAULT_KINDS.iter().map(|&k| k.to_string()).collect(),
+    };
+
+    let mut sql = String::from(
+        "SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line, n.role, \
+         COALESCE(fi.cnt, 0) AS fan_in \
+         FROM nodes n \
+         LEFT JOIN (SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id) fi \
+         ON fi.target_id = n.id \
+         WHERE n.name LIKE ?1",
+    );
+    // ?1 is always the name pattern; later placeholders are numbered from 2.
+    let mut params_v: Vec<Box<dyn rusqlite::types::ToSql>> = vec![Box::new(format!("%{name}%"))];
+    let mut idx = 2;
+
+    // One placeholder per kind: ?2, ?3, …
+    if !kinds.is_empty() {
+        let placeholders = (idx..idx + kinds.len())
+            .map(|slot| format!("?{slot}"))
+            .collect::<Vec<String>>()
+            .join(", ");
+        sql.push_str(&format!(" AND n.kind IN ({placeholders})"));
+        idx += kinds.len();
+        for k in kinds {
+            params_v.push(Box::new(k));
+        }
+    }
+    // File filter goes through `escape_like` and is paired with an ESCAPE clause.
+    if let Some(f) = file {
+        sql.push_str(&format!(" AND n.file LIKE ?{idx} ESCAPE '\\'"));
+        params_v.push(Box::new(format!("%{}%", escape_like(f))));
+    }
+
+    (sql, params_v)
+}
+
+/// Score a node against the lower-cased query: 100 exact, 60 prefix, 40 dotted-segment, 10
+/// otherwise, plus a fan-in bonus capped at 25. Mirrors JS `findMatchingNodes` in `domain/queries.ts`.
+fn fn_deps_relevance_score(node: &FnDepsMatchedNode, lower_query: &str) -> f64 {
+    let full_name = node.name.to_lowercase();
+    let bare_name = full_name.rsplit('.').next().unwrap_or(&full_name);
+    let is_exact = full_name == lower_query || bare_name == lower_query;
+    let is_prefix = full_name.starts_with(lower_query) || bare_name.starts_with(lower_query);
+    let is_dotted = || {
+        full_name.contains(&format!(".{lower_query}"))
+            || full_name.contains(&format!("{lower_query}."))
+    };
+    let match_score = match (is_exact, is_prefix) {
+        (true, _) => 100.0,
+        (false, true) => 60.0,
+        (false, false) if is_dotted() => 40.0,
+        (false, false) => 10.0,
+    };
+    match_score + ((node.fan_in as f64 + 1.0).log2() * 5.0).min(25.0)
+}
+
+/// Fetch the direct callees of `node_id` (targets of its outgoing `calls`
+/// edges), dropping callees in test files when `no_tests` is set.
+fn fetch_fn_deps_callees(
+    conn: &rusqlite::Connection,
+    node_id: i32,
+    no_tests: bool,
+) -> napi::Result<Vec<FnDepsNode>> {
+    let mut stmt = conn
+        .prepare_cached(
+            "SELECT DISTINCT n.id, n.name, n.kind, n.file, n.line \
+             FROM edges e JOIN nodes n ON e.target_id = n.id \
+             WHERE e.source_id = ?1 AND e.kind = 'calls'",
+        )
+        .map_err(|e| napi::Error::from_reason(format!("fn_deps callees prepare: {e}")))?;
+    let callees = stmt
+        .query_map(params![node_id], |row| {
+            Ok(FnDepsNode {
+                name: row.get("name")?,
+                kind: row.get("kind")?,
+                file: row.get("file")?,
+                line: row.get("line")?,
+            })
+        })
+        .map_err(|e| napi::Error::from_reason(format!("fn_deps callees: {e}")))?
+        .collect::<Result<Vec<FnDepsNode>, _>>()
+        .map_err(|e| napi::Error::from_reason(format!("fn_deps callees collect: {e}")))?;
+    if !no_tests {
+        return Ok(callees);
+    }
+    Ok(callees.into_iter().filter(|c| !is_test_file(&c.file)).collect())
+}
+
+/// Fetch the direct callers of `node_id` (sources of its incoming `calls` edges). Retains `id` for BFS reuse.
+fn fetch_fn_deps_direct_callers(
+    conn: &rusqlite::Connection,
+    node_id: i32,
+) -> napi::Result<Vec<FnDepsCallerWithId>> {
+    let mut stmt = conn
+        .prepare_cached(
+            "SELECT n.id, n.name, n.kind, n.file, n.line \
+             FROM edges e JOIN nodes n ON e.source_id = n.id \
+             WHERE e.target_id = ?1 AND e.kind = 'calls'",
+        )
+        .map_err(|e| napi::Error::from_reason(format!("fn_deps callers prepare: {e}")))?;
+    let rows = stmt
+        .query_map(params![node_id], |row| {
+            Ok(FnDepsCallerWithId {
+                id: row.get("id")?,
+                name: row.get("name")?,
+                kind: row.get("kind")?,
+                file: row.get("file")?,
+                line: row.get("line")?,
+                via_hierarchy: None, // direct callers are not reached through a hierarchy peer
+            })
+        })
+        .map_err(|e| napi::Error::from_reason(format!("fn_deps callers: {e}")))?;
+    rows.collect::<Result<Vec<_>, _>>() // row-level errors surface here, while collecting
+        .map_err(|e| napi::Error::from_reason(format!("fn_deps callers collect: {e}")))
+}
+
+/// For a method node `Cls.foo`, expand callers via method-hierarchy resolution:
+/// other method nodes whose name ends in `.foo` and the callers of those
+/// hierarchy peers. Appends to the supplied `callers` vector. Mirrors the JS
+/// hierarchy expansion in `domain/queries.ts::findMethodHierarchyCallers`.
+fn expand_method_hierarchy_callers(
+    conn: &rusqlite::Connection,
+    node: &FnDepsMatchedNode,
+    callers: &mut Vec<FnDepsCallerWithId>,
+) -> napi::Result<()> {
+    if node.kind != "method" {
+        return Ok(());
+    }
+    // Text after the last '.'; `None` doubles as the "name has no dot" guard.
+    let method_name = match node.name.rsplit_once('.') {
+        Some((_, tail)) => tail,
+        None => return Ok(()),
+    };
+    let pattern = format!("%.{method_name}"); // NOTE(review): `_`/`%` in the name act as LIKE wildcards here
+    let related: Vec<(i32, String)> = {
+        let mut stmt = conn
+            .prepare_cached(
+                // Only id/name are read, so no fan-in aggregate is joined in.
+                "SELECT n.id, n.name FROM nodes n \
+                 WHERE n.name LIKE ?1 AND n.kind = 'method'",
+            )
+            .map_err(|e| napi::Error::from_reason(format!("fn_deps hierarchy prepare: {e}")))?;
+        let rows = stmt
+            .query_map(params![pattern], |row| {
+                Ok((row.get::<_, i32>("id")?, row.get::<_, String>("name")?))
+            })
+            .map_err(|e| napi::Error::from_reason(format!("fn_deps hierarchy: {e}")))?;
+        rows.collect::<Result<Vec<_>, _>>()
+            .map_err(|e| napi::Error::from_reason(format!("fn_deps hierarchy collect: {e}")))?
+    };
+    for (rm_id, rm_name) in &related {
+        if *rm_id == node.id {
+            continue;
+        }
+        let mut stmt = conn
+            .prepare_cached(
+                "SELECT n.id, n.name, n.kind, n.file, n.line \
+                 FROM edges e JOIN nodes n ON e.source_id = n.id \
+                 WHERE e.target_id = ?1 AND e.kind = 'calls'",
+            )
+            .map_err(|e| {
+                napi::Error::from_reason(format!("fn_deps hierarchy callers prepare: {e}"))
+            })?;
+        let rows = stmt
+            .query_map(params![rm_id], |row| {
+                Ok(FnDepsCallerWithId {
+                    id: row.get("id")?,
+                    name: row.get("name")?,
+                    kind: row.get("kind")?,
+                    file: row.get("file")?,
+                    line: row.get("line")?,
+                    via_hierarchy: Some(rm_name.clone()),
+                })
+            })
+            .map_err(|e| napi::Error::from_reason(format!("fn_deps hierarchy callers: {e}")))?;
+        let extra: Vec<FnDepsCallerWithId> = rows.collect::<Result<Vec<_>, _>>().map_err(|e| {
+            napi::Error::from_reason(format!("fn_deps hierarchy callers collect: {e}"))
+        })?;
+        callers.extend(extra);
+    }
+    Ok(())
+}
+
+/// Breadth-first walk over caller chains, starting from `initial_frontier`
+/// and producing one group per level for depths `2..=depth`. `node_id` is
+/// marked as seen up front, a caller already expanded at an earlier level is
+/// not reported again, and a level that finds nothing new emits no group.
+/// Mirrors the JS `bfsTransitiveCallers` helper in `domain/queries.ts`.
+fn bfs_transitive_callers(
+    conn: &rusqlite::Connection,
+    node_id: i32,
+    initial_frontier: Vec<FnDepsCallerWithId>,
+    depth: usize,
+    no_tests: bool,
+) -> napi::Result<Vec<FnDepsTransitiveGroup>> {
+    // Direct callers of one node: sources of `calls` edges that target it.
+    const CALLERS_SQL: &str = "SELECT n.id, n.name, n.kind, n.file, n.line \
+                               FROM edges e JOIN nodes n ON e.source_id = n.id \
+                               WHERE e.target_id = ?1 AND e.kind = 'calls'";
+    let mut levels: Vec<FnDepsTransitiveGroup> = Vec::new();
+    if depth < 2 {
+        return Ok(levels);
+    }
+    // Seeding `seen` with the origin keeps cycles from re-reporting it.
+    let mut seen: HashSet<i32> = std::iter::once(node_id).collect();
+    let mut current: Vec<FnDepsCallerWithId> = initial_frontier;
+    for level in 2..=depth {
+        // Frontier entries expanded at an earlier level are not walked again.
+        let pending: Vec<&FnDepsCallerWithId> =
+            current.iter().filter(|c| !seen.contains(&c.id)).collect();
+        if pending.is_empty() {
+            break;
+        }
+        seen.extend(pending.iter().map(|c| c.id));
+        let mut discovered: Vec<FnDepsCallerWithId> = Vec::new();
+        let mut discovered_ids: HashSet<i32> = HashSet::new();
+        for origin in &pending {
+            let mut stmt = conn
+                .prepare_cached(CALLERS_SQL)
+                .map_err(|e| napi::Error::from_reason(format!("fn_deps bfs prepare: {e}")))?;
+            let rows = stmt
+                .query_map(params![origin.id], |row| {
+                    Ok(FnDepsCallerWithId {
+                        id: row.get("id")?,
+                        name: row.get("name")?,
+                        kind: row.get("kind")?,
+                        file: row.get("file")?,
+                        line: row.get("line")?,
+                        via_hierarchy: None,
+                    })
+                })
+                .map_err(|e| napi::Error::from_reason(format!("fn_deps bfs: {e}")))?;
+            let upstream: Vec<FnDepsCallerWithId> = rows
+                .collect::<Result<Vec<_>, _>>()
+                .map_err(|e| napi::Error::from_reason(format!("fn_deps bfs collect: {e}")))?;
+            // Drop test-file callers when `no_tests` is set, anything already
+            // expanded, and repeats within this level.
+            for candidate in upstream {
+                let excluded = no_tests && is_test_file(&candidate.file);
+                let fresh = !excluded && !seen.contains(&candidate.id);
+                if fresh && discovered_ids.insert(candidate.id) {
+                    discovered.push(candidate);
+                }
+            }
+        }
+        // Only the display fields are copied into the group; ids stay internal.
+        if !discovered.is_empty() {
+            let callers: Vec<FnDepsNode> = discovered
+                .iter()
+                .map(|c| FnDepsNode {
+                    name: c.name.clone(),
+                    kind: c.kind.clone(),
+                    file: c.file.clone(),
+                    line: c.line,
+                })
+                .collect();
+            levels.push(FnDepsTransitiveGroup { depth: level as i32, callers });
+        }
+        // Whatever was discovered here seeds the next level.
+        current = discovered;
+    }
+    Ok(levels)
+}
+
+/// Returns the `hash` recorded for `file` in `file_hashes`, memoized in `cache` so repeated
+/// lookups in one `fn_deps` call skip the query; misses and failed queries cache as `None`.
+fn fn_deps_cached_file_hash(
+    conn: &rusqlite::Connection,
+    cache: &mut HashMap<String, Option<String>>,
+    file: &str,
+) -> Option<String> {
+    if let Some(known) = cache.get(file) {
+        return known.clone();
+    }
+    let sql = "SELECT hash FROM file_hashes WHERE file = ?1";
+    let looked_up: Option<String> = match conn.prepare_cached(sql) {
+        Ok(mut stmt) => stmt.query_row(params![file], |row| row.get(0)).ok(),
+        Err(_) => None,
+    };
+    cache.insert(file.to_owned(), looked_up.clone());
+    looked_up
+}
+
+// ── get_graph_stats helpers ─────────────────────────────────────────────
+
+/// Counts nodes grouped by `kind` for `get_graph_stats`. `no_tests_filter` is
+/// spliced verbatim after `WHERE 1=1`, so it is expected to be either empty
+/// or an `AND ...` SQL fragment.
+fn fetch_nodes_by_kind(
+    conn: &rusqlite::Connection,
+    no_tests_filter: &str,
+) -> napi::Result<Vec<KindCount>> {
+    let sql = format!(
+        "SELECT kind, COUNT(*) as c FROM nodes WHERE 1=1 {no_tests_filter} GROUP BY kind",
+    );
+    let mut stmt = conn
+        .prepare_cached(&sql)
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats nodes_by_kind: {e}")))?;
+    let mapped = stmt
+        .query_map([], |row| Ok(KindCount { kind: row.get(0)?, count: row.get(1)? }))
+        .map_err(|e| {
+            napi::Error::from_reason(format!("get_graph_stats nodes_by_kind query: {e}"))
+        })?;
+    // Collecting stops at the first row that fails to decode.
+    let counts: Result<Vec<KindCount>, rusqlite::Error> = mapped.collect();
+    counts.map_err(|e| {
+        napi::Error::from_reason(format!("get_graph_stats nodes_by_kind collect: {e}"))
+    })
+}
+
+/// Counts edges grouped by `kind` for `get_graph_stats`. With `no_tests` set,
+/// edges are joined to their source and target nodes and the SQL produced by
+/// `test_filter_clauses` is applied to both nodes' `file` columns.
+fn fetch_edges_by_kind(
+    conn: &rusqlite::Connection,
+    no_tests: bool,
+) -> napi::Result<Vec<KindCount>> {
+    let sql = if !no_tests {
+        "SELECT kind, COUNT(*) as c FROM edges GROUP BY kind".to_string()
+    } else {
+        let source_filter = test_filter_clauses("ns.file");
+        let target_filter = test_filter_clauses("nt.file");
+        format!(
+            "SELECT e.kind, COUNT(*) as c FROM edges e \
+             JOIN nodes ns ON e.source_id = ns.id \
+             JOIN nodes nt ON e.target_id = nt.id \
+             WHERE 1=1 {} {} GROUP BY e.kind",
+            source_filter, target_filter,
+        )
+    };
+    let mut stmt = conn
+        .prepare_cached(&sql)
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats edges_by_kind: {e}")))?;
+    let mapped = stmt
+        .query_map([], |row| Ok(KindCount { kind: row.get(0)?, count: row.get(1)? }))
+        .map_err(|e| {
+            napi::Error::from_reason(format!("get_graph_stats edges_by_kind query: {e}"))
+        })?;
+    let counts: Result<Vec<KindCount>, rusqlite::Error> = mapped.collect();
+    counts.map_err(|e| {
+        napi::Error::from_reason(format!("get_graph_stats edges_by_kind collect: {e}"))
+    })
+}
+
+/// Counts nodes per non-NULL `role` for `get_graph_stats`. `no_tests_filter` is
+/// spliced verbatim after the `role IS NOT NULL` condition, so it is expected
+/// to be either empty or an `AND ...` SQL fragment.
+fn fetch_role_counts(
+    conn: &rusqlite::Connection,
+    no_tests_filter: &str,
+) -> napi::Result<Vec<RoleCount>> {
+    let sql = format!(
+        "SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL {no_tests_filter} GROUP BY role",
+    );
+    let mut stmt = conn
+        .prepare_cached(&sql)
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats role_counts: {e}")))?;
+    let mapped = stmt
+        .query_map([], |row| Ok(RoleCount { role: row.get(0)?, count: row.get(1)? }))
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats role_counts query: {e}")))?;
+    // Collecting stops at the first row that fails to decode.
+    let counts: Result<Vec<RoleCount>, rusqlite::Error> = mapped.collect();
+    counts.map_err(|e| {
+        napi::Error::from_reason(format!("get_graph_stats role_counts collect: {e}"))
+    })
+}
+
+/// Gathers the call-graph quality counters for `get_graph_stats`. `tf_file` and
+/// `tf_n_file` are spliced verbatim into the two callable counts; the two raw
+/// `calls`-edge counts take no filter.
+fn fetch_quality_metrics(
+    conn: &rusqlite::Connection,
+    tf_file: &str,
+    tf_n_file: &str,
+) -> napi::Result<QualityMetrics> {
+    let total_sql = format!(
+        "SELECT COUNT(*) FROM nodes WHERE kind IN ('function', 'method') {tf_file}",
+    );
+    let with_callers_sql = format!(
+        "SELECT COUNT(DISTINCT e.target_id) FROM edges e \
+         JOIN nodes n ON e.target_id = n.id \
+         WHERE e.kind = 'calls' AND n.kind IN ('function', 'method') {tf_n_file}",
+    );
+    let callable_total: i32 = conn
+        .prepare_cached(&total_sql)
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats callable_total: {e}")))?
+        .query_row([], |row| row.get(0))
+        .map_err(|e| {
+            napi::Error::from_reason(format!("get_graph_stats callable_total query: {e}"))
+        })?;
+    // Distinct function/method nodes that are the target of at least one `calls` edge.
+    let callable_with_callers: i32 = conn
+        .prepare_cached(&with_callers_sql)
+        .map_err(|e| {
+            napi::Error::from_reason(format!("get_graph_stats callable_with_callers: {e}"))
+        })?
+        .query_row([], |row| row.get(0))
+        .map_err(|e| {
+            napi::Error::from_reason(format!("get_graph_stats callable_with_callers query: {e}"))
+        })?;
+    let all_calls: i32 = conn
+        .prepare_cached("SELECT COUNT(*) FROM edges WHERE kind = 'calls'")
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats call_edges: {e}")))?
+        .query_row([], |row| row.get(0))
+        .map_err(|e| {
+            napi::Error::from_reason(format!("get_graph_stats call_edges query: {e}"))
+        })?;
+    let confident_calls: i32 = conn
+        .prepare_cached("SELECT COUNT(*) FROM edges WHERE kind = 'calls' AND confidence >= 0.7")
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats high_conf: {e}")))?
+        .query_row([], |row| row.get(0))
+        .map_err(|e| {
+            napi::Error::from_reason(format!("get_graph_stats high_conf query: {e}"))
+        })?;
+    Ok(QualityMetrics {
+        callable_total,
+        callable_with_callers,
+        call_edges: all_calls,
+        high_conf_call_edges: confident_calls,
+    })
+}
+
+/// Returns the five `file` nodes with the highest combined edge count
+/// (`fan_in` + `fan_out`, any edge kind) for `get_graph_stats`. `tf_n_file`
+/// is spliced verbatim after the `n.kind = 'file'` condition.
+fn fetch_file_hotspots(
+    conn: &rusqlite::Connection,
+    tf_n_file: &str,
+) -> napi::Result<Vec<FileHotspot>> {
+    let sql = format!(
+        "SELECT n.file, \
+         (SELECT COUNT(*) FROM edges WHERE target_id = n.id) as fan_in, \
+         (SELECT COUNT(*) FROM edges WHERE source_id = n.id) as fan_out \
+         FROM nodes n WHERE n.kind = 'file' {tf_n_file} \
+         ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id) \
+                + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) DESC \
+         LIMIT 5",
+    );
+    let mut stmt = conn
+        .prepare_cached(&sql)
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats hotspots: {e}")))?;
+    let rows = stmt
+        .query_map([], |row| {
+            let (file, fan_in, fan_out) = (row.get(0)?, row.get(1)?, row.get(2)?);
+            Ok(FileHotspot { file, fan_in, fan_out })
+        })
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats hotspots query: {e}")))?;
+    let found: Result<Vec<FileHotspot>, rusqlite::Error> = rows.collect();
+    found.map_err(|e| napi::Error::from_reason(format!("get_graph_stats hotspots collect: {e}")))
+}
+
+/// Summarises `function_complexity` rows joined to function/method nodes for
+/// `get_graph_stats`; `None` when the table is absent or no row matches.
+fn fetch_complexity_summary(
+    conn: &rusqlite::Connection,
+    tf_n_file: &str,
+) -> napi::Result<Option<ComplexitySummary>> {
+    if !has_table(conn, "function_complexity") {
+        return Ok(None);
+    }
+    let sql = format!(
+        "SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index \
+         FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id \
+         WHERE n.kind IN ('function','method') {tf_n_file}",
+    );
+    let mut stmt = conn
+        .prepare_cached(&sql)
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats complexity: {e}")))?;
+    let rows = stmt
+        .query_map([], |row| {
+            let cognitive: i32 = row.get(0)?;
+            let cyclomatic: i32 = row.get(1)?;
+            let _max_nesting: i32 = row.get(2)?; // still read so a bad value fails the row
+            let mi: f64 = row.get(3).unwrap_or(0.0); // unreadable MI (e.g. NULL) counts as 0.0
+            Ok((cognitive, cyclomatic, mi))
+        })
+        .map_err(|e| {
+            napi::Error::from_reason(format!("get_graph_stats complexity query: {e}"))
+        })?;
+    let samples: Vec<(i32, i32, f64)> = rows.collect::<Result<Vec<_>, _>>().map_err(|e| {
+        napi::Error::from_reason(format!("get_graph_stats complexity collect: {e}"))
+    })?;
+    if samples.is_empty() {
+        return Ok(None);
+    }
+    let n = samples.len() as f64;
+    let round1 = |v: f64| (v * 10.0).round() / 10.0; // round to one decimal place
+    let total_cognitive: i32 = samples.iter().map(|s| s.0).sum();
+    let total_cyclomatic: i32 = samples.iter().map(|s| s.1).sum();
+    let total_mi: f64 = samples.iter().map(|s| s.2).sum();
+    let lowest_mi = samples.iter().map(|s| s.2).fold(f64::INFINITY, f64::min);
+    Ok(Some(ComplexitySummary {
+        analyzed: samples.len() as i32,
+        avg_cognitive: round1(total_cognitive as f64 / n),
+        avg_cyclomatic: round1(total_cyclomatic as f64 / n),
+        max_cognitive: samples.iter().map(|s| s.0).max().unwrap_or(0),
+        max_cyclomatic: samples.iter().map(|s| s.1).max().unwrap_or(0),
+        avg_mi: round1(total_mi / n),
+        min_mi: round1(lowest_mi),
+    }))
+}
+
+// ── find_nodes_for_triage helpers ───────────────────────────────────────
+
+/// Rejects a `kind` filter that is not listed in `EVERY_SYMBOL_KIND`; `None`
+/// (no filter) is always accepted.
+fn validate_triage_kind(kind: Option<&str>) -> napi::Result<()> {
+    match kind {
+        Some(k) if !EVERY_SYMBOL_KIND.contains(&k) => Err(napi::Error::from_reason(format!(
+            "Invalid kind: {k} (expected one of {})",
+            EVERY_SYMBOL_KIND.join(", ")
+        ))),
+        _ => Ok(()),
+    }
+}
+
+/// Rejects a `role` filter that is not listed in `VALID_ROLES`; `None`
+/// (no filter) is always accepted.
+fn validate_triage_role(role: Option<&str>) -> napi::Result<()> {
+    match role {
+        Some(r) if !VALID_ROLES.contains(&r) => Err(napi::Error::from_reason(format!(
+            "Invalid role: {r} (expected one of {})",
+            VALID_ROLES.join(", ")
+        ))),
+        _ => Ok(()),
+    }
+}
+
+/// Builds the SQL and positional parameters for `find_nodes_for_triage`.
+/// Without `kind` the query covers function, method and class nodes; `file`
+/// is bound as `%...%` around `escape_like(file)`, and the `dead` role is
+/// matched with `LIKE 'dead%'`. Rows are ordered by file, then line.
+fn build_triage_query(
+    kind: Option<&str>,
+    role: Option<&str>,
+    file: Option<&str>,
+    no_tests: bool,
+) -> (String, Vec<Box<dyn rusqlite::types::ToSql>>) {
+    let kinds_to_use: Vec<&str> =
+        kind.map_or_else(|| vec!["function", "method", "class"], |k| vec![k]);
+    let kind_placeholders: Vec<String> =
+        (1..=kinds_to_use.len()).map(|n| format!("?{}", n)).collect();
+
+    let mut sql = format!(
+        "SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line, \
+                n.parent_id, n.exported, n.qualified_name, n.scope, n.visibility, n.role, \
+                COALESCE(fi.cnt, 0) AS fan_in, \
+                COALESCE(fc.cognitive, 0) AS cognitive, \
+                COALESCE(fc.maintainability_index, 0) AS mi, \
+                COALESCE(fc.cyclomatic, 0) AS cyclomatic, \
+                COALESCE(fc.max_nesting, 0) AS max_nesting, \
+                COALESCE(fcc.commit_count, 0) AS churn \
+         FROM nodes n \
+         LEFT JOIN (SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id) fi ON fi.target_id = n.id \
+         LEFT JOIN function_complexity fc ON fc.node_id = n.id \
+         LEFT JOIN file_commit_counts fcc ON n.file = fcc.file \
+         WHERE n.kind IN ({kinds})",
+        kinds = kind_placeholders.join(", "),
+    );
+
+    let mut param_values: Vec<Box<dyn rusqlite::types::ToSql>> = kinds_to_use
+        .iter()
+        .map(|k| Box::new(k.to_string()) as Box<dyn rusqlite::types::ToSql>)
+        .collect();
+    let mut idx = param_values.len() + 1;
+
+    if no_tests {
+        sql.push_str(&format!(" {}", test_filter_clauses("n.file")));
+    }
+    if let Some(f) = file {
+        sql.push_str(&format!(" AND n.file LIKE ?{idx} ESCAPE '\\'"));
+        let needle = format!("%{}%", escape_like(f));
+        param_values.push(Box::new(needle));
+        idx += 1;
+    }
+    if let Some(r) = role {
+        let (clause, bound) = if r == "dead" {
+            (format!(" AND n.role LIKE ?{idx}"), "dead%".to_string())
+        } else {
+            (format!(" AND n.role = ?{idx}"), r.to_string())
+        };
+        sql.push_str(&clause);
+        param_values.push(Box::new(bound));
+    }
+    sql.push_str(" ORDER BY n.file, n.line");
+    (sql, param_values)
+}
+
+fn read_triage_row(row: &rusqlite::Row) -> rusqlite::Result<NativeTriageNodeRow> {
+    Ok(NativeTriageNodeRow { // columns are looked up by name; any read failure is returned
+        id: row.get("id")?,
+        name: row.get("name")?,
+        kind: row.get("kind")?,
+        file: row.get("file")?,
+        line: row.get("line")?,
+        end_line: row.get("end_line")?,
+        parent_id: row.get("parent_id")?,
+        exported: row.get("exported")?,
+        qualified_name: row.get("qualified_name")?,
+        scope: row.get("scope")?,
+        visibility: row.get("visibility")?,
+        role: row.get("role")?,
+        fan_in: row.get("fan_in")?, // from here down: aliases the triage SELECT computes via joins
+        cognitive: row.get("cognitive")?,
+        mi: row.get("mi")?,
+        cyclomatic: row.get("cyclomatic")?,
+        max_nesting: row.get("max_nesting")?,
+        churn: row.get("churn")?,
+    })
+}
+
+/// Describes the stored embeddings for `get_graph_stats`. Yields `None` when
+/// the `embeddings` table is missing or holds no rows; otherwise the row count
+/// plus whichever of `model`, `dim` and `built_at` are present in
+/// `embedding_meta` (all `None` when that table does not exist).
+fn fetch_embedding_info(conn: &rusqlite::Connection) -> napi::Result<Option<EmbeddingInfo>> {
+    if !has_table(conn, "embeddings") {
+        return Ok(None);
+    }
+    // A COUNT that fails to execute is treated the same as an empty table.
+    let count: i32 = conn
+        .prepare_cached("SELECT COUNT(*) FROM embeddings")
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats embeddings: {e}")))?
+        .query_row([], |row| row.get(0))
+        .unwrap_or(0);
+    if count == 0 {
+        return Ok(None);
+    }
+    let mut info = EmbeddingInfo {
+        count,
+        model: None,
+        dim: None,
+        built_at: None,
+    };
+    // Without the metadata table only the count can be reported.
+    if !has_table(conn, "embedding_meta") {
+        return Ok(Some(info));
+    }
+    let mut stmt = conn
+        .prepare_cached("SELECT key, value FROM embedding_meta")
+        .map_err(|e| napi::Error::from_reason(format!("get_graph_stats embedding_meta: {e}")))?;
+    let entries = stmt
+        .query_map([], |row| {
+            Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
+        })
+        .map_err(|e| {
+            napi::Error::from_reason(format!("get_graph_stats embedding_meta query: {e}"))
+        })?;
+    // `flatten` skips rows that failed to decode; unknown keys are ignored and
+    // a `dim` value that does not parse as an integer is stored as `None`.
+    for (key, value) in entries.flatten() {
+        match key.as_str() {
+            "model" => info.model = Some(value),
+            "dim" => info.dim = value.parse().ok(),
+            "built_at" => info.built_at = Some(value),
+            _ => {}
+        }
+    }
+    Ok(Some(info))
+}
+
 // ── Query Methods ───────────────────────────────────────────────────────
 
 #[napi]
@@ -456,116 +1156,27 @@ impl NativeDatabase {
         file: Option<String>,
         no_tests: Option<bool>,
     ) -> napi::Result<Vec<NativeTriageNodeRow>> {
-        // Validate kind
-        if let Some(ref k) = kind {
-            if !EVERY_SYMBOL_KIND.contains(&k.as_str()) {
-                return Err(napi::Error::from_reason(format!(
-                    "Invalid kind: {k} (expected one of {})",
-                    EVERY_SYMBOL_KIND.join(", ")
-                )));
-            }
-        }
-        // Validate role
-        if let Some(ref r) = role {
-            if !VALID_ROLES.contains(&r.as_str()) {
-                return Err(napi::Error::from_reason(format!(
-                    "Invalid role: {r} (expected one of {})",
-                    VALID_ROLES.join(", ")
-                )));
-            }
-        }
+        validate_triage_kind(kind.as_deref())?;
+        validate_triage_role(role.as_deref())?;
 
         let conn = self.conn()?;
-
-        let kinds_to_use: Vec<&str> = match kind {
-            Some(ref k) => vec![k.as_str()],
-            None => vec!["function", "method", "class"],
-        };
-        let kind_placeholders: Vec<String> = kinds_to_use
-            .iter()
-            .enumerate()
-            .map(|(i, _)| format!("?{}", i + 1))
-            .collect();
-
-        let mut sql = format!(
-            "SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line, \
-                    n.parent_id, n.exported, n.qualified_name, n.scope, n.visibility, n.role, \
-                    COALESCE(fi.cnt, 0) AS fan_in, \
-                    COALESCE(fc.cognitive, 0) AS cognitive, \
-                    COALESCE(fc.maintainability_index, 0) AS mi, \
-                    COALESCE(fc.cyclomatic, 0) AS cyclomatic, \
-                    COALESCE(fc.max_nesting, 0) AS max_nesting, \
-                    COALESCE(fcc.commit_count, 0) AS churn \
-             FROM nodes n \
-             LEFT JOIN (SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id) fi ON fi.target_id = n.id \
-             LEFT JOIN function_complexity fc ON fc.node_id = n.id \
-             LEFT JOIN file_commit_counts fcc ON n.file = fcc.file \
-             WHERE n.kind IN ({kinds})",
-            kinds = kind_placeholders.join(", "),
+        let (sql, param_values) = build_triage_query(
+            kind.as_deref(),
+            role.as_deref(),
+            file.as_deref(),
+            no_tests.unwrap_or(false),
         );
 
-        let mut param_values: Vec<Box<dyn rusqlite::types::ToSql>> = Vec::new();
-        for k in &kinds_to_use {
-            param_values.push(Box::new(k.to_string()));
-        }
-        let mut idx = kinds_to_use.len() + 1;
-
-        if no_tests.unwrap_or(false) {
-            sql.push_str(&format!(" {}", test_filter_clauses("n.file")));
-        }
-        if let Some(ref f) = file {
-            sql.push_str(&format!(" AND n.file LIKE ?{idx} ESCAPE '\\'"));
-            param_values.push(Box::new(format!("%{}%", escape_like(f))));
-            idx += 1;
-        }
-        if let Some(ref r) = role {
-            if r == "dead" {
-                sql.push_str(&format!(" AND n.role LIKE ?{idx}"));
-                param_values.push(Box::new("dead%".to_string()));
-            } else {
-                sql.push_str(&format!(" AND n.role = ?{idx}"));
-                param_values.push(Box::new(r.clone()));
-            }
-        }
-        sql.push_str(" ORDER BY n.file, n.line");
-
-        let mut stmt = conn
-            .prepare_cached(&sql)
-            .map_err(|e| {
-                napi::Error::from_reason(format!("find_nodes_for_triage prepare: {e}"))
-            })?;
+        let mut stmt = conn.prepare_cached(&sql).map_err(|e| {
+            napi::Error::from_reason(format!("find_nodes_for_triage prepare: {e}"))
+        })?;
         let params_ref: Vec<&dyn rusqlite::types::ToSql> =
             param_values.iter().map(|p| p.as_ref()).collect();
         let rows = stmt
-            .query_map(params_ref.as_slice(), |row| {
-                Ok(NativeTriageNodeRow {
-                    id: row.get("id")?,
-                    name: row.get("name")?,
-                    kind: row.get("kind")?,
-                    file: row.get("file")?,
-                    line: row.get("line")?,
-                    end_line: row.get("end_line")?,
-                    parent_id: row.get("parent_id")?,
-                    exported: row.get("exported")?,
-                    qualified_name: row.get("qualified_name")?,
-                    scope: row.get("scope")?,
-                    visibility: row.get("visibility")?,
-                    role: row.get("role")?,
-                    fan_in: row.get("fan_in")?,
-                    cognitive: row.get("cognitive")?,
-                    mi: row.get("mi")?,
-                    cyclomatic: row.get("cyclomatic")?,
-                    max_nesting: row.get("max_nesting")?,
-                    churn: row.get("churn")?,
-                })
-            })
-            .map_err(|e| {
-                napi::Error::from_reason(format!("find_nodes_for_triage: {e}"))
-            })?;
+            .query_map(params_ref.as_slice(), read_triage_row)
+            .map_err(|e| napi::Error::from_reason(format!("find_nodes_for_triage: {e}")))?;
         rows.collect::<Result<Vec<_>, _>>()
-            .map_err(|e| {
-                napi::Error::from_reason(format!("find_nodes_for_triage collect: {e}"))
-            })
+            .map_err(|e| napi::Error::from_reason(format!("find_nodes_for_triage collect: {e}")))
     }
 
     /// List function/method/class nodes.
@@ -1293,221 +1904,20 @@ impl NativeDatabase {
     #[napi]
     pub fn get_graph_stats(&self, no_tests: bool) -> napi::Result<GraphStats> {
         let conn = self.conn()?;
-        let tf = if no_tests {
-            test_filter_clauses("file")
-        } else {
-            String::new()
-        };
-        let tf_n = if no_tests {
-            test_filter_clauses("n.file")
-        } else {
-            String::new()
-        };
+        let tf = if no_tests { test_filter_clauses("file") } else { String::new() };
+        let tf_n = if no_tests { test_filter_clauses("n.file") } else { String::new() };
 
-        // ── Node counts by kind ────────────────────────────────────
-        let nodes_by_kind = {
-            let sql = format!(
-                "SELECT kind, COUNT(*) as c FROM nodes WHERE 1=1 {} GROUP BY kind",
-                tf
-            );
-            let mut stmt = conn.prepare_cached(&sql)
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats nodes_by_kind: {e}")))?;
-            let rows = stmt.query_map([], |row| {
-                Ok(KindCount {
-                    kind: row.get::<_, String>(0)?,
-                    count: row.get::<_, i32>(1)?,
-                })
-            }).map_err(|e| napi::Error::from_reason(format!("get_graph_stats nodes_by_kind query: {e}")))?;
-            rows.collect::<Result<Vec<_>, _>>()
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats nodes_by_kind collect: {e}")))?
-        };
+        let nodes_by_kind = fetch_nodes_by_kind(conn, &tf)?;
         let total_nodes: i32 = nodes_by_kind.iter().map(|k| k.count).sum();
 
-        // ── Edge counts by kind ────────────────────────────────────
-        let edges_by_kind = {
-            let sql = if no_tests {
-                format!(
-                    "SELECT e.kind, COUNT(*) as c FROM edges e \
-                     JOIN nodes ns ON e.source_id = ns.id \
-                     JOIN nodes nt ON e.target_id = nt.id \
-                     WHERE 1=1 {} {} GROUP BY e.kind",
-                    test_filter_clauses("ns.file"),
-                    test_filter_clauses("nt.file"),
-                )
-            } else {
-                "SELECT kind, COUNT(*) as c FROM edges GROUP BY kind".to_string()
-            };
-            let mut stmt = conn.prepare_cached(&sql)
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats edges_by_kind: {e}")))?;
-            let rows = stmt.query_map([], |row| {
-                Ok(KindCount {
-                    kind: row.get::<_, String>(0)?,
-                    count: row.get::<_, i32>(1)?,
-                })
-            }).map_err(|e| napi::Error::from_reason(format!("get_graph_stats edges_by_kind query: {e}")))?;
-            rows.collect::<Result<Vec<_>, _>>()
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats edges_by_kind collect: {e}")))?
-        };
+        let edges_by_kind = fetch_edges_by_kind(conn, no_tests)?;
         let total_edges: i32 = edges_by_kind.iter().map(|k| k.count).sum();
 
-        // ── Role counts ────────────────────────────────────────────
-        let role_counts = {
-            let sql = format!(
-                "SELECT role, COUNT(*) as c FROM nodes WHERE role IS NOT NULL {} GROUP BY role",
-                tf
-            );
-            let mut stmt = conn.prepare_cached(&sql)
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats role_counts: {e}")))?;
-            let rows = stmt.query_map([], |row| {
-                Ok(RoleCount {
-                    role: row.get::<_, String>(0)?,
-                    count: row.get::<_, i32>(1)?,
-                })
-            }).map_err(|e| napi::Error::from_reason(format!("get_graph_stats role_counts query: {e}")))?;
-            rows.collect::<Result<Vec<_>, _>>()
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats role_counts collect: {e}")))?
-        };
-
-        // ── Quality metrics ────────────────────────────────────────
-        let callable_total: i32 = {
-            let sql = format!(
-                "SELECT COUNT(*) FROM nodes WHERE kind IN ('function', 'method') {}",
-                tf
-            );
-            conn.prepare_cached(&sql)
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats callable_total: {e}")))?
-                .query_row([], |row| row.get(0))
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats callable_total query: {e}")))?
-        };
-        let callable_with_callers: i32 = {
-            let sql = format!(
-                "SELECT COUNT(DISTINCT e.target_id) FROM edges e \
-                 JOIN nodes n ON e.target_id = n.id \
-                 WHERE e.kind = 'calls' AND n.kind IN ('function', 'method') {}",
-                tf_n
-            );
-            conn.prepare_cached(&sql)
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats callable_with_callers: {e}")))?
-                .query_row([], |row| row.get(0))
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats callable_with_callers query: {e}")))?
-        };
-        let call_edges: i32 = conn
-            .prepare_cached("SELECT COUNT(*) FROM edges WHERE kind = 'calls'")
-            .map_err(|e| napi::Error::from_reason(format!("get_graph_stats call_edges: {e}")))?
-            .query_row([], |row| row.get(0))
-            .map_err(|e| napi::Error::from_reason(format!("get_graph_stats call_edges query: {e}")))?;
-        let high_conf_call_edges: i32 = conn
-            .prepare_cached("SELECT COUNT(*) FROM edges WHERE kind = 'calls' AND confidence >= 0.7")
-            .map_err(|e| napi::Error::from_reason(format!("get_graph_stats high_conf: {e}")))?
-            .query_row([], |row| row.get(0))
-            .map_err(|e| napi::Error::from_reason(format!("get_graph_stats high_conf query: {e}")))?;
-
-        // ── Hotspots (top 5 files by coupling) ─────────────────────
-        let hotspots = {
-            let sql = format!(
-                "SELECT n.file, \
-                 (SELECT COUNT(*) FROM edges WHERE target_id = n.id) as fan_in, \
-                 (SELECT COUNT(*) FROM edges WHERE source_id = n.id) as fan_out \
-                 FROM nodes n WHERE n.kind = 'file' {} \
-                 ORDER BY (SELECT COUNT(*) FROM edges WHERE target_id = n.id) \
-                        + (SELECT COUNT(*) FROM edges WHERE source_id = n.id) DESC \
-                 LIMIT 5",
-                tf_n
-            );
-            let mut stmt = conn.prepare_cached(&sql)
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats hotspots: {e}")))?;
-            let rows = stmt.query_map([], |row| {
-                Ok(FileHotspot {
-                    file: row.get(0)?,
-                    fan_in: row.get(1)?,
-                    fan_out: row.get(2)?,
-                })
-            }).map_err(|e| napi::Error::from_reason(format!("get_graph_stats hotspots query: {e}")))?;
-            rows.collect::<Result<Vec<_>, _>>()
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats hotspots collect: {e}")))?
-        };
-
-        // ── Complexity summary ─────────────────────────────────────
-        let complexity = if has_table(conn, "function_complexity") {
-            let sql = format!(
-                "SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index \
-                 FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id \
-                 WHERE n.kind IN ('function','method') {}",
-                tf_n
-            );
-            let mut stmt = conn.prepare_cached(&sql)
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats complexity: {e}")))?;
-            let rows = stmt.query_map([], |row| {
-                Ok((
-                    row.get::<_, i32>(0)?,
-                    row.get::<_, i32>(1)?,
-                    row.get::<_, i32>(2)?,
-                    row.get::<_, f64>(3).unwrap_or(0.0),
-                ))
-            }).map_err(|e| napi::Error::from_reason(format!("get_graph_stats complexity query: {e}")))?;
-            let data: Vec<(i32, i32, i32, f64)> = rows
-                .collect::<Result<Vec<_>, _>>()
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats complexity collect: {e}")))?;
-            if data.is_empty() {
-                None
-            } else {
-                let n = data.len() as f64;
-                let sum_cog: i32 = data.iter().map(|d| d.0).sum();
-                let sum_cyc: i32 = data.iter().map(|d| d.1).sum();
-                let max_cog = data.iter().map(|d| d.0).max().unwrap_or(0);
-                let max_cyc = data.iter().map(|d| d.1).max().unwrap_or(0);
-                let sum_mi: f64 = data.iter().map(|d| d.3).sum();
-                let min_mi = data.iter().map(|d| d.3).fold(f64::INFINITY, f64::min);
-                Some(ComplexitySummary {
-                    analyzed: data.len() as i32,
-                    avg_cognitive: (sum_cog as f64 / n * 10.0).round() / 10.0,
-                    avg_cyclomatic: (sum_cyc as f64 / n * 10.0).round() / 10.0,
-                    max_cognitive: max_cog,
-                    max_cyclomatic: max_cyc,
-                    avg_mi: (sum_mi / n * 10.0).round() / 10.0,
-                    min_mi: (min_mi * 10.0).round() / 10.0,
-                })
-            }
-        } else {
-            None
-        };
-
-        // ── Embeddings info ────────────────────────────────────────
-        let embeddings = if has_table(conn, "embeddings") {
-            let count: i32 = conn
-                .prepare_cached("SELECT COUNT(*) FROM embeddings")
-                .map_err(|e| napi::Error::from_reason(format!("get_graph_stats embeddings: {e}")))?
-                .query_row([], |row| row.get(0))
-                .unwrap_or(0);
-            if count > 0 && has_table(conn, "embedding_meta") {
-                let mut model: Option<String> = None;
-                let mut dim: Option<i32> = None;
-                let mut built_at: Option<String> = None;
-                let mut stmt = conn
-                    .prepare_cached("SELECT key, value FROM embedding_meta")
-                    .map_err(|e| napi::Error::from_reason(format!("get_graph_stats embedding_meta: {e}")))?;
-                let rows = stmt.query_map([], |row| {
-                    Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
-                }).map_err(|e| napi::Error::from_reason(format!("get_graph_stats embedding_meta query: {e}")))?;
-                for row in rows {
-                    if let Ok((k, v)) = row {
-                        match k.as_str() {
-                            "model" => model = Some(v),
-                            "dim" => dim = v.parse().ok(),
-                            "built_at" => built_at = Some(v),
-                            _ => {}
-                        }
-                    }
-                }
-                Some(EmbeddingInfo { count, model, dim, built_at })
-            } else if count > 0 {
-                Some(EmbeddingInfo { count, model: None, dim: None, built_at: None })
-            } else {
-                None
-            }
-        } else {
-            None
-        };
+        let role_counts = fetch_role_counts(conn, &tf)?;
+        let quality = fetch_quality_metrics(conn, &tf, &tf_n)?;
+        let hotspots = fetch_file_hotspots(conn, &tf_n)?;
+        let complexity = fetch_complexity_summary(conn, &tf_n)?;
+        let embeddings = fetch_embedding_info(conn)?;
 
         Ok(GraphStats {
             total_nodes,
@@ -1515,12 +1925,7 @@ impl NativeDatabase {
             nodes_by_kind,
             edges_by_kind,
             role_counts,
-            quality: QualityMetrics {
-                callable_total,
-                callable_with_callers,
-                call_edges,
-                high_conf_call_edges,
-            },
+            quality,
             hotspots,
             complexity,
             embeddings,
@@ -1715,284 +2120,80 @@ impl NativeDatabase {
         let lower_query = name.to_lowercase();
 
         // ── Step 1: Find matching nodes with fan-in (relevance ranking) ───
-        let default_kinds = vec![
-            "function".to_string(),
-            "method".to_string(),
-            "class".to_string(),
-            "constant".to_string(),
-        ];
-        let kinds = if let Some(ref k) = kind {
-            vec![k.clone()]
-        } else {
-            default_kinds
-        };
-
-        let mut sql = String::from(
-            "SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line, n.role, \
-             COALESCE(fi.cnt, 0) AS fan_in \
-             FROM nodes n \
-             LEFT JOIN (SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id) fi \
-             ON fi.target_id = n.id \
-             WHERE n.name LIKE ?1",
-        );
-        let mut param_values: Vec<Box<dyn rusqlite::types::ToSql>> =
-            vec![Box::new(format!("%{name}%"))];
-        let mut idx = 2;
-
-        if !kinds.is_empty() {
-            let placeholders: Vec<String> =
-                kinds.iter().enumerate().map(|(i, _)| format!("?{}", idx + i)).collect();
-            sql.push_str(&format!(" AND n.kind IN ({})", placeholders.join(", ")));
-            for k in &kinds {
-                param_values.push(Box::new(k.clone()));
-            }
-            idx += kinds.len();
-        }
-        if let Some(ref f) = file {
-            sql.push_str(&format!(" AND n.file LIKE ?{idx} ESCAPE '\\'"));
-            param_values.push(Box::new(format!("%{}%", escape_like(f))));
-        }
-
+        let (sql, param_values) =
+            build_fn_deps_match_query(&name, kind.as_deref(), file.as_deref());
         let params_ref: Vec<&dyn rusqlite::types::ToSql> =
             param_values.iter().map(|p| p.as_ref()).collect();
 
-        struct MatchedNode {
-            id: i32,
-            name: String,
-            kind: String,
-            file: String,
-            line: Option<i32>,
-            end_line: Option<i32>,
-            role: Option<String>,
-            fan_in: i32,
-        }
-
-        let mut matched: Vec<MatchedNode> = {
-            let mut stmt = conn.prepare_cached(&sql)
+        let mut matched: Vec<FnDepsMatchedNode> = {
+            let mut stmt = conn
+                .prepare_cached(&sql)
                 .map_err(|e| napi::Error::from_reason(format!("fn_deps find_nodes prepare: {e}")))?;
-            let rows = stmt.query_map(params_ref.as_slice(), |row| {
-                Ok(MatchedNode {
-                    id: row.get("id")?,
-                    name: row.get("name")?,
-                    kind: row.get("kind")?,
-                    file: row.get("file")?,
-                    line: row.get("line")?,
-                    end_line: row.get("end_line")?,
-                    role: row.get("role")?,
-                    fan_in: row.get("fan_in")?,
+            let rows = stmt
+                .query_map(params_ref.as_slice(), |row| {
+                    Ok(FnDepsMatchedNode {
+                        id: row.get("id")?,
+                        name: row.get("name")?,
+                        kind: row.get("kind")?,
+                        file: row.get("file")?,
+                        line: row.get("line")?,
+                        end_line: row.get("end_line")?,
+                        role: row.get("role")?,
+                        fan_in: row.get("fan_in")?,
+                    })
                 })
-            }).map_err(|e| napi::Error::from_reason(format!("fn_deps find_nodes: {e}")))?;
+                .map_err(|e| napi::Error::from_reason(format!("fn_deps find_nodes: {e}")))?;
             rows.collect::<Result<Vec<_>, _>>()
                 .map_err(|e| napi::Error::from_reason(format!("fn_deps find_nodes collect: {e}")))?
         };
 
-        // Filter test files
         if no_tests {
             matched.retain(|n| !is_test_file(&n.file));
         }
-
-        // Relevance scoring (mirrors JS findMatchingNodes)
         matched.sort_by(|a, b| {
-            let score = |node: &MatchedNode| -> f64 {
-                let lower_name = node.name.to_lowercase();
-                let bare_name = lower_name.rsplit('.').next().unwrap_or(&lower_name);
-                let match_score = if lower_name == lower_query || bare_name == lower_query {
-                    100.0
-                } else if lower_name.starts_with(&lower_query) || bare_name.starts_with(&lower_query) {
-                    60.0
-                } else if lower_name.contains(&format!(".{lower_query}")) || lower_name.contains(&format!("{lower_query}.")) {
-                    40.0
-                } else {
-                    10.0
-                };
-                let fan_in_bonus = ((node.fan_in as f64 + 1.0).log2() * 5.0).min(25.0);
-                match_score + fan_in_bonus
-            };
-            score(b).partial_cmp(&score(a)).unwrap_or(std::cmp::Ordering::Equal)
+            fn_deps_relevance_score(b, &lower_query)
+                .partial_cmp(&fn_deps_relevance_score(a, &lower_query))
+                .unwrap_or(std::cmp::Ordering::Equal)
         });
 
         // ── Step 2: Build result for each matched node ────────────────────
         let mut file_hash_cache: HashMap<String, Option<String>> = HashMap::new();
-
         let mut results = Vec::with_capacity(matched.len());
-        for node in &matched {
-            // Callees
-            let callees: Vec<FnDepsNode> = {
-                let mut stmt = conn.prepare_cached(
-                    "SELECT DISTINCT n.id, n.name, n.kind, n.file, n.line \
-                     FROM edges e JOIN nodes n ON e.target_id = n.id \
-                     WHERE e.source_id = ?1 AND e.kind = 'calls'"
-                ).map_err(|e| napi::Error::from_reason(format!("fn_deps callees prepare: {e}")))?;
-                let rows = stmt.query_map(params![node.id], |row| {
-                    Ok(FnDepsNode {
-                        name: row.get("name")?,
-                        kind: row.get("kind")?,
-                        file: row.get("file")?,
-                        line: row.get("line")?,
-                    })
-                }).map_err(|e| napi::Error::from_reason(format!("fn_deps callees: {e}")))?;
-                let mut v: Vec<FnDepsNode> = rows.collect::<Result<Vec<_>, _>>()
-                    .map_err(|e| napi::Error::from_reason(format!("fn_deps callees collect: {e}")))?;
-                if no_tests {
-                    v.retain(|c| !is_test_file(&c.file));
-                }
-                v
-            };
 
-            // Callers (direct) — query includes `id` for BFS reuse
-            struct CallerWithId { id: i32, name: String, kind: String, file: String, line: Option<i32>, via_hierarchy: Option<String> }
-            let mut callers_with_id: Vec<CallerWithId> = {
-                let mut stmt = conn.prepare_cached(
-                    "SELECT n.id, n.name, n.kind, n.file, n.line \
-                     FROM edges e JOIN nodes n ON e.source_id = n.id \
-                     WHERE e.target_id = ?1 AND e.kind = 'calls'"
-                ).map_err(|e| napi::Error::from_reason(format!("fn_deps callers prepare: {e}")))?;
-                let rows = stmt.query_map(params![node.id], |row| {
-                    Ok(CallerWithId {
-                        id: row.get("id")?,
-                        name: row.get("name")?,
-                        kind: row.get("kind")?,
-                        file: row.get("file")?,
-                        line: row.get("line")?,
-                        via_hierarchy: None,
-                    })
-                }).map_err(|e| napi::Error::from_reason(format!("fn_deps callers: {e}")))?;
-                rows.collect::<Result<Vec<_>, _>>()
-                    .map_err(|e| napi::Error::from_reason(format!("fn_deps callers collect: {e}")))?
-            };
-
-            // Method hierarchy resolution
-            if node.kind == "method" && node.name.contains('.') {
-                if let Some(method_name) = node.name.split('.').last() {
-                    let pattern = format!("%.{method_name}");
-                    let related: Vec<(i32, String)> = {
-                        let mut stmt = conn.prepare_cached(
-                            "SELECT n.id, n.name FROM nodes n \
-                             LEFT JOIN (SELECT target_id, COUNT(*) AS cnt FROM edges WHERE kind = 'calls' GROUP BY target_id) fi \
-                             ON fi.target_id = n.id \
-                             WHERE n.name LIKE ?1 AND n.kind = 'method'"
-                        ).map_err(|e| napi::Error::from_reason(format!("fn_deps hierarchy prepare: {e}")))?;
-                        let rows = stmt.query_map(params![pattern], |row| {
-                            Ok((row.get::<_, i32>("id")?, row.get::<_, String>("name")?))
-                        }).map_err(|e| napi::Error::from_reason(format!("fn_deps hierarchy: {e}")))?;
-                        rows.collect::<Result<Vec<_>, _>>()
-                            .map_err(|e| napi::Error::from_reason(format!("fn_deps hierarchy collect: {e}")))?
-                    };
-                    for (rm_id, rm_name) in &related {
-                        if *rm_id == node.id { continue; }
-                        let mut stmt = conn.prepare_cached(
-                            "SELECT n.id, n.name, n.kind, n.file, n.line \
-                             FROM edges e JOIN nodes n ON e.source_id = n.id \
-                             WHERE e.target_id = ?1 AND e.kind = 'calls'"
-                        ).map_err(|e| napi::Error::from_reason(format!("fn_deps hierarchy callers prepare: {e}")))?;
-                        let rows = stmt.query_map(params![rm_id], |row| {
-                            Ok(CallerWithId {
-                                id: row.get("id")?,
-                                name: row.get("name")?,
-                                kind: row.get("kind")?,
-                                file: row.get("file")?,
-                                line: row.get("line")?,
-                                via_hierarchy: Some(rm_name.clone()),
-                            })
-                        }).map_err(|e| napi::Error::from_reason(format!("fn_deps hierarchy callers: {e}")))?;
-                        let extra: Vec<CallerWithId> = rows.collect::<Result<Vec<_>, _>>()
-                            .map_err(|e| napi::Error::from_reason(format!("fn_deps hierarchy callers collect: {e}")))?;
-                        callers_with_id.extend(extra);
-                    }
-                }
-            }
+        for node in &matched {
+            let callees = fetch_fn_deps_callees(conn, node.id, no_tests)?;
+            let mut callers_with_id = fetch_fn_deps_direct_callers(conn, node.id)?;
+            expand_method_hierarchy_callers(conn, node, &mut callers_with_id)?;
             if no_tests {
                 callers_with_id.retain(|c| !is_test_file(&c.file));
             }
 
-            // Convert to FnDepsCallerNode for output (strip id)
-            let callers: Vec<FnDepsCallerNode> = callers_with_id.iter().map(|c| FnDepsCallerNode {
-                name: c.name.clone(),
-                kind: c.kind.clone(),
-                file: c.file.clone(),
-                line: c.line,
-                via_hierarchy: c.via_hierarchy.clone(),
-            }).collect();
-
-            // BFS transitive callers — reuse callers_with_id as initial frontier
-            let transitive_callers = if depth > 1 {
-                let mut visited = HashSet::new();
-                visited.insert(node.id);
-                let initial_frontier: Vec<CallerWithId> = callers_with_id.iter().map(|c| CallerWithId {
-                    id: c.id, name: c.name.clone(), kind: c.kind.clone(), file: c.file.clone(), line: c.line, via_hierarchy: c.via_hierarchy.clone(),
-                }).collect();
-                let mut frontier: Vec<CallerWithId> = initial_frontier;
-                let mut groups: Vec<FnDepsTransitiveGroup> = Vec::new();
-
-                for d in 2..=depth {
-                    let unvisited: Vec<&CallerWithId> = frontier.iter()
-                        .filter(|f| !visited.contains(&f.id))
-                        .collect();
-                    for f in &unvisited {
-                        visited.insert(f.id);
-                    }
-                    if unvisited.is_empty() { break; }
-
-                    // Batch query: find all callers of the unvisited frontier
-                    let mut next_frontier: Vec<CallerWithId> = Vec::new();
-                    let mut next_ids = HashSet::new();
-                    for f in &unvisited {
-                        let mut stmt = conn.prepare_cached(
-                            "SELECT n.id, n.name, n.kind, n.file, n.line \
-                             FROM edges e JOIN nodes n ON e.source_id = n.id \
-                             WHERE e.target_id = ?1 AND e.kind = 'calls'"
-                        ).map_err(|e| napi::Error::from_reason(format!("fn_deps bfs prepare: {e}")))?;
-                        let rows = stmt.query_map(params![f.id], |row| {
-                            Ok(CallerWithId {
-                                id: row.get("id")?,
-                                name: row.get("name")?,
-                                kind: row.get("kind")?,
-                                file: row.get("file")?,
-                                line: row.get("line")?,
-                                via_hierarchy: None,
-                            })
-                        }).map_err(|e| napi::Error::from_reason(format!("fn_deps bfs: {e}")))?;
-                        let upstream: Vec<CallerWithId> = rows.collect::<Result<Vec<_>, _>>()
-                            .map_err(|e| napi::Error::from_reason(format!("fn_deps bfs collect: {e}")))?;
-                        for u in upstream {
-                            if no_tests && is_test_file(&u.file) { continue; }
-                            if !visited.contains(&u.id) && !next_ids.contains(&u.id) {
-                                next_ids.insert(u.id);
-                                next_frontier.push(u);
-                            }
-                        }
-                    }
-
-                    if !next_frontier.is_empty() {
-                        groups.push(FnDepsTransitiveGroup {
-                            depth: d as i32,
-                            callers: next_frontier.iter().map(|n| FnDepsNode {
-                                name: n.name.clone(),
-                                kind: n.kind.clone(),
-                                file: n.file.clone(),
-                                line: n.line,
-                            }).collect(),
-                        });
-                    }
-                    frontier = next_frontier;
-                }
-                groups
-            } else {
-                Vec::new()
-            };
-
-            // File hash (cached)
-            let file_hash = if !file_hash_cache.contains_key(&node.file) {
-                let hash: Option<String> = conn.prepare_cached(
-                    "SELECT hash FROM file_hashes WHERE file = ?1"
-                ).ok().and_then(|mut stmt| {
-                    stmt.query_row(params![node.file], |row| row.get(0)).ok()
-                });
-                file_hash_cache.insert(node.file.clone(), hash.clone());
-                hash
-            } else {
-                file_hash_cache.get(&node.file).cloned().flatten()
-            };
+            let callers: Vec<FnDepsCallerNode> = callers_with_id
+                .iter()
+                .map(|c| FnDepsCallerNode {
+                    name: c.name.clone(),
+                    kind: c.kind.clone(),
+                    file: c.file.clone(),
+                    line: c.line,
+                    via_hierarchy: c.via_hierarchy.clone(),
+                })
+                .collect();
+
+            let initial_frontier: Vec<FnDepsCallerWithId> = callers_with_id
+                .iter()
+                .map(|c| FnDepsCallerWithId {
+                    id: c.id,
+                    name: c.name.clone(),
+                    kind: c.kind.clone(),
+                    file: c.file.clone(),
+                    line: c.line,
+                    via_hierarchy: c.via_hierarchy.clone(),
+                })
+                .collect();
+            let transitive_callers =
+                bfs_transitive_callers(conn, node.id, initial_frontier, depth, no_tests)?;
+
+            let file_hash = fn_deps_cached_file_hash(conn, &mut file_hash_cache, &node.file);
 
             results.push(FnDepsEntry {
                 name: node.name.clone(),
diff --git a/crates/codegraph-core/src/structure.rs b/crates/codegraph-core/src/structure.rs
index ce5609640..b34307a8f 100644
--- a/crates/codegraph-core/src/structure.rs
+++ b/crates/codegraph-core/src/structure.rs
@@ -401,6 +401,111 @@ fn load_file_paths_in_dirs(conn: &Connection, dirs: &HashSet<String>) -> Vec<Str
     result
 }
 
+/// Emit `directory → file` contains edges for every (deduplicated) file in
+/// the union of `file_symbols` keys and `all_file_paths`. When `affected_dirs`
+/// is set, files whose parent directory is outside it are skipped. `stmt` must
+/// INSERT into edges with kind='contains'; unresolved nodes and insert errors are skipped.
+fn insert_dir_to_file_contains_edges(
+    tx: &rusqlite::Transaction,
+    stmt: &mut rusqlite::Statement,
+    file_symbols: &HashMap<String, FileSymbols>,
+    all_file_paths: &[String],
+    affected_dirs: Option<&HashSet<String>>,
+) {
+    let mut seen_files: HashSet<&str> = HashSet::new(); // borrowed paths: dedup without allocating
+    let file_paths_iter = file_symbols
+        .keys()
+        .map(|s| s.as_str())
+        .chain(all_file_paths.iter().map(|s| s.as_str()));
+
+    for rel_path in file_paths_iter {
+        if !seen_files.insert(rel_path) {
+            continue;
+        }
+        let dir = match parent_dir(rel_path) {
+            Some(d) => d,
+            None => continue,
+        };
+        if let Some(ad) = affected_dirs {
+            if !ad.contains(&dir) {
+                continue;
+            }
+        }
+        let dir_id = match get_node_id(tx, &dir, "directory", &dir, 0) {
+            Some(id) => id,
+            None => continue,
+        };
+        let file_id = match get_node_id(tx, rel_path, "file", rel_path, 0) {
+            Some(id) => id,
+            None => continue,
+        };
+        let _ = stmt.execute(rusqlite::params![dir_id, file_id]); // best-effort insert
+    }
+}
+
+/// Link each directory in `all_dirs` to its parent: for every entry a
+/// `parent_dir → child_dir` contains edge is executed through `stmt` with the
+/// ids bound in `(parent_id, child_id)` order.
+///
+/// An entry is skipped when `parent_dir` yields no parent, when it is its own
+/// parent, when `affected_dirs` is set but does not contain the parent, or
+/// when either node id cannot be resolved. Insert errors are ignored.
+fn insert_dir_to_dir_contains_edges(
+    tx: &rusqlite::Transaction,
+    stmt: &mut rusqlite::Statement,
+    all_dirs: &HashSet<String>,
+    affected_dirs: Option<&HashSet<String>>,
+) {
+    for child in all_dirs {
+        // No parent, or a directory that is its own parent: nothing to link.
+        let parent = match parent_dir(child) {
+            Some(p) if p != *child => p,
+            _ => continue,
+        };
+
+        // With a scope set, only parents inside it receive edges.
+        if affected_dirs.map_or(false, |scope| !scope.contains(&parent)) {
+            continue;
+        }
+        // Resolve the parent first; the child lookup runs only if that succeeds.
+        if let Some(parent_id) = get_node_id(tx, &parent, "directory", &parent, 0) {
+            if let Some(child_id) = get_node_id(tx, child, "directory", child, 0) {
+                let _ = stmt.execute(rusqlite::params![parent_id, child_id]);
+            }
+        }
+    }
+}
+
+/// Re-create `parent → child` directory contains edges for child directories
+/// returned by `load_child_dirs_in_affected` that are absent from `all_dirs`
+/// (restoring edges dropped by cleanup for unchanged sibling subdirectories).
+///
+/// Only children whose parent is in `affected_dirs` and whose two node ids
+/// both resolve get an edge; insert errors are ignored.
+fn restore_unchanged_dir_edges(
+    tx: &rusqlite::Transaction,
+    stmt: &mut rusqlite::Statement,
+    all_dirs: &HashSet<String>,
+    affected_dirs: &HashSet<String>,
+) {
+    let stored_children = load_child_dirs_in_affected(tx, affected_dirs);
+    for child in &stored_children {
+        // Directories present in `all_dirs` are not handled here.
+        if all_dirs.contains(child.as_str()) {
+            continue;
+        }
+        let parent = match parent_dir(child) {
+            Some(p) if affected_dirs.contains(&p) => p,
+            _ => continue,
+        };
+        let parent_id = get_node_id(tx, &parent, "directory", &parent, 0);
+        let child_id = get_node_id(tx, child, "directory", child, 0);
+        if let (Some(p_id), Some(c_id)) = (parent_id, child_id) {
+            let _ = stmt.execute(rusqlite::params![p_id, c_id]);
+        }
+    }
+}
+
 fn insert_contains_edges(
     conn: &Connection,
     file_symbols: &HashMap<String, FileSymbols>,
@@ -422,96 +527,23 @@ fn insert_contains_edges(
             Err(_) => return,
         };
 
-        // In incremental mode, we need ALL file paths in affected directories,
-        // not just the changed files in file_symbols. Load existing file nodes
-        // from the DB so unchanged files keep their dir→file containment edges.
-        let all_file_paths: Vec<String> = if affected_dirs.is_some() {
-            load_file_paths_in_dirs(&tx, affected_dirs.as_ref().unwrap())
+        let all_file_paths: Vec<String> = if let Some(ref ad) = affected_dirs {
+            load_file_paths_in_dirs(&tx, ad)
         } else {
             Vec::new()
         };
 
-        // Directory → file edges: iterate over file_symbols keys (covers
-        // changed/parsed files) plus DB-loaded paths (covers unchanged files
-        // in affected directories during incremental builds).
-        let mut seen_files: HashSet<String> = HashSet::new();
-        let file_paths_iter = file_symbols
-            .keys()
-            .map(|s| s.as_str())
-            .chain(all_file_paths.iter().map(|s| s.as_str()));
-
-        for rel_path in file_paths_iter {
-            if !seen_files.insert(rel_path.to_string()) {
-                continue; // deduplicate
-            }
-            let dir = match parent_dir(rel_path) {
-                Some(d) => d,
-                None => continue,
-            };
-            // Skip unaffected directories in incremental mode
-            if let Some(ref ad) = affected_dirs {
-                if !ad.contains(&dir) {
-                    continue;
-                }
-            }
-            let dir_id = match get_node_id(&tx, &dir, "directory", &dir, 0) {
-                Some(id) => id,
-                None => continue,
-            };
-            let file_id = match get_node_id(&tx, rel_path, "file", rel_path, 0) {
-                Some(id) => id,
-                None => continue,
-            };
-            let _ = stmt.execute(rusqlite::params![dir_id, file_id]);
-        }
-
-        // Parent directory → child directory edges
-        for dir in all_dirs {
-            let parent = match parent_dir(dir) {
-                Some(p) => p,
-                None => continue,
-            };
-            if parent == *dir {
-                continue;
-            }
-            if let Some(ref ad) = affected_dirs {
-                if !ad.contains(&parent) {
-                    continue;
-                }
-            }
-            let parent_id = match get_node_id(&tx, &parent, "directory", &parent, 0) {
-                Some(id) => id,
-                None => continue,
-            };
-            let child_id = match get_node_id(&tx, dir, "directory", dir, 0) {
-                Some(id) => id,
-                None => continue,
-            };
-            let _ = stmt.execute(rusqlite::params![parent_id, child_id]);
-        }
+        insert_dir_to_file_contains_edges(
+            &tx,
+            &mut stmt,
+            file_symbols,
+            &all_file_paths,
+            affected_dirs.as_ref(),
+        );
+        insert_dir_to_dir_contains_edges(&tx, &mut stmt, all_dirs, affected_dirs.as_ref());
 
-        // Restore dir→dir edges for unchanged sibling subdirectories that
-        // were cleaned up but aren't in all_dirs (no changed file under them).
         if let Some(ref ad) = affected_dirs {
-            let db_child_dirs = load_child_dirs_in_affected(&tx, ad);
-            for child_dir in &db_child_dirs {
-                if all_dirs.contains(child_dir.as_str()) {
-                    continue; // already handled above
-                }
-                let parent = match parent_dir(child_dir) {
-                    Some(p) => p,
-                    None => continue,
-                };
-                if !ad.contains(&parent) {
-                    continue;
-                }
-                if let (Some(p_id), Some(c_id)) = (
-                    get_node_id(&tx, &parent, "directory", &parent, 0),
-                    get_node_id(&tx, child_dir, "directory", child_dir, 0),
-                ) {
-                    let _ = stmt.execute(rusqlite::params![p_id, c_id]);
-                }
-            }
+            restore_unchanged_dir_edges(&tx, &mut stmt, all_dirs, ad);
         }
     }
     let _ = tx.commit();
@@ -646,97 +678,96 @@ fn compute_file_metrics(
     let _ = tx.commit();
 }
 
-fn compute_directory_metrics(
-    conn: &Connection,
-    file_symbols: &HashMap<String, FileSymbols>,
-    all_dirs: &HashSet<String>,
-    import_edges: &[ImportEdge],
-) {
-    // Load ALL file paths from DB so directory metrics account for unchanged
-    // files during incremental builds (file_symbols only has changed files).
-    let all_db_files: Vec<String> = {
-        let mut v = Vec::new();
-        if let Ok(mut stmt) = conn.prepare("SELECT name FROM nodes WHERE kind = 'file'") {
-            if let Ok(rows) = stmt.query_map([], |row| row.get::<_, String>(0)) {
-                for row in rows.flatten() {
-                    v.push(row);
-                }
+/// Load every file path stored as a `kind='file'` node in the DB.
+fn load_all_file_paths_from_db(conn: &Connection) -> Vec<String> {
+    let mut v = Vec::new();
+    if let Ok(mut stmt) = conn.prepare("SELECT name FROM nodes WHERE kind = 'file'") {
+        if let Ok(rows) = stmt.query_map([], |row| row.get::<_, String>(0)) {
+            for row in rows.flatten() {
+                v.push(row);
             }
         }
-        v
+    }
+    v
+}
+
+/// Walk a relative file path up through its ancestor directories, pushing
+/// the file's path slice into each ancestor's bucket in `dir_files`.
+fn record_file_in_ancestor_dirs<'a>(
+    rel_path: &'a str,
+    dir_files: &mut HashMap<&'a str, Vec<&'a str>>,
+) {
+    let mut d = match parent_dir(rel_path) {
+        Some(p) => p,
+        None => return,
     };
+    while !d.is_empty() && d != "." {
+        if let Some(files) = dir_files.get_mut(d.as_str()) {
+            files.push(rel_path);
+        }
+        d = match parent_dir(&d) {
+            Some(p) => p,
+            None => break,
+        };
+    }
+}
 
-    // Build dir→files map (transitive: each dir contains all files in all subdirs).
-    // Uses DB files as the complete set, supplemented by file_symbols for any
-    // files not yet in the DB (full build where nodes were just inserted).
+/// Build the `dir → descendant files` map (transitive: each dir lists every file
+/// in all of its subdirs). DB files are the authoritative set for incremental
+/// builds; `file_symbols` adds files not yet visible via the DB query (full build).
+fn build_dir_files_map<'a>(
+    all_dirs: &'a HashSet<String>,
+    all_db_files: &'a [String],
+    file_symbols: &'a HashMap<String, FileSymbols>,
+) -> HashMap<&'a str, Vec<&'a str>> {
     let mut dir_files: HashMap<&str, Vec<&str>> = HashMap::new();
     for dir in all_dirs {
         dir_files.insert(dir.as_str(), Vec::new());
     }
     let mut seen_files: HashSet<&str> = HashSet::new();
-    // First: DB files (complete set for incremental builds)
-    for rel_path in &all_db_files {
-        if !seen_files.insert(rel_path.as_str()) {
-            continue;
-        }
-        let mut d = match parent_dir(rel_path) {
-            Some(p) => p,
-            None => continue,
-        };
-        while !d.is_empty() && d != "." {
-            if let Some(files) = dir_files.get_mut(d.as_str()) {
-                files.push(rel_path.as_str());
-            }
-            d = match parent_dir(&d) {
-                Some(p) => p,
-                None => break,
-            };
+    for rel_path in all_db_files {
+        if seen_files.insert(rel_path.as_str()) {
+            record_file_in_ancestor_dirs(rel_path.as_str(), &mut dir_files);
         }
     }
-    // Second: file_symbols keys (covers newly-inserted files in full builds)
     for rel_path in file_symbols.keys() {
-        if !seen_files.insert(rel_path.as_str()) {
-            continue;
-        }
-        let mut d = match parent_dir(rel_path) {
-            Some(p) => p,
-            None => continue,
-        };
-        while !d.is_empty() && d != "." {
-            if let Some(files) = dir_files.get_mut(d.as_str()) {
-                files.push(rel_path.as_str());
-            }
-            d = match parent_dir(&d) {
-                Some(p) => p,
-                None => break,
-            };
+        if seen_files.insert(rel_path.as_str()) {
+            record_file_in_ancestor_dirs(rel_path.as_str(), &mut dir_files);
         }
     }
+    dir_files
+}
 
-    // Build reverse map: file → set of ancestor directories
+/// Invert `dir_files` to a `file → ancestor dirs` map.
+fn build_file_to_ancestor_dirs<'a>(
+    dir_files: &'a HashMap<&'a str, Vec<&'a str>>,
+) -> HashMap<&'a str, HashSet<&'a str>> {
     let mut file_to_ancestor_dirs: HashMap<&str, HashSet<&str>> = HashMap::new();
-    for (dir, files) in &dir_files {
+    for (dir, files) in dir_files {
         for f in files {
-            file_to_ancestor_dirs
-                .entry(f)
-                .or_default()
-                .insert(dir);
+            file_to_ancestor_dirs.entry(*f).or_default().insert(*dir);
         }
     }
+    file_to_ancestor_dirs
+}
 
-    // Count intra-directory, fan-in, and fan-out edges per directory
-    let mut dir_edge_counts: HashMap<&str, (i64, i64, i64)> = HashMap::new(); // (intra, fan_in, fan_out)
+/// Tally intra-directory, fan-in, and fan-out edge counts per directory by
+/// classifying each import edge against the ancestor sets of its endpoints.
+fn count_directory_edges<'a>(
+    all_dirs: &'a HashSet<String>,
+    file_to_ancestor_dirs: &HashMap<&'a str, HashSet<&'a str>>,
+    import_edges: &[ImportEdge],
+) -> HashMap<&'a str, (i64, i64, i64)> {
+    let mut dir_edge_counts: HashMap<&str, (i64, i64, i64)> = HashMap::new();
     for dir in all_dirs {
         dir_edge_counts.insert(dir.as_str(), (0, 0, 0));
     }
     for edge in import_edges {
         let src_dirs = file_to_ancestor_dirs.get(edge.source_file.as_str());
         let tgt_dirs = file_to_ancestor_dirs.get(edge.target_file.as_str());
-
         if src_dirs.is_none() && tgt_dirs.is_none() {
             continue;
         }
-
         if let Some(src_dirs) = src_dirs {
             for dir in src_dirs {
                 if let Some(counts) = dir_edge_counts.get_mut(dir) {
@@ -758,10 +789,11 @@ fn compute_directory_metrics(
             }
         }
     }
+    dir_edge_counts
+}
 
-    // Count symbols per directory.
-    // Use DB counts (covers all files including unchanged ones in incremental
-    // builds) and fall back to file_symbols for newly-inserted files.
+/// Load per-file symbol counts from the DB (one query per build).
+fn load_db_symbol_counts(conn: &Connection) -> HashMap<String, i64> {
     let mut db_symbol_counts: HashMap<String, i64> = HashMap::new();
     if let Ok(mut stmt) = conn.prepare(
         "SELECT file, COUNT(*) FROM nodes \
@@ -776,26 +808,53 @@ fn compute_directory_metrics(
             }
         }
     }
+    db_symbol_counts
+}
+
+/// Count distinct definitions in `file_symbols` for a single newly-inserted
+/// file (used as a fallback when DB counts haven't been written yet).
+fn count_distinct_definitions(sym: &FileSymbols) -> i64 {
+    let mut seen = HashSet::new();
+    let mut count: i64 = 0;
+    for d in &sym.definitions {
+        let key = format!("{}|{}|{}", d.name, d.kind, d.line);
+        if seen.insert(key) {
+            count += 1;
+        }
+    }
+    count
+}
+
+/// Compute per-directory symbol counts by summing DB counts for every file
+/// under the directory, falling back to in-memory `file_symbols` for any
+/// files not yet persisted.
+fn compute_dir_symbol_counts<'a>(
+    dir_files: &HashMap<&'a str, Vec<&'a str>>,
+    db_symbol_counts: &HashMap<String, i64>,
+    file_symbols: &HashMap<String, FileSymbols>,
+) -> HashMap<&'a str, i64> {
     let mut dir_symbol_counts: HashMap<&str, i64> = HashMap::new();
-    for (dir, files) in &dir_files {
+    for (dir, files) in dir_files {
         let mut count: i64 = 0;
         for f in files {
             if let Some(&c) = db_symbol_counts.get(*f) {
                 count += c;
             } else if let Some(sym) = file_symbols.get(*f) {
-                let mut seen = HashSet::new();
-                for d in &sym.definitions {
-                    let key = format!("{}|{}|{}", d.name, d.kind, d.line);
-                    if seen.insert(key) {
-                        count += 1;
-                    }
-                }
+                count += count_distinct_definitions(sym);
             }
         }
-        dir_symbol_counts.insert(dir, count);
+        dir_symbol_counts.insert(*dir, count);
     }
+    dir_symbol_counts
+}
 
-    // Write directory metrics
+/// Write the directory metrics rows produced by the previous helpers.
+fn write_directory_metric_rows(
+    conn: &Connection,
+    dir_files: &HashMap<&str, Vec<&str>>,
+    dir_symbol_counts: &HashMap<&str, i64>,
+    dir_edge_counts: &HashMap<&str, (i64, i64, i64)>,
+) {
     let tx = match conn.unchecked_transaction() {
         Ok(tx) => tx,
         Err(_) => return,
@@ -809,13 +868,11 @@ fn compute_directory_metrics(
             Ok(s) => s,
             Err(_) => return,
         };
-
-        for (dir, files) in &dir_files {
+        for (dir, files) in dir_files {
             let dir_id = match get_node_id(&tx, dir, "directory", dir, 0) {
                 Some(id) => id,
                 None => continue,
             };
-
             let file_count = files.len() as i64;
             let symbol_count = dir_symbol_counts.get(dir).copied().unwrap_or(0);
             let (intra, fan_in, fan_out) = dir_edge_counts.get(dir).copied().unwrap_or((0, 0, 0));
@@ -825,7 +882,6 @@ fn compute_directory_metrics(
             } else {
                 None
             };
-
             let _ = upsert.execute(rusqlite::params![
                 dir_id,
                 symbol_count,
@@ -839,6 +895,25 @@ fn compute_directory_metrics(
     let _ = tx.commit();
 }
 
+fn compute_directory_metrics(
+    conn: &Connection,
+    file_symbols: &HashMap<String, FileSymbols>,
+    all_dirs: &HashSet<String>,
+    import_edges: &[ImportEdge],
+) {
+    // Load ALL file paths from DB so directory metrics account for unchanged
+    // files during incremental builds (file_symbols only has changed files).
+    let all_db_files = load_all_file_paths_from_db(conn);
+    let dir_files = build_dir_files_map(all_dirs, &all_db_files, file_symbols);
+    let file_to_ancestor_dirs = build_file_to_ancestor_dirs(&dir_files);
+    let dir_edge_counts =
+        count_directory_edges(all_dirs, &file_to_ancestor_dirs, import_edges);
+    let db_symbol_counts = load_db_symbol_counts(conn);
+    let dir_symbol_counts =
+        compute_dir_symbol_counts(&dir_files, &db_symbol_counts, file_symbols);
+    write_directory_metric_rows(conn, &dir_files, &dir_symbol_counts, &dir_edge_counts);
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

From d2eab30ebf0b5462f6842d45b41ae5a17a6de513 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 13:24:03 -0600
Subject: [PATCH 15/27] refactor(parser): extract LANGUAGE_REGISTRY iteration
 and worker boundary helpers

---
 src/domain/parser.ts            | 178 ++++++++++-----
 src/domain/wasm-worker-entry.ts | 384 +++++++++++++++++++-------------
 2 files changed, 354 insertions(+), 208 deletions(-)

diff --git a/src/domain/parser.ts b/src/domain/parser.ts
index bb53192c9..38ebc035a 100644
--- a/src/domain/parser.ts
+++ b/src/domain/parser.ts
@@ -322,12 +322,15 @@ export function getParser(parsers: Map<string, Parser | null>, filePath: string)
  * without _tree", which was the source of #1036 — a single file missing one
  * analysis triggered a full-build re-parse of every WASM-parseable file.
  */
-export async function ensureWasmTrees(
+/**
+ * Select files from `fileSymbols` that still need analysis data and are
+ * parseable by an installed WASM grammar. Pure (no I/O) — safe to unit-test.
+ */
+function collectBackfillPending(
   fileSymbols: Map<string, any>,
   rootDir: string,
   needsFn?: (relPath: string, symbols: any) => boolean,
-): Promise<void> {
-  // Collect files that still need analysis data and are parseable by WASM.
+): Array<{ relPath: string; absPath: string; symbols: any }> {
   const pending: Array<{ relPath: string; absPath: string; symbols: any }> = [];
   for (const [relPath, symbols] of fileSymbols) {
     if (symbols._tree) continue; // legacy path — leave existing trees alone
@@ -335,6 +338,15 @@ export async function ensureWasmTrees(
     if (needsFn && !needsFn(relPath, symbols)) continue;
     pending.push({ relPath, absPath: path.join(rootDir, relPath), symbols });
   }
+  return pending;
+}
+
+export async function ensureWasmTrees(
+  fileSymbols: Map<string, any>,
+  rootDir: string,
+  needsFn?: (relPath: string, symbols: any) => boolean,
+): Promise<void> {
+  const pending = collectBackfillPending(fileSymbols, rootDir, needsFn);
   if (pending.length === 0) return;
 
   const pool = getWasmWorkerPool();
@@ -352,30 +364,37 @@ export async function ensureWasmTrees(
   }
 }
 
-/**
- * Merge pre-computed analysis data from a worker result onto existing symbols.
- * Only fills gaps — never overwrites fields the caller already populated.
- * Used to patch native-parsed symbols with worker-produced astNodes / dataflow /
- * per-definition complexity and cfg.
- */
-function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
+/** Fill gap-only scalar metadata (`_langId`, `_lineCount`) from the worker output. */
+function mergeScalarMetadata(symbols: any, worker: ExtractorOutput): void {
   if (!symbols._langId && worker._langId) symbols._langId = worker._langId;
   if (!symbols._lineCount && worker._lineCount) symbols._lineCount = worker._lineCount;
+}
+
+/** Fill gap-only analysis arrays (`astNodes`, `dataflow`) from the worker output. */
+function mergeAnalysisArrays(symbols: any, worker: ExtractorOutput): void {
   if (!Array.isArray(symbols.astNodes) && Array.isArray(worker.astNodes)) {
     symbols.astNodes = worker.astNodes;
   }
   if (!symbols.dataflow && worker.dataflow) symbols.dataflow = worker.dataflow;
-  if (worker.typeMap && worker.typeMap.size > 0) {
-    if (!symbols.typeMap || !(symbols.typeMap instanceof Map)) {
-      symbols.typeMap = new Map(worker.typeMap);
-    } else {
-      for (const [k, v] of worker.typeMap) {
-        if (!symbols.typeMap.has(k)) symbols.typeMap.set(k, v);
-      }
-    }
+}
+
+/** Merge worker typeMap into existing symbols.typeMap with first-wins semantics. */
+function mergeTypeMap(symbols: any, worker: ExtractorOutput): void {
+  if (!worker.typeMap || worker.typeMap.size === 0) return;
+  if (!symbols.typeMap || !(symbols.typeMap instanceof Map)) {
+    symbols.typeMap = new Map(worker.typeMap);
+    return;
+  }
+  for (const [k, v] of worker.typeMap) {
+    if (!symbols.typeMap.has(k)) symbols.typeMap.set(k, v);
   }
+}
+
+/** Patch existing definitions with worker complexity/cfg when absent. */
+function mergeDefinitionAnalysis(symbols: any, worker: ExtractorOutput): void {
   const existingDefs: any[] = Array.isArray(symbols.definitions) ? symbols.definitions : [];
   const workerDefs: any[] = Array.isArray(worker.definitions) ? worker.definitions : [];
+  if (existingDefs.length === 0 || workerDefs.length === 0) return;
   // Index existing defs by (kind, name, line) — mirrors engine.ts matching key.
   const byKey = new Map<string, any>();
   for (const d of existingDefs) byKey.set(`${d.kind}|${d.name}|${d.line}`, d);
@@ -389,6 +408,19 @@ function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
   }
 }
 
+/**
+ * Merge pre-computed analysis data from a worker result onto existing symbols.
+ * Only fills gaps — never overwrites fields the caller already populated.
+ * Used to patch native-parsed symbols with worker-produced astNodes / dataflow /
+ * per-definition complexity and cfg.
+ */
+function mergeAnalysisData(symbols: any, worker: ExtractorOutput): void {
+  mergeScalarMetadata(symbols, worker);
+  mergeAnalysisArrays(symbols, worker);
+  mergeTypeMap(symbols, worker);
+  mergeDefinitionAnalysis(symbols, worker);
+}
+
 /**
  * Check whether the required WASM grammar files exist on disk.
  */
@@ -603,24 +635,36 @@ function patchDefinitions(definitions: any[]): void {
   }
 }
 
+/**
+ * Field renames applied to each import record to bridge older native binaries
+ * that emit snake_case names. Each `[camel, snake]` pair becomes:
+ *   `if (imp[camel] === undefined) imp[camel] = imp[snake];`
+ * Defined as data so the loop body stays trivially linear in cognitive complexity.
+ */
+const IMPORT_FIELD_RENAMES: ReadonlyArray<readonly [string, string]> = [
+  ['typeOnly', 'type_only'],
+  ['wildcardReexport', 'wildcard_reexport'],
+  ['pythonImport', 'python_import'],
+  ['goImport', 'go_import'],
+  ['rustUse', 'rust_use'],
+  ['javaImport', 'java_import'],
+  ['csharpUsing', 'csharp_using'],
+  ['rubyRequire', 'ruby_require'],
+  ['phpUse', 'php_use'],
+  ['cInclude', 'c_include'],
+  ['kotlinImport', 'kotlin_import'],
+  ['swiftImport', 'swift_import'],
+  ['scalaImport', 'scala_import'],
+  ['bashSource', 'bash_source'],
+  ['dynamicImport', 'dynamic_import'],
+];
+
 /** Patch import fields for backward compat with older native binaries. */
 function patchImports(imports: any[]): void {
   for (const i of imports) {
-    if (i.typeOnly === undefined) i.typeOnly = i.type_only;
-    if (i.wildcardReexport === undefined) i.wildcardReexport = i.wildcard_reexport;
-    if (i.pythonImport === undefined) i.pythonImport = i.python_import;
-    if (i.goImport === undefined) i.goImport = i.go_import;
-    if (i.rustUse === undefined) i.rustUse = i.rust_use;
-    if (i.javaImport === undefined) i.javaImport = i.java_import;
-    if (i.csharpUsing === undefined) i.csharpUsing = i.csharp_using;
-    if (i.rubyRequire === undefined) i.rubyRequire = i.ruby_require;
-    if (i.phpUse === undefined) i.phpUse = i.php_use;
-    if (i.cInclude === undefined) i.cInclude = i.c_include;
-    if (i.kotlinImport === undefined) i.kotlinImport = i.kotlin_import;
-    if (i.swiftImport === undefined) i.swiftImport = i.swift_import;
-    if (i.scalaImport === undefined) i.scalaImport = i.scala_import;
-    if (i.bashSource === undefined) i.bashSource = i.bash_source;
-    if (i.dynamicImport === undefined) i.dynamicImport = i.dynamic_import;
+    for (const [camel, snake] of IMPORT_FIELD_RENAMES) {
+      if (i[camel] === undefined) i[camel] = i[snake];
+    }
   }
 }
 
@@ -1170,18 +1214,16 @@ export async function parseFilesWasmForBackfill(
 }
 
 /**
- * Parse multiple files in bulk and return a Map<relPath, symbols>.
+ * Run the native engine over `filePaths` and ingest the results into `result`.
+ * Returns the set of file paths the native engine successfully parsed and the
+ * TS/TSX files that need a typeMap backfill pass.
  */
-export async function parseFilesAuto(
+function ingestNativeResults(
+  native: any,
   filePaths: string[],
   rootDir: string,
-  opts: ParseEngineOpts = {},
-): Promise<Map<string, ExtractorOutput>> {
-  const { native } = resolveEngine(opts);
-
-  if (!native) return parseFilesWasm(filePaths, rootDir);
-
-  const result = new Map<string, ExtractorOutput>();
+  result: Map<string, ExtractorOutput>,
+): { nativeParsed: Set<string>; needsTypeMap: { filePath: string; relPath: string }[] } {
   // Always extract all analysis data (dataflow + AST nodes) during native parse.
   // This eliminates the need for any downstream WASM re-parse or native standalone calls.
   const nativeResults = native.parseFilesFull
@@ -1204,27 +1246,51 @@ export async function parseFilesAuto(
       needsTypeMap.push({ filePath: r.file, relPath });
     }
   }
-  if (needsTypeMap.length > 0) {
-    await backfillTypeMapBatch(needsTypeMap, result);
-  }
+  return { nativeParsed, needsTypeMap };
+}
 
-  // Engine parity: native may silently drop files whose extensions are in
-  // SUPPORTED_EXTENSIONS (because a WASM grammar exists) but whose Rust
-  // extractor/grammar is missing or fails. WASM handles these — fall back so
-  // both engines process the same file set (#967). Restrict to installed WASM
-  // grammars so we don't warn about files that neither engine can parse.
+/**
+ * Engine parity: native may silently drop files whose extensions are in
+ * SUPPORTED_EXTENSIONS (because a WASM grammar exists) but whose Rust
+ * extractor/grammar is missing or fails. WASM handles these — fall back so
+ * both engines process the same file set (#967). Restrict to installed WASM
+ * grammars so we don't warn about files that neither engine can parse.
+ */
+async function backfillNativeDrops(
+  filePaths: string[],
+  nativeParsed: Set<string>,
+  rootDir: string,
+  result: Map<string, ExtractorOutput>,
+): Promise<void> {
   const installedExts = getInstalledWasmExtensions();
   const dropped = filePaths.filter(
     (f) => !nativeParsed.has(f) && installedExts.has(path.extname(f).toLowerCase()),
   );
-  if (dropped.length > 0) {
-    warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
-    const wasmResults = await parseFilesWasmForBackfill(dropped, rootDir);
-    for (const [relPath, symbols] of wasmResults) {
-      result.set(relPath, symbols);
-    }
+  if (dropped.length === 0) return;
+  warn(`Native engine dropped ${dropped.length} file(s); falling back to WASM for parity`);
+  const wasmResults = await parseFilesWasmForBackfill(dropped, rootDir);
+  for (const [relPath, symbols] of wasmResults) {
+    result.set(relPath, symbols);
   }
+}
 
+/**
+ * Parse multiple files in bulk and return a Map<relPath, symbols>.
+ */
+export async function parseFilesAuto(
+  filePaths: string[],
+  rootDir: string,
+  opts: ParseEngineOpts = {},
+): Promise<Map<string, ExtractorOutput>> {
+  const { native } = resolveEngine(opts);
+  if (!native) return parseFilesWasm(filePaths, rootDir);
+
+  const result = new Map<string, ExtractorOutput>();
+  const { nativeParsed, needsTypeMap } = ingestNativeResults(native, filePaths, rootDir, result);
+  if (needsTypeMap.length > 0) {
+    await backfillTypeMapBatch(needsTypeMap, result);
+  }
+  await backfillNativeDrops(filePaths, nativeParsed, rootDir, result);
   return result;
 }
 
diff --git a/src/domain/wasm-worker-entry.ts b/src/domain/wasm-worker-entry.ts
index ca02bca70..b9298e879 100644
--- a/src/domain/wasm-worker-entry.ts
+++ b/src/domain/wasm-worker-entry.ts
@@ -573,6 +573,90 @@ interface SetupResult {
   dataflowVisitor: Visitor | null;
 }
 
+/**
+ * Build the AST-store visitor for `langId`. Returns `null` when AST is
+ * disabled or the language has no AST type map. db-free — passes an empty
+ * nodeIdMap. The main thread re-resolves parent node IDs in
+ * `features/ast.ts::collectFileAstRows`.
+ */
+function buildAstVisitor(
+  langId: string,
+  defs: ExtractorOutput['definitions'],
+  relPath: string,
+  enabled: boolean,
+): Visitor | null {
+  if (!enabled) return null;
+  const astTypeMap = AST_TYPE_MAPS.get(langId);
+  if (!astTypeMap) return null;
+  const stringConfig = AST_STRING_CONFIGS.get(langId);
+  return createAstStoreVisitor(
+    astTypeMap,
+    defs,
+    relPath,
+    new Map<string, number>(),
+    stringConfig,
+    astStopRecurseKinds(langId),
+  );
+}
+
+/**
+ * Build the complexity visitor when enabled, the language has complexity
+ * rules, and at least one definition still lacks a `complexity` payload.
+ * Side-effect: extends `walkerOpts` with nesting-node types and a
+ * `getFunctionName` resolver suitable for this language.
+ */
+function buildComplexityVisitor(
+  langId: string,
+  defs: ExtractorOutput['definitions'],
+  enabled: boolean,
+  walkerOpts: WalkOptions,
+): Visitor | null {
+  if (!enabled) return null;
+  const cRules = COMPLEXITY_RULES.get(langId);
+  if (!cRules || !defs.some((d) => hasFuncBody(d) && !d.complexity)) return null;
+
+  const hRules = HALSTEAD_RULES.get(langId);
+  const visitor = createComplexityVisitor(cRules, hRules, { fileLevelWalk: true, langId });
+  for (const t of cRules.nestingNodes) walkerOpts.nestingNodeTypes?.add(t);
+  const dfRules = DATAFLOW_RULES.get(langId);
+  walkerOpts.getFunctionName = (node: TreeSitterNode): string | null => {
+    const nameNode = node.childForFieldName('name');
+    if (nameNode) return nameNode.text;
+    // dfRules shape varies per language; visitor-utils accepts any shape
+    if (dfRules) return getFuncName(node, dfRules as any);
+    return null;
+  };
+  return visitor;
+}
+
+/** Build the CFG visitor when enabled and at least one definition still lacks blocks. */
+function buildCfgVisitor(
+  langId: string,
+  defs: ExtractorOutput['definitions'],
+  enabled: boolean,
+): Visitor | null {
+  if (!enabled) return null;
+  const cfgRulesForLang = CFG_RULES.get(langId);
+  if (!cfgRulesForLang) return null;
+  const needsCfg = defs.some(
+    (d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks),
+  );
+  if (!needsCfg) return null;
+  return createCfgVisitor(cfgRulesForLang);
+}
+
+/** Build the dataflow visitor when enabled and `symbols.dataflow` is not yet populated. */
+function buildDataflowVisitor(
+  langId: string,
+  symbols: ExtractorOutput,
+  enabled: boolean,
+): Visitor | null {
+  if (!enabled) return null;
+  const dfRules = DATAFLOW_RULES.get(langId);
+  if (!dfRules || symbols.dataflow) return null;
+  return createDataflowVisitor(dfRules);
+}
+
 function setupVisitorsLocal(
   symbols: ExtractorOutput,
   relPath: string,
@@ -580,82 +664,158 @@ function setupVisitorsLocal(
   opts: WorkerParseRequest['opts'],
 ): SetupResult {
   const defs = symbols.definitions || [];
-  const visitors: Visitor[] = [];
   const walkerOpts: WalkOptions = {
     functionNodeTypes: new Set<string>(),
     nestingNodeTypes: new Set<string>(),
     getFunctionName: (_node: TreeSitterNode) => null,
   };
 
-  // AST-store: db-free — pass an empty nodeIdMap. The main thread re-resolves
-  // parent node IDs in features/ast.ts::collectFileAstRows.
-  let astVisitor: Visitor | null = null;
-  if (opts.ast) {
-    const astTypeMap = AST_TYPE_MAPS.get(langId);
-    if (astTypeMap) {
-      const stringConfig = AST_STRING_CONFIGS.get(langId);
-      astVisitor = createAstStoreVisitor(
-        astTypeMap,
-        defs,
-        relPath,
-        new Map<string, number>(),
-        stringConfig,
-        astStopRecurseKinds(langId),
-      );
-      visitors.push(astVisitor);
-    }
-  }
+  const astVisitor = buildAstVisitor(langId, defs, relPath, !!opts.ast);
+  const complexityVisitor = buildComplexityVisitor(langId, defs, !!opts.complexity, walkerOpts);
+  const cfgVisitor = buildCfgVisitor(langId, defs, !!opts.cfg);
+  const dataflowVisitor = buildDataflowVisitor(langId, symbols, !!opts.dataflow);
 
-  // Complexity
-  let complexityVisitor: Visitor | null = null;
-  if (opts.complexity) {
-    const cRules = COMPLEXITY_RULES.get(langId);
-    if (cRules && defs.some((d) => hasFuncBody(d) && !d.complexity)) {
-      const hRules = HALSTEAD_RULES.get(langId);
-      complexityVisitor = createComplexityVisitor(cRules, hRules, {
-        fileLevelWalk: true,
-        langId,
-      });
-      for (const t of cRules.nestingNodes) walkerOpts.nestingNodeTypes?.add(t);
-      const dfRules = DATAFLOW_RULES.get(langId);
-      walkerOpts.getFunctionName = (node: TreeSitterNode): string | null => {
-        const nameNode = node.childForFieldName('name');
-        if (nameNode) return nameNode.text;
-        // dfRules shape varies per language; visitor-utils accepts any shape
-        if (dfRules) return getFuncName(node, dfRules as any);
-        return null;
-      };
-      visitors.push(complexityVisitor);
-    }
-  }
+  const visitors: Visitor[] = [];
+  if (astVisitor) visitors.push(astVisitor);
+  if (complexityVisitor) visitors.push(complexityVisitor);
+  if (cfgVisitor) visitors.push(cfgVisitor);
+  if (dataflowVisitor) visitors.push(dataflowVisitor);
 
-  // CFG
-  let cfgVisitor: Visitor | null = null;
-  if (opts.cfg) {
-    const cfgRulesForLang = CFG_RULES.get(langId);
-    if (
-      cfgRulesForLang &&
-      defs.some((d) => hasFuncBody(d) && d.cfg !== null && !Array.isArray(d.cfg?.blocks))
-    ) {
-      cfgVisitor = createCfgVisitor(cfgRulesForLang);
-      visitors.push(cfgVisitor);
-    }
+  return { visitors, walkerOpts, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor };
+}
+
+// ── Main parse handler ──────────────────────────────────────────────────────
+
+/**
+ * Run tree-sitter parse + extractor on `code`. Returns `null` when either step
+ * yields no usable output, disposing the parsed tree first so it cannot leak.
+ * Throws (for the caller to report to the pool) only when `parser.parse` throws.
+ */
+function parseAndExtract(
+  parser: Parser,
+  entry: LanguageRegistryEntry,
+  filePath: string,
+  code: string,
+): { tree: Tree; symbols: ExtractorOutput } | null {
+  let tree: Tree | null;
+  try {
+    tree = parser.parse(code);
+  } catch (e: unknown) {
+    // Parse error — report back but keep worker alive.
+    throw new Error(`parse failed: ${(e as Error).message}`);
   }
+  if (!tree) return null;
 
-  // Dataflow
-  let dataflowVisitor: Visitor | null = null;
-  if (opts.dataflow) {
-    const dfRules = DATAFLOW_RULES.get(langId);
-    if (dfRules && !symbols.dataflow) {
-      dataflowVisitor = createDataflowVisitor(dfRules);
-      visitors.push(dataflowVisitor);
-    }
+  // Extractor — on failure, skip file (ok:true, null) to match parser.ts
+  // behavior where extractor issues don't crash the build.
+  let symbols: ExtractorOutput | null;
+  try {
+    const query = _queries.get(entry.id);
+    // Tree/Query match TreeSitterTree/TreeSitterQuery structurally at runtime —
+    // same cast style as parser.ts::wasmExtractSymbols (parser.ts:789).
+    symbols = entry.extractor(tree as any, filePath, query as any) ?? null;
+  } catch {
+    symbols = null;
   }
+  if (!symbols) {
+    disposeTree(tree); // never handed to the caller here — free its WASM memory
+    return null;
+  }
+  return { tree, symbols };
+}
 
-  return { visitors, walkerOpts, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor };
+/**
+ * Project the visitor `ast-store` rows into the wire-safe shape returned to
+ * the main thread. Strips `file` and `parentNodeId` — both are re-resolved in
+ * `features/ast.ts::collectFileAstRows`. Always returns an array (even empty)
+ * so `engine.ts::fileNeedsWasmTree` doesn't treat the file as un-walked and
+ * trigger a full ensureWasmTrees re-parse (#1036).
+ */
+function projectAstNodes(results: WalkResults): SerializedExtractorOutput['astNodes'] {
+  const astRows = (results['ast-store'] || []) as Array<{
+    line: number;
+    kind: string;
+    name: string | null | undefined;
+    text: string | null;
+    receiver: string | null;
+    file?: string;
+    parentNodeId?: number | null;
+  }>;
+  return astRows.map((n) => ({
+    line: n.line,
+    kind: n.kind,
+    name: n.name ?? '',
+    text: n.text ?? undefined,
+    receiver: n.receiver ?? undefined,
+  }));
 }
 
-// ── Main parse handler ──────────────────────────────────────────────────────
+/**
+ * Run the configured visitor walk over `tree.rootNode` and apply each
+ * visitor's results back onto `symbols`. Returns the serialized astNodes (an
+ * array, possibly empty), or `undefined` when no visitor or no AST visitor ran.
+ *
+ * Mirrors engine.ts:791-829. Runs BEFORE `tree.delete()` because
+ * storeComplexityResults / storeCfgResults read `funcNode` off live nodes.
+ */
+function runVisitorWalk(
+  tree: Tree,
+  symbols: ExtractorOutput,
+  langId: string,
+  setup: SetupResult,
+): SerializedExtractorOutput['astNodes'] {
+  if (setup.visitors.length === 0) return undefined;
+  // rootNode shape matches TreeSitterNode at runtime — same cast as parser.ts:789.
+  const results = walkWithVisitors(tree.rootNode as any, setup.visitors, langId, setup.walkerOpts);
+  const defs = symbols.definitions || [];
+  let serializedAstNodes: SerializedExtractorOutput['astNodes'];
+  if (setup.astVisitor) serializedAstNodes = projectAstNodes(results);
+  if (setup.complexityVisitor) storeComplexityResults(results, defs, langId);
+  if (setup.cfgVisitor) storeCfgResults(results, defs);
+  if (setup.dataflowVisitor) symbols.dataflow = results.dataflow as DataflowResult;
+  return serializedAstNodes;
+}
+
+/**
+ * Pack the in-memory ExtractorOutput into the structured-clone-safe shape
+ * sent back across the worker boundary. Converts the typeMap into a tuple
+ * array and intentionally omits `_tree` (cannot cross the boundary).
+ */
+function serializeExtractorOutput(
+  symbols: ExtractorOutput,
+  langId: LanguageId,
+  code: string,
+  astNodes: SerializedExtractorOutput['astNodes'],
+): SerializedExtractorOutput {
+  return {
+    definitions: symbols.definitions,
+    calls: symbols.calls,
+    imports: symbols.imports,
+    classes: symbols.classes,
+    exports: symbols.exports,
+    typeMap: Array.from(symbols.typeMap.entries()),
+    _langId: langId,
+    _lineCount: code.split('\n').length,
+    dataflow: symbols.dataflow,
+    astNodes,
+  };
+}
+
+/**
+ * Release WASM linear memory backing a tree. Best-effort — swallows errors so
+ * the worker keeps serving requests. Deferring this would let trees accumulate
+ * in the worker's WASM heap and defeat the point of isolating parse calls.
+ */
+function disposeTree(tree: Tree | null): void {
+  if (!tree) return;
+  const deletable = tree as unknown as { delete?: () => void };
+  if (typeof deletable.delete !== 'function') return;
+  try {
+    deletable.delete();
+  } catch {
+    // best-effort cleanup — swallow; worker continues.
+  }
+}
 
 async function handleParse(msg: WorkerParseRequest): Promise<SerializedExtractorOutput | null> {
   const ext = path.extname(msg.filePath).toLowerCase();
@@ -666,100 +826,20 @@ async function handleParse(msg: WorkerParseRequest): Promise<SerializedExtractor
   const parser = await loadLanguageLazy(entry);
   if (!parser) return null;
 
-  let tree: Tree | null = null;
-  try {
-    try {
-      tree = parser.parse(msg.code);
-    } catch (e: unknown) {
-      // Parse error — report back but keep worker alive.
-      throw new Error(`parse failed: ${(e as Error).message}`);
-    }
-    if (!tree) return null;
-
-    // Extractor — on failure, skip file (ok:true, null) to match parser.ts
-    // behavior where extractor issues don't crash the build.
-    let symbols: ExtractorOutput | null;
-    try {
-      const query = _queries.get(entry.id);
-      // tree-sitter's Tree/Query are structurally compatible with
-      // TreeSitterTree/TreeSitterQuery at runtime — same cast style as
-      // parser.ts::wasmExtractSymbols (parser.ts:789).
-      symbols = entry.extractor(tree as any, msg.filePath, query as any) ?? null;
-    } catch {
-      return null;
-    }
-    if (!symbols) return null;
-
-    // Unified visitor walk — mirrors engine.ts:791-829. Runs BEFORE tree.delete()
-    // because storeComplexityResults/storeCfgResults read funcNode off live nodes.
-    const { visitors, walkerOpts, astVisitor, complexityVisitor, cfgVisitor, dataflowVisitor } =
-      setupVisitorsLocal(symbols, msg.filePath, entry.id, msg.opts);
+  const parsed = parseAndExtract(parser, entry, msg.filePath, msg.code);
+  if (!parsed) return null;
+  const { tree, symbols } = parsed;
 
-    // astNodes are kept in the serialized shape (without `file`/`parentNodeId`),
+  try {
+    const setup = setupVisitorsLocal(symbols, msg.filePath, entry.id, msg.opts);
+    // astNodes kept in the serialized shape (without `file`/`parentNodeId`),
     // not assigned back to symbols.astNodes — ExtractorOutput.astNodes is
     // ASTNodeRow[] (DB row shape with node_id), which is a different type.
-    let serializedAstNodes: SerializedExtractorOutput['astNodes'];
-
-    if (visitors.length > 0) {
-      // rootNode shape matches TreeSitterNode at runtime — same cast as parser.ts:789.
-      const results = walkWithVisitors(tree.rootNode as any, visitors, entry.id, walkerOpts);
-
-      const defs = symbols.definitions || [];
-      if (astVisitor) {
-        const astRows = (results['ast-store'] || []) as Array<{
-          line: number;
-          kind: string;
-          name: string | null | undefined;
-          text: string | null;
-          receiver: string | null;
-          file?: string;
-          parentNodeId?: number | null;
-        }>;
-        // Always set an array (even empty) — leaving astNodes undefined makes
-        // engine.ts::fileNeedsWasmTree treat the file as un-walked and trigger
-        // a full ensureWasmTrees re-parse of every WASM-parseable file (#1036).
-        // Strip `file` and `parentNodeId` — main thread re-resolves both in
-        // features/ast.ts::collectFileAstRows.
-        serializedAstNodes = astRows.map((n) => ({
-          line: n.line,
-          kind: n.kind,
-          name: n.name ?? '',
-          text: n.text ?? undefined,
-          receiver: n.receiver ?? undefined,
-        }));
-      }
-
-      if (complexityVisitor) storeComplexityResults(results, defs, entry.id);
-      if (cfgVisitor) storeCfgResults(results, defs);
-      if (dataflowVisitor) symbols.dataflow = results.dataflow as DataflowResult;
-    }
-
-    // Serialize — convert Map<string, TypeMapEntry> to tuple array for the wire.
-    const serialized: SerializedExtractorOutput = {
-      definitions: symbols.definitions,
-      calls: symbols.calls,
-      imports: symbols.imports,
-      classes: symbols.classes,
-      exports: symbols.exports,
-      typeMap: Array.from(symbols.typeMap.entries()),
-      _langId: entry.id as LanguageId,
-      _lineCount: msg.code.split('\n').length,
-      dataflow: symbols.dataflow,
-      astNodes: serializedAstNodes,
-    };
-    // _tree is deliberately not serialized — it cannot cross the worker boundary.
-    return serialized;
+    const serializedAstNodes = runVisitorWalk(tree, symbols, entry.id, setup);
+    return serializeExtractorOutput(symbols, entry.id as LanguageId, msg.code, serializedAstNodes);
   } finally {
-    // ALWAYS release WASM memory before responding. Deferring this would let
-    // trees accumulate in the worker's WASM heap across requests and defeat
-    // the point of isolating parse calls.
-    if (tree && typeof (tree as unknown as { delete?: () => void }).delete === 'function') {
-      try {
-        (tree as unknown as { delete: () => void }).delete();
-      } catch {
-        // best-effort cleanup — swallow; worker continues.
-      }
-    }
+    // ALWAYS release WASM memory before responding (see disposeTree note).
+    disposeTree(tree);
   }
 }
 

From 6819cd6a3c3491d724d570ce2ff5fa1ddbfbf12b Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 13:31:52 -0600
Subject: [PATCH 16/27] refactor(analysis): decompose module-map and reduce
 complexity in fn-impact and dependencies

Split high-cognitive-complexity functions in the analysis domain into focused
helpers. Worst functions per gauntlet (cog/cyc/maxNesting/halstead) are now
below thresholds.

module-map.ts (statsData cog=31 -> below threshold):
- Extract buildStatsFromNative and buildStatsFromJs branches
- Share false-positive query and quality-score helpers between paths
- aggregateRolesFromNative pulls duplicated role-aggregation code out

fn-impact.ts (bfsTransitiveCallers cog=37 -> below threshold,
              impactAnalysisData cog=27 -> below threshold):
- Extract recordCaller, processFrontierNode, seedInterfaceImplementors
- Extract bfsImportDependents and groupDependentsByLevel

dependencies.ts (bfsShortestPath cog=29, bfsFilePath cog=30,
                 buildTransitiveCallers cog=24 -> all below threshold):
- Extract buildNextCallerFrontier from buildTransitiveCallers
- Extract buildNeighborStmt + visitNeighbor; state collected in struct
- Extract visitFileNeighbor + reconstructFilePath

docs check acknowledged - internal helper extraction, no user-facing changes
---
 src/domain/analysis/dependencies.ts | 251 ++++++++++++++--------
 src/domain/analysis/fn-impact.ts    | 170 ++++++++++-----
 src/domain/analysis/module-map.ts   | 315 +++++++++++++++-------------
 3 files changed, 461 insertions(+), 275 deletions(-)

diff --git a/src/domain/analysis/dependencies.ts b/src/domain/analysis/dependencies.ts
index 1a619c861..4a759e6e1 100644
--- a/src/domain/analysis/dependencies.ts
+++ b/src/domain/analysis/dependencies.ts
@@ -58,9 +58,32 @@ export function fileDepsData(
  *
  * Uses Repository.findCallers() so it works with both native and WASM engines.
  */
+type CallerRow = { id: number; name: string; kind: string; file: string; line: number };
+
+/** Compute the next BFS frontier from a batched upstream-callers lookup. */
+function buildNextCallerFrontier(
+  unvisited: CallerRow[],
+  batchCallers: Map<number, CallerRow[]>,
+  visited: Set<number>,
+  noTests: boolean,
+): CallerRow[] {
+  const nextFrontier: CallerRow[] = [];
+  const nextFrontierIds = new Set<number>();
+  for (const f of unvisited) {
+    const upstream = batchCallers.get(f.id) || [];
+    for (const u of upstream) {
+      if (noTests && isTestFile(u.file)) continue;
+      if (visited.has(u.id) || nextFrontierIds.has(u.id)) continue;
+      nextFrontierIds.add(u.id);
+      nextFrontier.push(u);
+    }
+  }
+  return nextFrontier;
+}
+
 function buildTransitiveCallers(
   repo: InstanceType<typeof Repository>,
-  callers: Array<{ id: number; name: string; kind: string; file: string; line: number }>,
+  callers: CallerRow[],
   nodeId: number,
   depth: number,
   noTests: boolean,
@@ -81,18 +104,8 @@ function buildTransitiveCallers(
     if (unvisited.length === 0) break;
 
     const batchCallers = repo.findCallersBatch(unvisited.map((f) => f.id));
-    const nextFrontier: typeof frontier = [];
-    const nextFrontierIds = new Set<number>();
-    for (const f of unvisited) {
-      const upstream = batchCallers.get(f.id) || [];
-      for (const u of upstream) {
-        if (noTests && isTestFile(u.file)) continue;
-        if (!visited.has(u.id) && !nextFrontierIds.has(u.id)) {
-          nextFrontierIds.add(u.id);
-          nextFrontier.push(u);
-        }
-      }
-    }
+    const nextFrontier = buildNextCallerFrontier(unvisited, batchCallers, visited, noTests);
+
     if (nextFrontier.length > 0) {
       transitiveCallers[d] = nextFrontier.map((n) => ({
         name: n.name,
@@ -258,22 +271,30 @@ function resolveEndpoints(
   };
 }
 
-/**
- * BFS from sourceId toward targetId.
- * Returns { found, parent, alternateCount, foundDepth }.
- * `parent` maps nodeId -> { parentId, edgeKind }.
- */
-function bfsShortestPath(
+type NeighborRow = {
+  id: number;
+  name: string;
+  kind: string;
+  file: string;
+  line: number;
+  edge_kind: string;
+};
+
+type BfsShortestState = {
+  visited: Set<number>;
+  parent: Map<number, { parentId: number; edgeKind: string }>;
+  found: boolean;
+  foundDepth: number;
+  alternateCount: number;
+};
+
+/** Build the SQL statement that yields neighbors of a node id in the requested direction. */
+function buildNeighborStmt(
   db: BetterSqlite3Database,
-  sourceId: number,
-  targetId: number,
   edgeKinds: string[],
   reverse: boolean,
-  maxDepth: number,
-  noTests: boolean,
-) {
+): ReturnType<BetterSqlite3Database['prepare']> {
   const kindPlaceholders = edgeKinds.map(() => '?').join(', ');
-
   // Forward: source_id -> target_id (A calls... calls B)
   // Reverse: target_id -> source_id (B is called by... called by A)
   const neighborQuery = reverse
@@ -283,50 +304,78 @@ function bfsShortestPath(
     : `SELECT n.id, n.name, n.kind, n.file, n.line, e.kind AS edge_kind
        FROM edges e JOIN nodes n ON e.target_id = n.id
        WHERE e.source_id = ? AND e.kind IN (${kindPlaceholders})`;
-  const neighborStmt = db.prepare(neighborQuery);
+  return db.prepare(neighborQuery);
+}
+
+/** Process a single neighbor row during BFS; updates state and nextQueue in place. */
+function visitNeighbor(
+  n: NeighborRow,
+  currentId: number,
+  depth: number,
+  targetId: number,
+  state: BfsShortestState,
+  nextQueue: number[],
+  noTests: boolean,
+): void {
+  if (noTests && isTestFile(n.file)) return;
+  if (n.id === targetId) {
+    if (!state.found) {
+      state.found = true;
+      state.foundDepth = depth;
+      state.parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind });
+    }
+    state.alternateCount++;
+    return;
+  }
+  if (state.visited.has(n.id)) return;
+  state.visited.add(n.id);
+  state.parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind });
+  nextQueue.push(n.id);
+}
 
-  const visited = new Set([sourceId]);
-  const parent = new Map<number, { parentId: number; edgeKind: string }>();
+/**
+ * BFS from sourceId toward targetId.
+ * Returns { found, parent, alternateCount, foundDepth }.
+ * `parent` maps nodeId -> { parentId, edgeKind }.
+ */
+function bfsShortestPath(
+  db: BetterSqlite3Database,
+  sourceId: number,
+  targetId: number,
+  edgeKinds: string[],
+  reverse: boolean,
+  maxDepth: number,
+  noTests: boolean,
+) {
+  const neighborStmt = buildNeighborStmt(db, edgeKinds, reverse);
+  const state: BfsShortestState = {
+    visited: new Set([sourceId]),
+    parent: new Map(),
+    found: false,
+    foundDepth: -1,
+    alternateCount: 0,
+  };
   let queue = [sourceId];
-  let found = false;
-  let alternateCount = 0;
-  let foundDepth = -1;
 
   for (let depth = 1; depth <= maxDepth; depth++) {
     const nextQueue: number[] = [];
     for (const currentId of queue) {
-      const neighbors = neighborStmt.all(currentId, ...edgeKinds) as Array<{
-        id: number;
-        name: string;
-        kind: string;
-        file: string;
-        line: number;
-        edge_kind: string;
-      }>;
+      const neighbors = neighborStmt.all(currentId, ...edgeKinds) as NeighborRow[];
       for (const n of neighbors) {
-        if (noTests && isTestFile(n.file)) continue;
-        if (n.id === targetId) {
-          if (!found) {
-            found = true;
-            foundDepth = depth;
-            parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind });
-          }
-          alternateCount++;
-          continue;
-        }
-        if (!visited.has(n.id)) {
-          visited.add(n.id);
-          parent.set(n.id, { parentId: currentId, edgeKind: n.edge_kind });
-          nextQueue.push(n.id);
-        }
+        visitNeighbor(n, currentId, depth, targetId, state, nextQueue, noTests);
       }
     }
-    if (found) break;
+    if (state.found) break;
     queue = nextQueue;
     if (queue.length === 0) break;
   }
 
-  return { found, parent, alternateCount, foundDepth };
+  return {
+    found: state.found,
+    parent: state.parent,
+    alternateCount: state.alternateCount,
+    foundDepth: state.foundDepth,
+  };
 }
 
 /**
@@ -474,6 +523,53 @@ export function pathData(
 
 // ── File-level shortest path ────────────────────────────────────────────
 
+type FileBfsState = {
+  visited: Set<string>;
+  parentMap: Map<string, string>;
+  found: boolean;
+  alternateCount: number;
+};
+
+/** Process a neighbor file during file-level BFS; updates state in place. */
+function visitFileNeighbor(
+  neighborFile: string,
+  currentFile: string,
+  targetFile: string,
+  state: FileBfsState,
+  nextQueue: string[],
+  noTests: boolean,
+): void {
+  if (noTests && isTestFile(neighborFile)) return;
+  if (neighborFile === targetFile) {
+    if (!state.found) {
+      state.found = true;
+      state.parentMap.set(neighborFile, currentFile);
+    }
+    state.alternateCount++;
+    return;
+  }
+  if (state.visited.has(neighborFile)) return;
+  state.visited.add(neighborFile);
+  state.parentMap.set(neighborFile, currentFile);
+  nextQueue.push(neighborFile);
+}
+
+/** Reconstruct file path from target back to source using parent links. */
+function reconstructFilePath(
+  parentMap: Map<string, string>,
+  sourceFile: string,
+  targetFile: string,
+): string[] {
+  const filePath: string[] = [targetFile];
+  let cur = targetFile;
+  while (cur !== sourceFile) {
+    cur = parentMap.get(cur)!;
+    filePath.push(cur);
+  }
+  filePath.reverse();
+  return filePath;
+}
+
 /** BFS over file adjacency graph to find shortest path. */
 function bfsFilePath(
   neighborStmt: ReturnType<BetterSqlite3Database['prepare']>,
@@ -483,11 +579,13 @@ function bfsFilePath(
   maxDepth: number,
   noTests: boolean,
 ): { found: boolean; path: string[]; alternateCount: number } {
-  const visited = new Set([sourceFile]);
-  const parentMap = new Map<string, string>();
+  const state: FileBfsState = {
+    visited: new Set([sourceFile]),
+    parentMap: new Map<string, string>(),
+    found: false,
+    alternateCount: 0,
+  };
   let queue = [sourceFile];
-  let found = false;
-  let alternateCount = 0;
 
   for (let depth = 1; depth <= maxDepth; depth++) {
     const nextQueue: string[] = [];
@@ -496,38 +594,21 @@ function bfsFilePath(
         neighbor_file: string;
       }>;
       for (const n of neighbors) {
-        if (noTests && isTestFile(n.neighbor_file)) continue;
-        if (n.neighbor_file === targetFile) {
-          if (!found) {
-            found = true;
-            parentMap.set(n.neighbor_file, currentFile);
-          }
-          alternateCount++;
-          continue;
-        }
-        if (!visited.has(n.neighbor_file)) {
-          visited.add(n.neighbor_file);
-          parentMap.set(n.neighbor_file, currentFile);
-          nextQueue.push(n.neighbor_file);
-        }
+        visitFileNeighbor(n.neighbor_file, currentFile, targetFile, state, nextQueue, noTests);
       }
     }
-    if (found) break;
+    if (state.found) break;
     queue = nextQueue;
     if (queue.length === 0) break;
   }
 
-  if (!found) return { found: false, path: [], alternateCount: 0 };
+  if (!state.found) return { found: false, path: [], alternateCount: 0 };
 
-  // Reconstruct path
-  const filePath: string[] = [targetFile];
-  let cur = targetFile;
-  while (cur !== sourceFile) {
-    cur = parentMap.get(cur)!;
-    filePath.push(cur);
-  }
-  filePath.reverse();
-  return { found: true, path: filePath, alternateCount: Math.max(0, alternateCount - 1) };
+  return {
+    found: true,
+    path: reconstructFilePath(state.parentMap, sourceFile, targetFile),
+    alternateCount: Math.max(0, state.alternateCount - 1),
+  };
 }
 
 /**
diff --git a/src/domain/analysis/fn-impact.ts b/src/domain/analysis/fn-impact.ts
index f33ab26ff..e795c2092 100644
--- a/src/domain/analysis/fn-impact.ts
+++ b/src/domain/analysis/fn-impact.ts
@@ -83,6 +83,63 @@ function expandImplementors(
   }
 }
 
+/** Record a caller at `depth` (skipping visited/test-filtered ones) in frontier and levels. */
+function recordCaller(
+  caller: RelatedNodeRow,
+  parentId: number,
+  depth: number,
+  visited: Set<number>,
+  nextFrontier: number[],
+  levels: BfsLevels,
+  noTests: boolean,
+  onVisit?: BfsOnVisit,
+): void {
+  if (visited.has(caller.id) || (noTests && isTestFile(caller.file))) return;
+  visited.add(caller.id);
+  nextFrontier.push(caller.id);
+  if (!levels[depth]) levels[depth] = [];
+  levels[depth]!.push(toSymbolRef(caller));
+  if (onVisit) onVisit(caller, parentId, depth);
+}
+
+/** Process all callers of one frontier node, recording new nodes and expanding implementors. */
+function processFrontierNode(
+  repo: InstanceType<typeof Repository>,
+  fid: number,
+  depth: number,
+  visited: Set<number>,
+  nextFrontier: number[],
+  levels: BfsLevels,
+  noTests: boolean,
+  resolveImplementors: boolean,
+  onVisit?: BfsOnVisit,
+): void {
+  const callers = repo.findDistinctCallers(fid) as RelatedNodeRow[];
+  for (const c of callers) {
+    recordCaller(c, fid, depth, visited, nextFrontier, levels, noTests, onVisit);
+    if (resolveImplementors && INTERFACE_LIKE_KINDS.has(c.kind)) {
+      expandImplementors(repo, c.id, depth + 1, visited, nextFrontier, levels, noTests, onVisit);
+    }
+  }
+}
+
+/** Seed BFS with implementors of the start node when it is an interface/trait. */
+function seedInterfaceImplementors(
+  repo: InstanceType<typeof Repository>,
+  startId: number,
+  visited: Set<number>,
+  levels: BfsLevels,
+  noTests: boolean,
+  onVisit?: BfsOnVisit,
+): number[] {
+  const implNextFrontier: number[] = [];
+  const startNode = repo.findNodeById(startId) as NodeRow | undefined;
+  if (startNode && INTERFACE_LIKE_KINDS.has(startNode.kind)) {
+    expandImplementors(repo, startId, 1, visited, implNextFrontier, levels, noTests, onVisit);
+  }
+  return implNextFrontier;
+}
+
 export function bfsTransitiveCallers(
   dbOrRepo: BetterSqlite3Database | InstanceType<typeof Repository>,
   startId: number,
@@ -105,13 +162,9 @@ export function bfsTransitiveCallers(
   let frontier = [startId];
 
   // Seed: if start node is an interface/trait, include its implementors at depth 1
-  const implNextFrontier: number[] = [];
-  if (resolveImplementors) {
-    const startNode = repo.findNodeById(startId) as NodeRow | undefined;
-    if (startNode && INTERFACE_LIKE_KINDS.has(startNode.kind)) {
-      expandImplementors(repo, startId, 1, visited, implNextFrontier, levels, noTests, onVisit);
-    }
-  }
+  const implNextFrontier = resolveImplementors
+    ? seedInterfaceImplementors(repo, startId, visited, levels, noTests, onVisit)
+    : [];
 
   for (let d = 1; d <= maxDepth; d++) {
     if (d === 1 && implNextFrontier.length > 0) {
@@ -119,19 +172,17 @@ export function bfsTransitiveCallers(
     }
     const nextFrontier: number[] = [];
     for (const fid of frontier) {
-      const callers = repo.findDistinctCallers(fid) as RelatedNodeRow[];
-      for (const c of callers) {
-        if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) {
-          visited.add(c.id);
-          nextFrontier.push(c.id);
-          if (!levels[d]) levels[d] = [];
-          levels[d]!.push(toSymbolRef(c));
-          if (onVisit) onVisit(c, fid, d);
-        }
-        if (resolveImplementors && INTERFACE_LIKE_KINDS.has(c.kind)) {
-          expandImplementors(repo, c.id, d + 1, visited, nextFrontier, levels, noTests, onVisit);
-        }
-      }
+      processFrontierNode(
+        repo,
+        fid,
+        d,
+        visited,
+        nextFrontier,
+        levels,
+        noTests,
+        resolveImplementors,
+        onVisit,
+      );
     }
     frontier = nextFrontier;
     if (frontier.length === 0) break;
@@ -140,6 +191,53 @@ export function bfsTransitiveCallers(
   return { totalDependents: visited.size - 1, levels };
 }
 
+/** BFS over import dependents, returning visited node IDs and depth-per-id map. */
+function bfsImportDependents(
+  repo: InstanceType<typeof Repository>,
+  seedNodes: NodeRow[],
+  noTests: boolean,
+): { visited: Set<number>; levels: Map<number, number> } {
+  const visited = new Set<number>();
+  const queue: number[] = [];
+  const levels = new Map<number, number>();
+
+  for (const fn of seedNodes) {
+    visited.add(fn.id);
+    queue.push(fn.id);
+    levels.set(fn.id, 0);
+  }
+
+  while (queue.length > 0) {
+    const current = queue.shift()!;
+    const level = levels.get(current)!;
+    const dependents = repo.findImportDependents(current) as RelatedNodeRow[];
+    for (const dep of dependents) {
+      if (visited.has(dep.id)) continue;
+      if (noTests && isTestFile(dep.file)) continue;
+      visited.add(dep.id);
+      queue.push(dep.id);
+      levels.set(dep.id, level + 1);
+    }
+  }
+
+  return { visited, levels };
+}
+
+/** Group visited dependents by depth (excluding seed depth 0). */
+function groupDependentsByLevel(
+  repo: InstanceType<typeof Repository>,
+  levels: Map<number, number>,
+): Record<number, Array<{ file: string }>> {
+  const byLevel: Record<number, Array<{ file: string }>> = {};
+  for (const [id, level] of levels) {
+    if (level === 0) continue;
+    if (!byLevel[level]) byLevel[level] = [];
+    const node = repo.findNodeById(id) as NodeRow | undefined;
+    if (node) byLevel[level].push({ file: node.file });
+  }
+  return byLevel;
+}
+
 export function impactAnalysisData(
   file: string,
   customDbPath: string,
@@ -152,36 +250,8 @@ export function impactAnalysisData(
       return { file, sources: [], levels: {}, totalDependents: 0 };
     }
 
-    const visited = new Set<number>();
-    const queue: number[] = [];
-    const levels = new Map<number, number>();
-
-    for (const fn of fileNodes) {
-      visited.add(fn.id);
-      queue.push(fn.id);
-      levels.set(fn.id, 0);
-    }
-
-    while (queue.length > 0) {
-      const current = queue.shift()!;
-      const level = levels.get(current)!;
-      const dependents = repo.findImportDependents(current) as RelatedNodeRow[];
-      for (const dep of dependents) {
-        if (!visited.has(dep.id) && (!noTests || !isTestFile(dep.file))) {
-          visited.add(dep.id);
-          queue.push(dep.id);
-          levels.set(dep.id, level + 1);
-        }
-      }
-    }
-
-    const byLevel: Record<number, Array<{ file: string }>> = {};
-    for (const [id, level] of levels) {
-      if (level === 0) continue;
-      if (!byLevel[level]) byLevel[level] = [];
-      const node = repo.findNodeById(id) as NodeRow | undefined;
-      if (node) byLevel[level].push({ file: node.file });
-    }
+    const { visited, levels } = bfsImportDependents(repo, fileNodes, noTests);
+    const byLevel = groupDependentsByLevel(repo, levels);
 
     return {
       file,
diff --git a/src/domain/analysis/module-map.ts b/src/domain/analysis/module-map.ts
index 887c644a9..71383c213 100644
--- a/src/domain/analysis/module-map.ts
+++ b/src/domain/analysis/module-map.ts
@@ -4,7 +4,7 @@ import { loadConfig } from '../../infrastructure/config.js';
 import { debug } from '../../infrastructure/logger.js';
 import { isTestFile } from '../../infrastructure/test-filter.js';
 import { DEAD_ROLE_PREFIX } from '../../shared/kinds.js';
-import type { BetterSqlite3Database } from '../../types.js';
+import type { BetterSqlite3Database, NativeDatabase } from '../../types.js';
 import { findCycles } from '../graph/cycles.js';
 import { LANGUAGE_REGISTRY } from '../parser.js';
 
@@ -198,30 +198,13 @@ function computeQualityMetrics(
   ).c;
   const callConfidence = totalCallEdges > 0 ? highConfCallEdges / totalCallEdges : 0;
 
-  const fpRows = db
-    .prepare(`
-      SELECT n.name, n.file, n.line, COUNT(e.source_id) as caller_count
-      FROM nodes n
-      LEFT JOIN edges e ON n.id = e.target_id AND e.kind = 'calls'
-      WHERE n.kind IN ('function', 'method')
-      GROUP BY n.id
-      HAVING caller_count > ?
-      ORDER BY caller_count DESC
-    `)
-    .all(fpThreshold) as Array<{ name: string; file: string; line: number; caller_count: number }>;
-  const falsePositiveWarnings = fpRows
-    .filter((r) =>
-      FALSE_POSITIVE_NAMES.has(r.name.includes('.') ? r.name.split('.').pop()! : r.name),
-    )
-    .map((r) => ({ name: r.name, file: r.file, line: r.line, callerCount: r.caller_count }));
+  const falsePositiveWarnings = buildFalsePositiveWarnings(queryFalsePositiveRows(db, fpThreshold));
 
   let fpEdgeCount = 0;
   for (const fp of falsePositiveWarnings) fpEdgeCount += fp.callerCount;
   const falsePositiveRatio = totalCallEdges > 0 ? fpEdgeCount / totalCallEdges : 0;
 
-  const score = Math.round(
-    callerCoverage * 40 + callConfidence * 40 + (1 - falsePositiveRatio) * 20,
-  );
+  const score = computeQualityScore(callerCoverage, callConfidence, falsePositiveRatio);
 
   return {
     score,
@@ -347,6 +330,169 @@ export function moduleMapData(customDbPath: string, limit = 20, opts: { noTests?
   }
 }
 
+type FalsePositiveRow = { name: string; file: string; line: number; caller_count: number };
+
+/** SQL query for false-positive caller counts above a threshold (shared by native and JS paths). */
+function queryFalsePositiveRows(
+  db: BetterSqlite3Database,
+  fpThreshold: number,
+): FalsePositiveRow[] {
+  return db
+    .prepare(`
+      SELECT n.name, n.file, n.line, COUNT(e.source_id) as caller_count
+      FROM nodes n
+      LEFT JOIN edges e ON n.id = e.target_id AND e.kind = 'calls'
+      WHERE n.kind IN ('function', 'method')
+      GROUP BY n.id
+      HAVING caller_count > ?
+      ORDER BY caller_count DESC
+    `)
+    .all(fpThreshold) as FalsePositiveRow[];
+}
+
+/** Filter false-positive rows by the configured name set and shape them for the report. */
+function buildFalsePositiveWarnings(rows: FalsePositiveRow[]) {
+  return rows
+    .filter((r) =>
+      FALSE_POSITIVE_NAMES.has(r.name.includes('.') ? r.name.split('.').pop()! : r.name),
+    )
+    .map((r) => ({ name: r.name, file: r.file, line: r.line, callerCount: r.caller_count }));
+}
+
+/** Compute the composite quality score (0-100) from coverage, confidence, and FP ratio. */
+function computeQualityScore(
+  callerCoverage: number,
+  callConfidence: number,
+  falsePositiveRatio: number,
+): number {
+  return Math.round(callerCoverage * 40 + callConfidence * 40 + (1 - falsePositiveRatio) * 20);
+}
+
+/** Aggregate role counts and derive the `dead` total. */
+function aggregateRolesFromNative(roleCounts: Array<{ role: string; count: number }>) {
+  const roles: Record<string, number> & { dead?: number } = {};
+  let deadTotal = 0;
+  for (const r of roleCounts) {
+    roles[r.role] = r.count;
+    if (r.role.startsWith(DEAD_ROLE_PREFIX)) deadTotal += r.count;
+  }
+  if (deadTotal > 0) roles.dead = deadTotal;
+  return roles;
+}
+
+type NativeGraphStatsFn = NonNullable<NativeDatabase['getGraphStats']>;
+type NativeGraphStats = ReturnType<NativeGraphStatsFn>;
+
+/** Build the native fast-path stats result by combining native aggregations with JS-only sections. */
+function buildStatsFromNative(
+  db: BetterSqlite3Database,
+  nativeStats: NativeGraphStats,
+  config: any,
+  jsSections: {
+    files: ReturnType<typeof countFilesByLanguage>;
+    fileCycles: unknown[];
+    fnCycles: unknown[];
+  },
+) {
+  const s = nativeStats;
+  const nodesByKind: Record<string, number> = {};
+  for (const k of s.nodesByKind) nodesByKind[k.kind] = k.count;
+  const edgesByKind: Record<string, number> = {};
+  for (const k of s.edgesByKind) edgesByKind[k.kind] = k.count;
+  const roles = aggregateRolesFromNative(s.roleCounts);
+
+  const callerCoverage =
+    s.quality.callableTotal > 0 ? s.quality.callableWithCallers / s.quality.callableTotal : 0;
+  const callConfidence =
+    s.quality.callEdges > 0 ? s.quality.highConfCallEdges / s.quality.callEdges : 0;
+
+  // False-positive analysis still uses JS (needs FALSE_POSITIVE_NAMES set)
+  const fpThreshold = config.analysis?.falsePositiveCallers ?? FALSE_POSITIVE_CALLER_THRESHOLD;
+  const falsePositiveWarnings = buildFalsePositiveWarnings(queryFalsePositiveRows(db, fpThreshold));
+  let fpEdgeCount = 0;
+  for (const fp of falsePositiveWarnings) fpEdgeCount += fp.callerCount;
+  const falsePositiveRatio = s.quality.callEdges > 0 ? fpEdgeCount / s.quality.callEdges : 0;
+  const score = computeQualityScore(callerCoverage, callConfidence, falsePositiveRatio);
+
+  return {
+    nodes: { total: s.totalNodes, byKind: nodesByKind },
+    edges: { total: s.totalEdges, byKind: edgesByKind },
+    files: jsSections.files,
+    cycles: { fileLevel: jsSections.fileCycles.length, functionLevel: jsSections.fnCycles.length },
+    hotspots: s.hotspots.map((h) => ({ file: h.file, fanIn: h.fanIn, fanOut: h.fanOut })),
+    embeddings: s.embeddings
+      ? {
+          count: s.embeddings.count,
+          model: s.embeddings.model,
+          dim: s.embeddings.dim,
+          builtAt: s.embeddings.builtAt,
+        }
+      : null,
+    quality: {
+      score,
+      callerCoverage: {
+        ratio: callerCoverage,
+        covered: s.quality.callableWithCallers,
+        total: s.quality.callableTotal,
+      },
+      callConfidence: {
+        ratio: callConfidence,
+        highConf: s.quality.highConfCallEdges,
+        total: s.quality.callEdges,
+      },
+      falsePositiveWarnings,
+    },
+    roles,
+    complexity: s.complexity
+      ? {
+          analyzed: s.complexity.analyzed,
+          avgCognitive: s.complexity.avgCognitive,
+          avgCyclomatic: s.complexity.avgCyclomatic,
+          maxCognitive: s.complexity.maxCognitive,
+          maxCyclomatic: s.complexity.maxCyclomatic,
+          avgMI: s.complexity.avgMi,
+          minMI: s.complexity.minMi,
+        }
+      : null,
+  };
+}
+
+/** Build the JS-fallback stats result using SQL aggregations from the helpers above. */
+function buildStatsFromJs(
+  db: BetterSqlite3Database,
+  noTests: boolean,
+  config: any,
+  jsSections: {
+    files: ReturnType<typeof countFilesByLanguage>;
+    fileCycles: unknown[];
+    fnCycles: unknown[];
+  },
+) {
+  const testFilter = testFilterSQL('n.file', noTests);
+
+  const { total: totalNodes, byKind: nodesByKind } = countNodesByKind(db, noTests);
+  const { total: totalEdges, byKind: edgesByKind } = countEdgesByKind(db, noTests);
+
+  const hotspots = findHotspots(db, noTests, 5);
+  const embeddings = getEmbeddingsInfo(db);
+  const fpThreshold = config.analysis?.falsePositiveCallers ?? FALSE_POSITIVE_CALLER_THRESHOLD;
+  const quality = computeQualityMetrics(db, testFilter, fpThreshold);
+  const roles = countRoles(db, noTests);
+  const complexity = getComplexitySummary(db, testFilter);
+
+  return {
+    nodes: { total: totalNodes, byKind: nodesByKind },
+    edges: { total: totalEdges, byKind: edgesByKind },
+    files: jsSections.files,
+    cycles: { fileLevel: jsSections.fileCycles.length, functionLevel: jsSections.fnCycles.length },
+    hotspots,
+    embeddings,
+    quality,
+    roles,
+    complexity,
+  };
+}
+
 export function statsData(customDbPath: string, opts: { noTests?: boolean; config?: any } = {}) {
   const { db, nativeDb, close } = openReadonlyWithNative(customDbPath);
   try {
@@ -354,127 +500,16 @@ export function statsData(customDbPath: string, opts: { noTests?: boolean; confi
     const config = opts.config || loadConfig();
 
     // These always need JS (non-SQL logic)
-    const files = countFilesByLanguage(db, noTests);
-    const fileCycles = findCycles(db, { fileLevel: true, noTests });
-    const fnCycles = findCycles(db, { fileLevel: false, noTests });
-
-    // ── Native fast path: batch all SQL aggregations in one napi call ──
-    if (nativeDb?.getGraphStats) {
-      const s = nativeDb.getGraphStats(noTests);
-      const nodesByKind: Record<string, number> = {};
-      for (const k of s.nodesByKind) nodesByKind[k.kind] = k.count;
-      const edgesByKind: Record<string, number> = {};
-      for (const k of s.edgesByKind) edgesByKind[k.kind] = k.count;
-      const roles: Record<string, number> & { dead?: number } = {};
-      let deadTotal = 0;
-      for (const r of s.roleCounts) {
-        roles[r.role] = r.count;
-        if (r.role.startsWith(DEAD_ROLE_PREFIX)) deadTotal += r.count;
-      }
-      if (deadTotal > 0) roles.dead = deadTotal;
-
-      const callerCoverage =
-        s.quality.callableTotal > 0 ? s.quality.callableWithCallers / s.quality.callableTotal : 0;
-      const callConfidence =
-        s.quality.callEdges > 0 ? s.quality.highConfCallEdges / s.quality.callEdges : 0;
-
-      // False-positive analysis still uses JS (needs FALSE_POSITIVE_NAMES set)
-      const fpThreshold = config.analysis?.falsePositiveCallers ?? FALSE_POSITIVE_CALLER_THRESHOLD;
-      const fpRows = db
-        .prepare(`
-          SELECT n.name, n.file, n.line, COUNT(e.source_id) as caller_count
-          FROM nodes n
-          LEFT JOIN edges e ON n.id = e.target_id AND e.kind = 'calls'
-          WHERE n.kind IN ('function', 'method')
-          GROUP BY n.id
-          HAVING caller_count > ?
-          ORDER BY caller_count DESC
-        `)
-        .all(fpThreshold) as Array<{
-        name: string;
-        file: string;
-        line: number;
-        caller_count: number;
-      }>;
-      const falsePositiveWarnings = fpRows
-        .filter((r) =>
-          FALSE_POSITIVE_NAMES.has(r.name.includes('.') ? r.name.split('.').pop()! : r.name),
-        )
-        .map((r) => ({ name: r.name, file: r.file, line: r.line, callerCount: r.caller_count }));
-      let fpEdgeCount = 0;
-      for (const fp of falsePositiveWarnings) fpEdgeCount += fp.callerCount;
-      const falsePositiveRatio = s.quality.callEdges > 0 ? fpEdgeCount / s.quality.callEdges : 0;
-      const score = Math.round(
-        callerCoverage * 40 + callConfidence * 40 + (1 - falsePositiveRatio) * 20,
-      );
-
-      return {
-        nodes: { total: s.totalNodes, byKind: nodesByKind },
-        edges: { total: s.totalEdges, byKind: edgesByKind },
-        files,
-        cycles: { fileLevel: fileCycles.length, functionLevel: fnCycles.length },
-        hotspots: s.hotspots.map((h) => ({ file: h.file, fanIn: h.fanIn, fanOut: h.fanOut })),
-        embeddings: s.embeddings
-          ? {
-              count: s.embeddings.count,
-              model: s.embeddings.model,
-              dim: s.embeddings.dim,
-              builtAt: s.embeddings.builtAt,
-            }
-          : null,
-        quality: {
-          score,
-          callerCoverage: {
-            ratio: callerCoverage,
-            covered: s.quality.callableWithCallers,
-            total: s.quality.callableTotal,
-          },
-          callConfidence: {
-            ratio: callConfidence,
-            highConf: s.quality.highConfCallEdges,
-            total: s.quality.callEdges,
-          },
-          falsePositiveWarnings,
-        },
-        roles,
-        complexity: s.complexity
-          ? {
-              analyzed: s.complexity.analyzed,
-              avgCognitive: s.complexity.avgCognitive,
-              avgCyclomatic: s.complexity.avgCyclomatic,
-              maxCognitive: s.complexity.maxCognitive,
-              maxCyclomatic: s.complexity.maxCyclomatic,
-              avgMI: s.complexity.avgMi,
-              minMI: s.complexity.minMi,
-            }
-          : null,
-      };
-    }
-
-    // ── JS fallback ───────────────────────────────────────────────────
-    const testFilter = testFilterSQL('n.file', noTests);
-
-    const { total: totalNodes, byKind: nodesByKind } = countNodesByKind(db, noTests);
-    const { total: totalEdges, byKind: edgesByKind } = countEdgesByKind(db, noTests);
-
-    const hotspots = findHotspots(db, noTests, 5);
-    const embeddings = getEmbeddingsInfo(db);
-    const fpThreshold = config.analysis?.falsePositiveCallers ?? FALSE_POSITIVE_CALLER_THRESHOLD;
-    const quality = computeQualityMetrics(db, testFilter, fpThreshold);
-    const roles = countRoles(db, noTests);
-    const complexity = getComplexitySummary(db, testFilter);
-
-    return {
-      nodes: { total: totalNodes, byKind: nodesByKind },
-      edges: { total: totalEdges, byKind: edgesByKind },
-      files,
-      cycles: { fileLevel: fileCycles.length, functionLevel: fnCycles.length },
-      hotspots,
-      embeddings,
-      quality,
-      roles,
-      complexity,
+    const jsSections = {
+      files: countFilesByLanguage(db, noTests),
+      fileCycles: findCycles(db, { fileLevel: true, noTests }),
+      fnCycles: findCycles(db, { fileLevel: false, noTests }),
     };
+
+    const nativeStats = nativeDb?.getGraphStats?.(noTests);
+    return nativeStats
+      ? buildStatsFromNative(db, nativeStats, config, jsSections)
+      : buildStatsFromJs(db, noTests, config, jsSections);
   } finally {
     close();
   }

From 4f344044829c684ffa40e4be6b214920c5ea5f36 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 13:36:57 -0600
Subject: [PATCH 17/27] refactor(search): decompose generator and reduce
 complexity in semantic and hybrid search

---
 src/domain/search/generator.ts       | 206 +++++++++++++++++----------
 src/domain/search/search/hybrid.ts   |  95 ++++++------
 src/domain/search/search/semantic.ts | 170 +++++++++++++---------
 3 files changed, 290 insertions(+), 181 deletions(-)

diff --git a/src/domain/search/generator.ts b/src/domain/search/generator.ts
index ef0ddf353..02e43f1ca 100644
--- a/src/domain/search/generator.ts
+++ b/src/domain/search/generator.ts
@@ -8,6 +8,19 @@ import { embed, getModelConfig } from './models.js';
 import { buildSourceText } from './strategies/source.js';
 import { buildStructuredText } from './strategies/structured.js';
 
+type EmbeddingNode = NodeRow & { id: number };
+type EmbeddingStrategy = 'structured' | 'source';
+
+interface PreparedEmbeddings {
+  texts: string[];
+  nodeIds: number[];
+  nodeNames: string[];
+  previews: string[];
+  overflowCount: number;
+  filesRead: number;
+  filesSkipped: number;
+}
+
 /**
  * Rough token estimate (~4 chars per token for code/English).
  * Conservative — avoids adding a tokenizer dependency.
@@ -47,47 +60,22 @@ function initEmbeddingsSchema(db: BetterSqlite3Database): void {
   `);
 }
 
-export interface BuildEmbeddingsOptions {
-  strategy?: 'structured' | 'source';
-}
-
 /**
- * Build embeddings for all functions/methods/classes in the graph.
+ * Resolve the repo root for embedding. Prefer the root recorded at build time;
+ * fall back to `<dbParent>` only when the DB lives at the conventional
+ * `<root>/.codegraph/graph.db` layout — otherwise trust the caller's rootDir.
  */
-export async function buildEmbeddings(
-  rootDir: string,
-  modelKey: string,
-  customDbPath?: string,
-  options: BuildEmbeddingsOptions = {},
-): Promise<void> {
-  const strategy = options.strategy || 'structured';
-  const dbPath = customDbPath || findDbPath(undefined);
-
-  if (!fs.existsSync(dbPath)) {
-    throw new DbError(
-      `No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`,
-      { file: dbPath },
-    );
-  }
-
-  const db = openDb(dbPath) as BetterSqlite3Database;
-  initEmbeddingsSchema(db);
-
-  // Prefer the repo root recorded at build time — embed may be invoked from a
-  // different cwd (e.g. `codegraph embed --db /abs/path/graph.db`) and the
-  // positional rootDir will be wrong in that case. For legacy DBs without
-  // root_dir metadata, fall back to `<dbParent>` only when the DB lives at
-  // the conventional `<root>/.codegraph/graph.db` layout — otherwise trust
-  // the caller-provided rootDir (which may be an explicit positional arg).
-  // `path.dirname(...)` is always non-empty (`'.'` at minimum), so the
-  // conventional-layout check is required to keep the rootDir path reachable.
+function resolveRoot(db: BetterSqlite3Database, dbPath: string, rootDir: string): string {
   const metaRoot = getBuildMeta(db, 'root_dir');
   const resolvedDbPath = path.resolve(dbPath);
   const dbDirName = path.basename(path.dirname(resolvedDbPath));
   const dbParent =
     dbDirName === '.codegraph' ? path.dirname(path.dirname(resolvedDbPath)) : undefined;
-  const resolvedRoot = metaRoot || dbParent || rootDir;
+  return metaRoot || dbParent || rootDir;
+}
 
+/** Reset embedding tables and load eligible symbols grouped by file. */
+function loadNodesByFile(db: BetterSqlite3Database): Map<string, EmbeddingNode[]> {
   db.exec('DELETE FROM embeddings');
   db.exec('DELETE FROM embedding_meta');
   db.exec('DELETE FROM fts_index');
@@ -96,22 +84,52 @@ export async function buildEmbeddings(
     .prepare(
       `SELECT * FROM nodes WHERE kind IN ('function', 'method', 'class') ORDER BY file, line`,
     )
-    .all() as Array<NodeRow & { id: number }>;
+    .all() as EmbeddingNode[];
 
-  console.log(`Building embeddings for ${nodes.length} symbols (strategy: ${strategy})...`);
-
-  const byFile = new Map<string, typeof nodes>();
+  const byFile = new Map<string, EmbeddingNode[]>();
   for (const node of nodes) {
     if (!byFile.has(node.file)) byFile.set(node.file, []);
     byFile.get(node.file)?.push(node);
   }
+  return byFile;
+}
+
+/** Build embedding text for a single node, truncating if it would overflow. */
+function buildNodeText(
+  node: EmbeddingNode,
+  file: string,
+  lines: string[],
+  db: BetterSqlite3Database,
+  strategy: EmbeddingStrategy,
+  contextWindow: number,
+): { text: string; overflowed: boolean } {
+  let text =
+    strategy === 'structured'
+      ? buildStructuredText(node, file, lines, db)
+      : buildSourceText(node, file, lines);
+  const tokens = estimateTokens(text);
+  if (tokens > contextWindow) {
+    text = text.slice(0, contextWindow * 4);
+    return { text, overflowed: true };
+  }
+  return { text, overflowed: false };
+}
 
+/**
+ * Walk files in the graph, read source, and produce parallel arrays of
+ * texts / nodeIds / nodeNames / previews ready for embedding.
+ */
+function prepareEmbeddingTexts(
+  byFile: Map<string, EmbeddingNode[]>,
+  db: BetterSqlite3Database,
+  resolvedRoot: string,
+  strategy: EmbeddingStrategy,
+  contextWindow: number,
+): PreparedEmbeddings {
   const texts: string[] = [];
   const nodeIds: number[] = [];
   const nodeNames: string[] = [];
   const previews: string[] = [];
-  const config = getModelConfig(modelKey);
-  const contextWindow = config.contextWindow;
   let overflowCount = 0;
   let filesRead = 0;
   let filesSkipped = 0;
@@ -129,19 +147,8 @@ export async function buildEmbeddings(
     }
 
     for (const node of fileNodes) {
-      let text =
-        strategy === 'structured'
-          ? buildStructuredText(node, file, lines, db)
-          : buildSourceText(node, file, lines);
-
-      // Detect and handle context window overflow
-      const tokens = estimateTokens(text);
-      if (tokens > contextWindow) {
-        overflowCount++;
-        const maxChars = contextWindow * 4;
-        text = text.slice(0, maxChars);
-      }
-
+      const { text, overflowed } = buildNodeText(node, file, lines, db, strategy, contextWindow);
+      if (overflowed) overflowCount++;
       texts.push(text);
       nodeIds.push(node.id);
       nodeNames.push(node.name);
@@ -149,28 +156,19 @@ export async function buildEmbeddings(
     }
   }
 
-  if (overflowCount > 0) {
-    warn(
-      `${overflowCount} symbol(s) exceeded model context window (${contextWindow} tokens) and were truncated`,
-    );
-  }
-
-  // If there were symbols to embed but every file failed to read, the DB was
-  // almost certainly built from a different location than the current cwd.
-  // Surface this clearly instead of emitting a silent "Stored 0 embeddings".
-  if (byFile.size > 0 && filesRead === 0) {
-    closeDb(db);
-    throw new DbError(
-      `embed: could not read any of the ${filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
-        `Tried resolving against: ${resolvedRoot}\n` +
-        'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.',
-      { file: dbPath },
-    );
-  }
-
-  console.log(`Embedding ${texts.length} symbols...`);
-  const { vectors, dim } = await embed(texts, modelKey);
+  return { texts, nodeIds, nodeNames, previews, overflowCount, filesRead, filesSkipped };
+}
 
+/** Persist vectors, FTS rows, and embedding metadata in a single transaction. */
+function persistEmbeddings(
+  db: BetterSqlite3Database,
+  prepared: PreparedEmbeddings,
+  vectors: Float32Array[],
+  dim: number,
+  modelName: string,
+  strategy: EmbeddingStrategy,
+): void {
+  const { nodeIds, nodeNames, previews, texts, overflowCount } = prepared;
   const insert = db.prepare(
     'INSERT OR REPLACE INTO embeddings (node_id, vector, text_preview, full_text) VALUES (?, ?, ?, ?)',
   );
@@ -182,7 +180,7 @@ export async function buildEmbeddings(
       insert.run(nodeIds[i], Buffer.from(vec.buffer), previews[i], texts[i]);
       insertFts.run(nodeIds[i], nodeNames[i], texts[i]);
     }
-    insertMeta.run('model', config.name);
+    insertMeta.run('model', modelName);
     insertMeta.run('dim', String(dim));
     insertMeta.run('count', String(vectors.length));
     insertMeta.run('fts_count', String(vectors.length));
@@ -193,6 +191,66 @@ export async function buildEmbeddings(
     }
   });
   insertAll();
+}
+
+export interface BuildEmbeddingsOptions {
+  strategy?: EmbeddingStrategy;
+}
+
+/**
+ * Build embeddings for all functions/methods/classes in the graph.
+ */
+export async function buildEmbeddings(
+  rootDir: string,
+  modelKey: string,
+  customDbPath?: string,
+  options: BuildEmbeddingsOptions = {},
+): Promise<void> {
+  const strategy = options.strategy || 'structured';
+  const dbPath = customDbPath || findDbPath(undefined);
+
+  if (!fs.existsSync(dbPath)) {
+    throw new DbError(
+      `No codegraph database found at ${dbPath}.\nRun "codegraph build" first to analyze your codebase.`,
+      { file: dbPath },
+    );
+  }
+
+  const db = openDb(dbPath) as BetterSqlite3Database;
+  initEmbeddingsSchema(db);
+
+  const resolvedRoot = resolveRoot(db, dbPath, rootDir);
+  const byFile = loadNodesByFile(db);
+
+  const nodeCount = [...byFile.values()].reduce((acc, list) => acc + list.length, 0);
+  console.log(`Building embeddings for ${nodeCount} symbols (strategy: ${strategy})...`);
+
+  const config = getModelConfig(modelKey);
+  const prepared = prepareEmbeddingTexts(byFile, db, resolvedRoot, strategy, config.contextWindow);
+
+  if (prepared.overflowCount > 0) {
+    warn(
+      `${prepared.overflowCount} symbol(s) exceeded model context window (${config.contextWindow} tokens) and were truncated`,
+    );
+  }
+
+  // If there were symbols to embed but every file failed to read, the DB was
+  // almost certainly built from a different location than the current cwd.
+  // Surface this clearly instead of emitting a silent "Stored 0 embeddings".
+  if (byFile.size > 0 && prepared.filesRead === 0) {
+    closeDb(db);
+    throw new DbError(
+      `embed: could not read any of the ${prepared.filesSkipped} source files recorded in the graph — the DB may have been built from a different location than the current working directory.\n` +
+        `Tried resolving against: ${resolvedRoot}\n` +
+        'Pass a positional <dir> argument pointing at the original repo root, or re-run "codegraph build" from that directory.',
+      { file: dbPath },
+    );
+  }
+
+  console.log(`Embedding ${prepared.texts.length} symbols...`);
+  const { vectors, dim } = await embed(prepared.texts, modelKey);
+
+  persistEmbeddings(db, prepared, vectors as Float32Array[], dim, config.name, strategy);
 
   console.log(
     `\nStored ${vectors.length} embeddings (${dim}d, ${config.name}, strategy: ${strategy}) in graph.db`,
diff --git a/src/domain/search/search/hybrid.ts b/src/domain/search/search/hybrid.ts
index ef7c2fc4c..bf6406c6c 100644
--- a/src/domain/search/search/hybrid.ts
+++ b/src/domain/search/search/hybrid.ts
@@ -105,61 +105,72 @@ async function collectRankedLists(
   return rankedLists;
 }
 
+/** Initialise a fusion entry seeded from the first ranked item we see for a key. */
+function createFusionEntry(item: RankedItem): FusionEntry {
+  return {
+    name: item.name,
+    kind: item.kind,
+    file: item.file,
+    line: item.line,
+    endLine: (item.endLine as number | null) ?? null,
+    role: (item.role as string | null) ?? null,
+    fileHash: (item.fileHash as string | null) ?? null,
+    rrfScore: 0,
+    bm25Score: null,
+    bm25Rank: null,
+    similarity: null,
+    semanticRank: null,
+  };
+}
+
+/** Merge a single ranked item into its fusion entry: update RRF and best per-source rank. */
+function mergeRankedItem(entry: FusionEntry, item: RankedItem, k: number): void {
+  entry.rrfScore += 1 / (k + item.rank);
+  if (item.source === 'bm25') {
+    if (entry.bm25Rank === null || item.rank < entry.bm25Rank) {
+      entry.bm25Score = item.bm25Score ?? null;
+      entry.bm25Rank = item.rank;
+    }
+  } else if (entry.semanticRank === null || item.rank < entry.semanticRank) {
+    entry.similarity = item.similarity ?? null;
+    entry.semanticRank = item.rank;
+  }
+}
+
+/** Flatten a fusion entry into the public-facing hybrid result shape. */
+function toHybridResult(e: FusionEntry): HybridResult {
+  return {
+    name: e.name,
+    kind: e.kind,
+    file: e.file,
+    line: e.line,
+    endLine: e.endLine,
+    role: e.role,
+    fileHash: e.fileHash,
+    rrf: e.rrfScore,
+    bm25Score: e.bm25Score,
+    bm25Rank: e.bm25Rank,
+    similarity: e.similarity,
+    semanticRank: e.semanticRank,
+  };
+}
+
 /** Reciprocal Rank Fusion: merge ranked lists into a single scored result set. */
 function fuseResults(rankedLists: RankedItem[][], k: number, limit: number): HybridResult[] {
   const fusionMap = new Map<string, FusionEntry>();
-
   for (const list of rankedLists) {
     for (const item of list) {
       if (!fusionMap.has(item.key)) {
-        fusionMap.set(item.key, {
-          name: item.name,
-          kind: item.kind,
-          file: item.file,
-          line: item.line,
-          endLine: (item.endLine as number | null) ?? null,
-          role: (item.role as string | null) ?? null,
-          fileHash: (item.fileHash as string | null) ?? null,
-          rrfScore: 0,
-          bm25Score: null,
-          bm25Rank: null,
-          similarity: null,
-          semanticRank: null,
-        });
-      }
-      const entry = fusionMap.get(item.key)!;
-      entry.rrfScore += 1 / (k + item.rank);
-      if (item.source === 'bm25') {
-        if (entry.bm25Rank === null || item.rank < entry.bm25Rank) {
-          entry.bm25Score = (item as RankedItem & { bm25Score?: number }).bm25Score ?? null;
-          entry.bm25Rank = item.rank;
-        }
-      } else {
-        if (entry.semanticRank === null || item.rank < entry.semanticRank) {
-          entry.similarity = (item as RankedItem & { similarity?: number }).similarity ?? null;
-          entry.semanticRank = item.rank;
-        }
+        fusionMap.set(item.key, createFusionEntry(item));
       }
+      mergeRankedItem(fusionMap.get(item.key)!, item, k);
     }
   }
 
   return [...fusionMap.values()]
     .sort((a, b) => b.rrfScore - a.rrfScore)
     .slice(0, limit)
-    .map((e) => ({
-      name: e.name,
-      kind: e.kind,
-      file: e.file,
-      line: e.line,
-      endLine: e.endLine,
-      role: e.role,
-      fileHash: e.fileHash,
-      rrf: e.rrfScore,
-      bm25Score: e.bm25Score,
-      bm25Rank: e.bm25Rank,
-      similarity: e.similarity,
-      semanticRank: e.semanticRank,
-    }));
+    .map(toHybridResult);
 }
 
 export async function hybridSearchData(
diff --git a/src/domain/search/search/semantic.ts b/src/domain/search/search/semantic.ts
index 40e2f8870..2c0b82616 100644
--- a/src/domain/search/search/semantic.ts
+++ b/src/domain/search/search/semantic.ts
@@ -4,7 +4,7 @@ import type { BetterSqlite3Database, CodegraphConfig } from '../../../types.js';
 import { normalizeSymbol } from '../../queries.js';
 import { embed } from '../models.js';
 import { cosineSim } from '../stores/sqlite-blob.js';
-import { prepareSearch } from './prepare.js';
+import { type PreparedSearch, prepareSearch } from './prepare.js';
 
 export interface SemanticSearchOpts {
   config?: CodegraphConfig;
@@ -30,6 +30,25 @@ export interface SearchDataResult {
   results: SemanticResult[];
 }
 
+type StoredRow = PreparedSearch['rows'][number];
+
+/** Reconstitute a stored embedding row's vector blob into a Float32Array. */
+function rowVector(row: StoredRow): Float32Array {
+  return new Float32Array(new Uint8Array(row.vector as unknown as ArrayBuffer).buffer);
+}
+
+/** Warn if the query model's dimension differs from the stored one; returns true on mismatch. */
+function checkDimensionMismatch(storedDim: number | null, dim: number): boolean {
+  if (storedDim && dim !== storedDim) {
+    console.log(
+      `Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
+    );
+    console.log(`  Re-run \`codegraph embed\` with the same model, or use --model to match.`);
+    return true;
+  }
+  return false;
+}
+
 export async function searchData(
   query: string,
   customDbPath: string | undefined,
@@ -50,20 +69,12 @@ export async function searchData(
       dim,
     } = await embed([query], modelKey ?? undefined);
 
-    if (storedDim && dim !== storedDim) {
-      console.log(
-        `Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
-      );
-      console.log(`  Re-run \`codegraph embed\` with the same model, or use --model to match.`);
-      return null;
-    }
+    if (checkDimensionMismatch(storedDim, dim)) return null;
 
     const hc = new Map<string, string>();
     const results: SemanticResult[] = [];
     for (const row of rows) {
-      const vec = new Float32Array(new Uint8Array(row.vector as unknown as ArrayBuffer).buffer);
-      const sim = cosineSim(queryVec!, vec);
-
+      const sim = cosineSim(queryVec!, rowVector(row));
       if (sim >= minScore) {
         results.push({
           ...normalizeSymbol(row, db as BetterSqlite3Database, hc),
@@ -91,6 +102,82 @@ export interface MultiSearchResult {
   }>;
 }
 
+interface RankedHit {
+  rowIndex: number;
+  similarity: number;
+  rank: number;
+}
+
+interface FusionEntry {
+  rrfScore: number;
+  queryScores: Array<{ query: string; similarity: number; rank: number }>;
+}
+
+/**
+ * Emit a warning for any query pair whose embeddings are nearly identical,
+ * since RRF would over-weight matches shared between them.
+ */
+function warnOnSimilarQueries(
+  queries: string[],
+  queryVecs: Float32Array[],
+  threshold: number,
+): void {
+  for (let i = 0; i < queryVecs.length; i++) {
+    for (let j = i + 1; j < queryVecs.length; j++) {
+      const sim = cosineSim(queryVecs[i]!, queryVecs[j]!);
+      if (sim >= threshold) {
+        warn(
+          `Queries "${queries[i]}" and "${queries[j]}" are very similar ` +
+            `(${(sim * 100).toFixed(0)}% cosine similarity). ` +
+            `This may bias RRF results toward their shared matches. ` +
+            `Consider using more distinct queries.`,
+        );
+      }
+    }
+  }
+}
+
+/** Rank stored rows for a single query, keeping only those at or above minScore. */
+function rankRowsForQuery(
+  queryVec: Float32Array,
+  rowVecs: Float32Array[],
+  minScore: number,
+): RankedHit[] {
+  const scored: Array<{ rowIndex: number; similarity: number }> = [];
+  for (let ri = 0; ri < rowVecs.length; ri++) {
+    const sim = cosineSim(queryVec, rowVecs[ri]!);
+    if (sim >= minScore) {
+      scored.push({ rowIndex: ri, similarity: sim });
+    }
+  }
+  scored.sort((a, b) => b.similarity - a.similarity);
+  return scored.map((item, rank) => ({ ...item, rank: rank + 1 }));
+}
+
+/** Reciprocal Rank Fusion across each query's ranked hits. */
+function fuseRankedHits(
+  queries: string[],
+  perQueryRanked: RankedHit[][],
+  k: number,
+): Map<number, FusionEntry> {
+  const fusionMap = new Map<number, FusionEntry>();
+  for (let qi = 0; qi < queries.length; qi++) {
+    for (const item of perQueryRanked[qi]!) {
+      if (!fusionMap.has(item.rowIndex)) {
+        fusionMap.set(item.rowIndex, { rrfScore: 0, queryScores: [] });
+      }
+      const entry = fusionMap.get(item.rowIndex)!;
+      entry.rrfScore += 1 / (k + item.rank);
+      entry.queryScores.push({
+        query: queries[qi]!,
+        similarity: item.similarity,
+        rank: item.rank,
+      });
+    }
+  }
+  return fusionMap;
+}
+
 export async function multiSearchData(
   queries: string[],
   customDbPath: string | undefined,
@@ -101,6 +188,7 @@ export async function multiSearchData(
   const limit = opts.limit ?? searchCfg.topK ?? 15;
   const minScore = opts.minScore ?? searchCfg.defaultMinScore ?? 0.2;
   const k = opts.rrfK ?? searchCfg.rrfK ?? 60;
+  const similarityWarnThreshold = searchCfg.similarityWarnThreshold ?? 0.85;
 
   const prepared = prepareSearch(customDbPath, opts);
   if (!prepared) return null;
@@ -109,63 +197,15 @@ export async function multiSearchData(
   try {
     const { vectors: queryVecs, dim } = await embed(queries, modelKey ?? undefined);
 
-    const SIMILARITY_WARN_THRESHOLD = searchCfg.similarityWarnThreshold ?? 0.85;
-    for (let i = 0; i < queryVecs.length; i++) {
-      for (let j = i + 1; j < queryVecs.length; j++) {
-        const sim = cosineSim(queryVecs[i]!, queryVecs[j]!);
-        if (sim >= SIMILARITY_WARN_THRESHOLD) {
-          warn(
-            `Queries "${queries[i]}" and "${queries[j]}" are very similar ` +
-              `(${(sim * 100).toFixed(0)}% cosine similarity). ` +
-              `This may bias RRF results toward their shared matches. ` +
-              `Consider using more distinct queries.`,
-          );
-        }
-      }
-    }
+    warnOnSimilarQueries(queries, queryVecs as Float32Array[], similarityWarnThreshold);
 
-    if (storedDim && dim !== storedDim) {
-      console.log(
-        `Warning: query model dimension (${dim}) doesn't match stored embeddings (${storedDim}).`,
-      );
-      console.log(`  Re-run \`codegraph embed\` with the same model, or use --model to match.`);
-      return null;
-    }
+    if (checkDimensionMismatch(storedDim, dim)) return null;
 
-    const rowVecs = rows.map(
-      (row) => new Float32Array(new Uint8Array(row.vector as unknown as ArrayBuffer).buffer),
+    const rowVecs = rows.map(rowVector);
+    const perQueryRanked = queries.map((_q, qi) =>
+      rankRowsForQuery(queryVecs[qi]!, rowVecs, minScore),
     );
-
-    const perQueryRanked = queries.map((_query, qi) => {
-      const scored: Array<{ rowIndex: number; similarity: number }> = [];
-      for (let ri = 0; ri < rows.length; ri++) {
-        const sim = cosineSim(queryVecs[qi]!, rowVecs[ri]!);
-        if (sim >= minScore) {
-          scored.push({ rowIndex: ri, similarity: sim });
-        }
-      }
-      scored.sort((a, b) => b.similarity - a.similarity);
-      return scored.map((item, rank) => ({ ...item, rank: rank + 1 }));
-    });
-
-    const fusionMap = new Map<
-      number,
-      { rrfScore: number; queryScores: Array<{ query: string; similarity: number; rank: number }> }
-    >();
-    for (let qi = 0; qi < queries.length; qi++) {
-      for (const item of perQueryRanked[qi]!) {
-        if (!fusionMap.has(item.rowIndex)) {
-          fusionMap.set(item.rowIndex, { rrfScore: 0, queryScores: [] });
-        }
-        const entry = fusionMap.get(item.rowIndex)!;
-        entry.rrfScore += 1 / (k + item.rank);
-        entry.queryScores.push({
-          query: queries[qi]!,
-          similarity: item.similarity,
-          rank: item.rank,
-        });
-      }
-    }
+    const fusionMap = fuseRankedHits(queries, perQueryRanked, k);
 
     const hc = new Map<string, string>();
     const results: MultiSearchResult['results'] = [];

From 0a12e8c8b730f06a53fad9318285efa30104c1a8 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 13:46:19 -0600
Subject: [PATCH 18/27] refactor(features): decompose complexity, structure,
 graph-enrichment, structure-query, and owners
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Internal refactor — no public API or behaviour change, so docs check acknowledged.

- complexity.ts: split collectNativeBulkRows (cog=70) into classify/build/collect-file helpers;
  extract classifyHalsteadToken + summarizeHalsteadCounts from computeHalsteadMetrics.
- structure.ts: merge classifyNodeRolesFull/Incremental DRY via shared buildActiveFilesSet
  + buildClassifierInput helpers.
- graph-enrichment.ts: decompose prepareFileLevelData (cog=32, cyc=26) into loadFileLevelEdges,
  computeFileFanCounts, detectFileCommunities, buildFileVisNode, selectFileSeedNodes.
- structure-query.ts: split hotspotsData (cog=34, sloc=102) using a strategy pattern
  (HOTSPOT_ORDER_BY) and mapNative/JsHotspotRow helpers.
- owners.ts: split ownersData (sloc=158, bugs=1.55) into loadFilteredFiles, buildOwnerIndex,
  loadSymbolsForFiles, computeOwnerBoundaries, buildOwnersSummary.
---
 src/features/complexity.ts       | 213 ++++++++++++--------
 src/features/graph-enrichment.ts | 175 +++++++++-------
 src/features/owners.ts           | 330 +++++++++++++++++--------------
 src/features/structure-query.ts  | 173 ++++++++--------
 src/features/structure.ts        | 112 +++++------
 5 files changed, 573 insertions(+), 430 deletions(-)

diff --git a/src/features/complexity.ts b/src/features/complexity.ts
index 509d03478..5627b6e36 100644
--- a/src/features/complexity.ts
+++ b/src/features/complexity.ts
@@ -31,44 +31,36 @@ const COMPLEXITY_EXTENSIONS = buildExtensionSet(COMPLEXITY_RULES);
 
 // ─── Halstead Metrics Computation ─────────────────────────────────────────
 
-export function computeHalsteadMetrics(
-  functionNode: TreeSitterNode,
-  language: string,
-): HalsteadDerivedMetrics | null {
-  const rules = HALSTEAD_RULES.get(language) as HalsteadRules | undefined;
-  if (!rules) return null;
-
-  const operators = new Map<string, number>(); // type -> count
-  const operands = new Map<string, number>(); // text -> count
-
-  function walk(node: TreeSitterNode | null): void {
-    if (!node) return;
-
-    // Skip type annotation subtrees
-    if (rules?.skipTypes.has(node.type)) return;
+/** Classify a tree-sitter node as a Halstead operator or operand,
+ *  updating the passed-in count maps in place. Helper extracted from
+ *  computeHalsteadMetrics to keep the dispatcher thin. */
+function classifyHalsteadToken(
+  node: TreeSitterNode,
+  rules: HalsteadRules,
+  operators: Map<string, number>,
+  operands: Map<string, number>,
+): void {
+  // Compound operators (non-leaf): count the node type as an operator
+  if (rules.compoundOperators.has(node.type)) {
+    operators.set(node.type, (operators.get(node.type) || 0) + 1);
+  }
 
-    // Compound operators (non-leaf): count the node type as an operator
-    if (rules?.compoundOperators.has(node.type)) {
+  // Leaf nodes: classify as operator or operand
+  if (node.childCount === 0) {
+    if (rules.operatorLeafTypes.has(node.type)) {
       operators.set(node.type, (operators.get(node.type) || 0) + 1);
-    }
-
-    // Leaf nodes: classify as operator or operand
-    if (node.childCount === 0) {
-      if (rules?.operatorLeafTypes.has(node.type)) {
-        operators.set(node.type, (operators.get(node.type) || 0) + 1);
-      } else if (rules?.operandLeafTypes.has(node.type)) {
-        const text = node.text;
-        operands.set(text, (operands.get(text) || 0) + 1);
-      }
-    }
-
-    for (let i = 0; i < node.childCount; i++) {
-      walk(node.child(i));
+    } else if (rules.operandLeafTypes.has(node.type)) {
+      const text = node.text;
+      operands.set(text, (operands.get(text) || 0) + 1);
     }
   }
+}
 
-  walk(functionNode);
-
+/** Build a HalsteadDerivedMetrics summary from the raw operator/operand counts. */
+function summarizeHalsteadCounts(
+  operators: Map<string, number>,
+  operands: Map<string, number>,
+): HalsteadDerivedMetrics {
   const n1 = operators.size; // distinct operators
   const n2 = operands.size; // distinct operands
   let bigN1 = 0; // total operators
@@ -79,7 +71,6 @@ export function computeHalsteadMetrics(
   const vocabulary = n1 + n2;
   const length = bigN1 + bigN2;
 
-  // Guard against zero
   const volume = vocabulary > 0 ? length * Math.log2(vocabulary) : 0;
   const difficulty = n2 > 0 ? (n1 / 2) * (bigN2 / n2) : 0;
   const effort = difficulty * volume;
@@ -99,6 +90,31 @@ export function computeHalsteadMetrics(
   };
 }
 
+export function computeHalsteadMetrics(
+  functionNode: TreeSitterNode,
+  language: string,
+): HalsteadDerivedMetrics | null {
+  const rules = HALSTEAD_RULES.get(language) as HalsteadRules | undefined;
+  if (!rules) return null;
+
+  const operators = new Map<string, number>(); // type -> count
+  const operands = new Map<string, number>(); // text -> count
+
+  function walk(node: TreeSitterNode | null): void {
+    if (!node) return;
+    // Skip type annotation subtrees
+    if (rules?.skipTypes.has(node.type)) return;
+    classifyHalsteadToken(node, rules as HalsteadRules, operators, operands);
+    for (let i = 0; i < node.childCount; i++) {
+      walk(node.child(i));
+    }
+  }
+
+  walk(functionNode);
+
+  return summarizeHalsteadCounts(operators, operands);
+}
+
 // ─── LOC Metrics Computation ──────────────────────────────────────────────
 // Delegated to ast-analysis/metrics.js; re-exported for backward compatibility.
 export const computeLOCMetrics = _computeLOCMetrics;
@@ -535,6 +551,89 @@ function upsertAstComplexity(
   return 1;
 }
 
+/** Outcome vocabulary for native bulk-row collection.
+ *  - 'skip': the definition is legitimately ignorable (non-function, missing line,
+ *            interface stub, unsupported language).
+ *  - 'fallback': a genuine function body is missing precomputed complexity —
+ *                the whole native fast path must abort to JS.
+ *  - 'emit': no fallback was needed; any rows with complexity data were appended. */
+type NativeRowDecision = 'skip' | 'fallback' | 'emit';
+
+/** Classify a definition relative to the native bulk path. Returns 'skip' to
+ *  ignore it, 'fallback' to bail out, or 'has-data' when complexity is present. */
+function classifyDefinitionForNativeBulk(
+  def: FileSymbols['definitions'][0],
+  langSupported: boolean,
+): 'skip' | 'fallback' | 'has-data' {
+  if (def.kind !== 'function' && def.kind !== 'method') return 'skip';
+  if (!def.line) return 'skip';
+  if (!def.complexity) {
+    // Interface/type property signatures and single-line stubs are extracted
+    // as methods but the native engine correctly never assigns complexity.
+    // Mirror the leniency in initWasmParsersIfNeeded to avoid bailing out
+    // of the native bulk-insert path for every TypeScript codebase (#846).
+    if (def.name.includes('.') || !def.endLine || def.endLine <= def.line) return 'skip';
+    // Languages without complexity rules will never have data — skip them
+    // rather than bailing out of the entire native bulk path.
+    if (!langSupported) return 'skip';
+    return 'fallback'; // genuine function body missing complexity — needs JS fallback
+  }
+  return 'has-data';
+}
+
+/** Build a single native-bulk row from a definition with complexity data. */
+function buildNativeBulkRow(
+  nodeId: number,
+  def: FileSymbols['definitions'][0],
+): Record<string, unknown> {
+  const ch = def.complexity?.halstead;
+  const cl = def.complexity?.loc;
+  return {
+    nodeId,
+    cognitive: def.complexity?.cognitive ?? 0,
+    cyclomatic: def.complexity?.cyclomatic ?? 0,
+    maxNesting: def.complexity?.maxNesting ?? 0,
+    loc: cl ? cl.loc : 0,
+    sloc: cl ? cl.sloc : 0,
+    commentLines: cl ? cl.commentLines : 0,
+    halsteadN1: ch ? ch.n1 : 0,
+    halsteadN2: ch ? ch.n2 : 0,
+    halsteadBigN1: ch ? ch.bigN1 : 0,
+    halsteadBigN2: ch ? ch.bigN2 : 0,
+    halsteadVocabulary: ch ? ch.vocabulary : 0,
+    halsteadLength: ch ? ch.length : 0,
+    halsteadVolume: ch ? ch.volume : 0,
+    halsteadDifficulty: ch ? ch.difficulty : 0,
+    halsteadEffort: ch ? ch.effort : 0,
+    halsteadBugs: ch ? ch.bugs : 0,
+    maintainabilityIndex: def.complexity?.maintainabilityIndex ?? 0,
+  };
+}
+
+/** Try to collect a single file's definitions into native-bulk rows.
+ *  Returns 'fallback' if any definition forces a JS fallback, otherwise 'emit'. */
+function collectFileBulkRows(
+  db: BetterSqlite3Database,
+  relPath: string,
+  symbols: FileSymbols,
+  rows: Array<Record<string, unknown>>,
+): NativeRowDecision {
+  const ext = path.extname(relPath).toLowerCase();
+  const langId = symbols._langId || '';
+  const langSupported = COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(langId);
+
+  for (const def of symbols.definitions) {
+    const decision = classifyDefinitionForNativeBulk(def, langSupported);
+    if (decision === 'skip') continue;
+    if (decision === 'fallback') return 'fallback';
+
+    const nodeId = getFunctionNodeId(db, def.name, relPath, def.line);
+    if (!nodeId) continue;
+    rows.push(buildNativeBulkRow(nodeId, def));
+  }
+  return 'emit';
+}
+
 /** Collect native bulk-insert rows from precomputed complexity data.
  *  Returns the rows array, or null if any definition is missing complexity
  *  (signalling that JS fallback is needed). */
@@ -543,53 +642,9 @@ function collectNativeBulkRows(
   fileSymbols: Map<string, FileSymbols>,
 ): Array<Record<string, unknown>> | null {
   const rows: Array<Record<string, unknown>> = [];
-
   for (const [relPath, symbols] of fileSymbols) {
-    const ext = path.extname(relPath).toLowerCase();
-    const langId = symbols._langId || '';
-    const langSupported = COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(langId);
-
-    for (const def of symbols.definitions) {
-      if (def.kind !== 'function' && def.kind !== 'method') continue;
-      if (!def.line) continue;
-      // Interface/type property signatures and single-line stubs are extracted
-      // as methods but the native engine correctly never assigns complexity.
-      // Mirror the leniency in initWasmParsersIfNeeded to avoid bailing out
-      // of the native bulk-insert path for every TypeScript codebase (#846).
-      if (!def.complexity) {
-        if (def.name.includes('.') || !def.endLine || def.endLine <= def.line) continue;
-        // Languages without complexity rules will never have data — skip them
-        // rather than bailing out of the entire native bulk path.
-        if (!langSupported) continue;
-        return null; // genuine function body missing complexity — needs JS fallback
-      }
-      const nodeId = getFunctionNodeId(db, def.name, relPath, def.line);
-      if (!nodeId) continue;
-      const ch = def.complexity.halstead;
-      const cl = def.complexity.loc;
-      rows.push({
-        nodeId,
-        cognitive: def.complexity.cognitive ?? 0,
-        cyclomatic: def.complexity.cyclomatic ?? 0,
-        maxNesting: def.complexity.maxNesting ?? 0,
-        loc: cl ? cl.loc : 0,
-        sloc: cl ? cl.sloc : 0,
-        commentLines: cl ? cl.commentLines : 0,
-        halsteadN1: ch ? ch.n1 : 0,
-        halsteadN2: ch ? ch.n2 : 0,
-        halsteadBigN1: ch ? ch.bigN1 : 0,
-        halsteadBigN2: ch ? ch.bigN2 : 0,
-        halsteadVocabulary: ch ? ch.vocabulary : 0,
-        halsteadLength: ch ? ch.length : 0,
-        halsteadVolume: ch ? ch.volume : 0,
-        halsteadDifficulty: ch ? ch.difficulty : 0,
-        halsteadEffort: ch ? ch.effort : 0,
-        halsteadBugs: ch ? ch.bugs : 0,
-        maintainabilityIndex: def.complexity.maintainabilityIndex ?? 0,
-      });
-    }
+    if (collectFileBulkRows(db, relPath, symbols, rows) === 'fallback') return null;
   }
-
   return rows;
 }
 
diff --git a/src/features/graph-enrichment.ts b/src/features/graph-enrichment.ts
index 564cc5004..886d09bf8 100644
--- a/src/features/graph-enrichment.ts
+++ b/src/features/graph-enrichment.ts
@@ -336,13 +336,13 @@ interface FileLevelEdge {
   target: string;
 }
 
-function prepareFileLevelData(
+/** Load file-level import/call edges from the DB and optionally exclude test files. */
+function loadFileLevelEdges(
   db: BetterSqlite3Database,
   noTests: boolean,
   minConf: number,
-  cfg: PlotConfig,
-): GraphData {
-  let edges = db
+): FileLevelEdge[] {
+  const edges = db
     .prepare<FileLevelEdge>(
       `
       SELECT DISTINCT n1.file AS source, n2.file AS target
@@ -354,73 +354,118 @@ function prepareFileLevelData(
     `,
     )
     .all(minConf);
-  if (noTests) edges = edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target));
-
-  const files = new Set<string>();
-  for (const { source, target } of edges) {
-    files.add(source);
-    files.add(target);
-  }
-
-  const fileIds = new Map<string, number>();
-  let idx = 0;
-  for (const f of files) fileIds.set(f, idx++);
+  return noTests ? edges.filter((e) => !isTestFile(e.source) && !isTestFile(e.target)) : edges;
+}
 
-  // Fan-in/fan-out
+/** Compute fan-in and fan-out for each file from a list of edges. */
+function computeFileFanCounts(edges: FileLevelEdge[]): {
+  fanInCount: Map<string, number>;
+  fanOutCount: Map<string, number>;
+} {
   const fanInCount = new Map<string, number>();
   const fanOutCount = new Map<string, number>();
   for (const { source, target } of edges) {
     fanOutCount.set(source, (fanOutCount.get(source) || 0) + 1);
     fanInCount.set(target, (fanInCount.get(target) || 0) + 1);
   }
+  return { fanInCount, fanOutCount };
+}
 
-  // Communities via graph subsystem
+/** Run Louvain community detection on the file-level graph. Returns empty map on failure. */
+function detectFileCommunities(files: Set<string>, edges: FileLevelEdge[]): Map<string, number> {
   const communityMap = new Map<string, number>();
-  if (files.size > 0) {
-    try {
-      const fileGraph = new CodeGraph();
-      for (const f of files) fileGraph.addNode(f);
-      for (const { source, target } of edges) {
-        if (source !== target && !fileGraph.hasEdge(source, target))
-          fileGraph.addEdge(source, target);
-      }
-      const { assignments } = louvainCommunities(fileGraph);
-      for (const [file, cid] of assignments) communityMap.set(file, cid);
-    } catch {
-      // ignore
+  if (files.size === 0) return communityMap;
+  try {
+    const fileGraph = new CodeGraph();
+    for (const f of files) fileGraph.addNode(f);
+    for (const { source, target } of edges) {
+      if (source !== target && !fileGraph.hasEdge(source, target))
+        fileGraph.addEdge(source, target);
     }
+    const { assignments } = louvainCommunities(fileGraph);
+    for (const [file, cid] of assignments) communityMap.set(file, cid);
+  } catch {
+    // best-effort: on any failure, return whatever assignments were collected
   }
+  return communityMap;
+}
 
-  const visNodes: VisNode[] = [...files].map((f) => {
-    const id = fileIds.get(f)!;
-    const community = communityMap.get(f) ?? null;
-    const fanIn = fanInCount.get(f) || 0;
-    const fanOut = fanOutCount.get(f) || 0;
-    const directory = path.dirname(f);
-    const color: string =
-      cfg.colorBy === 'community' && community !== null
-        ? COMMUNITY_COLORS[community % COMMUNITY_COLORS.length] || '#ccc'
-        : cfg.nodeColors?.file || (DEFAULT_NODE_COLORS as Record<string, string>).file || '#ccc';
-
-    return {
-      id,
-      label: path.basename(f),
-      title: f,
-      color,
-      kind: 'file',
-      role: '',
-      file: f,
-      line: 0,
-      community,
-      cognitive: null,
-      cyclomatic: null,
-      maintainabilityIndex: null,
-      fanIn,
-      fanOut,
-      directory,
-      risk: [],
-    };
-  });
+/** Build a VisNode for a single file, applying color based on cfg.colorBy. */
+function buildFileVisNode(
+  file: string,
+  id: number,
+  community: number | null,
+  fanIn: number,
+  fanOut: number,
+  cfg: PlotConfig,
+): VisNode {
+  const color: string =
+    cfg.colorBy === 'community' && community !== null
+      ? COMMUNITY_COLORS[community % COMMUNITY_COLORS.length] || '#ccc'
+      : cfg.nodeColors?.file || (DEFAULT_NODE_COLORS as Record<string, string>).file || '#ccc';
+
+  return {
+    id,
+    label: path.basename(file),
+    title: file,
+    color,
+    kind: 'file',
+    role: '',
+    file,
+    line: 0,
+    community,
+    cognitive: null,
+    cyclomatic: null,
+    maintainabilityIndex: null,
+    fanIn,
+    fanOut,
+    directory: path.dirname(file),
+    risk: [],
+  };
+}
+
+/** Select seed node IDs for the file-level graph based on configured strategy. */
+function selectFileSeedNodes(visNodes: VisNode[], cfg: PlotConfig): (number | string)[] {
+  if (cfg.seedStrategy === 'top-fanin') {
+    const sorted = [...visNodes].sort((a, b) => b.fanIn - a.fanIn);
+    return sorted.slice(0, cfg.seedCount || 30).map((n) => n.id);
+  }
+  // Both 'entry' and the default fallback include every node — file-level graphs
+  // don't track per-file roles, so 'entry' has no meaningful filter.
+  return visNodes.map((n) => n.id);
+}
+
+function prepareFileLevelData(
+  db: BetterSqlite3Database,
+  noTests: boolean,
+  minConf: number,
+  cfg: PlotConfig,
+): GraphData {
+  const edges = loadFileLevelEdges(db, noTests, minConf);
+
+  const files = new Set<string>();
+  for (const { source, target } of edges) {
+    files.add(source);
+    files.add(target);
+  }
+
+  const fileIds = new Map<string, number>();
+  let idx = 0;
+  for (const f of files) fileIds.set(f, idx++);
+
+  const { fanInCount, fanOutCount } = computeFileFanCounts(edges);
+  const communityMap = detectFileCommunities(files, edges);
+
+  const visNodes: VisNode[] = [...files].map((f) =>
+    buildFileVisNode(
+      f,
+      fileIds.get(f)!,
+      communityMap.get(f) ?? null,
+      fanInCount.get(f) || 0,
+      fanOutCount.get(f) || 0,
+      cfg,
+    ),
+  );
 
   const visEdges: VisEdge[] = edges.map(({ source, target }, i) => ({
     id: `e${i}`,
@@ -428,17 +473,7 @@ function prepareFileLevelData(
     to: fileIds.get(target)!,
   }));
 
-  let seedNodeIds: (number | string)[];
-  if (cfg.seedStrategy === 'top-fanin') {
-    const sorted = [...visNodes].sort((a, b) => b.fanIn - a.fanIn);
-    seedNodeIds = sorted.slice(0, cfg.seedCount || 30).map((n) => n.id);
-  } else if (cfg.seedStrategy === 'entry') {
-    seedNodeIds = visNodes.map((n) => n.id);
-  } else {
-    seedNodeIds = visNodes.map((n) => n.id);
-  }
-
-  return { nodes: visNodes, edges: visEdges, seedNodeIds };
+  return { nodes: visNodes, edges: visEdges, seedNodeIds: selectFileSeedNodes(visNodes, cfg) };
 }
 
 // ─── HTML Generation (thin wrapper) ──────────────────────────────────
diff --git a/src/features/owners.ts b/src/features/owners.ts
index 5c278ce64..c0e81ca30 100644
--- a/src/features/owners.ts
+++ b/src/features/owners.ts
@@ -139,18 +139,25 @@ interface OwnersDataOpts {
   boundary?: boolean;
 }
 
-export function ownersData(
-  customDbPath?: string,
-  opts: OwnersDataOpts = {},
-): {
+interface OwnedSymbol {
+  name: string;
+  kind: string;
+  file: string;
+  line: number;
+  owners: string[];
+}
+
+interface OwnerBoundary {
+  from: OwnedSymbol;
+  to: OwnedSymbol;
+  edgeKind: string;
+}
+
+interface OwnersDataResult {
   codeownersFile: string | null;
   files: { file: string; owners: string[] }[];
-  symbols: { name: string; kind: string; file: string; line: number; owners: string[] }[];
-  boundaries: {
-    from: { name: string; kind: string; file: string; line: number; owners: string[] };
-    to: { name: string; kind: string; file: string; line: number; owners: string[] };
-    edgeKind: string;
-  }[];
+  symbols: OwnedSymbol[];
+  boundaries: OwnerBoundary[];
   summary: {
     totalFiles: number;
     ownedFiles: number;
@@ -159,160 +166,191 @@ export function ownersData(
     ownerCount: number;
     byOwner: { owner: string; fileCount: number }[];
   };
+}
+
+interface BetterSqlite3DatabaseLike {
+  prepare(sql: string): { all(...params: unknown[]): unknown[] };
+  close(): void;
+}
+
+function emptyOwnersResult(codeownersFile: string | null): OwnersDataResult {
+  return {
+    codeownersFile,
+    files: [],
+    symbols: [],
+    boundaries: [],
+    summary: {
+      totalFiles: 0,
+      ownedFiles: 0,
+      unownedFiles: 0,
+      coveragePercent: 0,
+      ownerCount: 0,
+      byOwner: [],
+    },
+  };
+}
+
+/** Load all distinct files from the DB and apply test/file filters. */
+function loadFilteredFiles(db: BetterSqlite3DatabaseLike, opts: OwnersDataOpts): string[] {
+  let allFiles = (db.prepare('SELECT DISTINCT file FROM nodes').all() as { file: string }[]).map(
+    (r) => r.file,
+  );
+  if (opts.noTests) allFiles = allFiles.filter((f) => !isTestFile(f));
+  const fileFilters = normalizeFileFilter(opts.file);
+  if (fileFilters.length > 0) {
+    allFiles = allFiles.filter((f) => fileFilters.some((filter) => f.includes(filter)));
+  }
+  return allFiles;
+}
+
+/** Build owner index (owner -> list of files) and count owned files. */
+function buildOwnerIndex(fileOwners: { file: string; owners: string[] }[]): {
+  ownerIndex: Map<string, string[]>;
+  ownedCount: number;
 } {
+  const ownerIndex = new Map<string, string[]>();
+  let ownedCount = 0;
+  for (const fo of fileOwners) {
+    if (fo.owners.length > 0) ownedCount++;
+    for (const o of fo.owners) {
+      if (!ownerIndex.has(o)) ownerIndex.set(o, []);
+      ownerIndex.get(o)!.push(fo.file);
+    }
+  }
+  return { ownerIndex, ownedCount };
+}
+
+/** Load symbols restricted to the given file set, applying noTests and kind filters. */
+function loadSymbolsForFiles(
+  db: BetterSqlite3DatabaseLike,
+  fileSet: Set<string>,
+  opts: OwnersDataOpts,
+  rules: CodeownersRule[],
+): OwnedSymbol[] {
+  let symbols = (
+    db.prepare('SELECT name, kind, file, line FROM nodes').all() as {
+      name: string;
+      kind: string;
+      file: string;
+      line: number;
+    }[]
+  ).filter((n) => fileSet.has(n.file));
+
+  if (opts.noTests) symbols = symbols.filter((s) => !isTestFile(s.file));
+  if (opts.kind) symbols = symbols.filter((s) => s.kind === opts.kind);
+
+  return symbols.map((s) => ({ ...s, owners: matchOwners(s.file, rules) }));
+}
+
+interface CallEdgeRow {
+  id: number;
+  edgeKind: string;
+  srcName: string;
+  srcKind: string;
+  srcFile: string;
+  srcLine: number;
+  tgtName: string;
+  tgtKind: string;
+  tgtFile: string;
+  tgtLine: number;
+}
+
+/** Compute cross-owner call boundaries; ownersData calls this only when opts.boundary is set. */
+function computeOwnerBoundaries(
+  db: BetterSqlite3DatabaseLike,
+  rules: CodeownersRule[],
+  noTests: boolean,
+): OwnerBoundary[] {
+  const edges = db
+    .prepare(
+      `SELECT e.id, e.kind AS edgeKind,
+              s.name AS srcName, s.kind AS srcKind, s.file AS srcFile, s.line AS srcLine,
+              t.name AS tgtName, t.kind AS tgtKind, t.file AS tgtFile, t.line AS tgtLine
+       FROM edges e
+       JOIN nodes s ON e.source_id = s.id
+       JOIN nodes t ON e.target_id = t.id
+       WHERE e.kind = 'calls'`,
+    )
+    .all() as CallEdgeRow[];
+
+  const boundaries: OwnerBoundary[] = [];
+  for (const e of edges) {
+    if (noTests && (isTestFile(e.srcFile) || isTestFile(e.tgtFile))) continue;
+    const srcOwners = matchOwners(e.srcFile, rules);
+    const tgtOwners = matchOwners(e.tgtFile, rules);
+    // Cross-boundary: different owner sets (sort for deterministic comparison)
+    const srcKey = [...srcOwners].sort().join(',');
+    const tgtKey = [...tgtOwners].sort().join(',');
+    if (srcKey === tgtKey) continue;
+    boundaries.push({
+      from: {
+        name: e.srcName,
+        kind: e.srcKind,
+        file: e.srcFile,
+        line: e.srcLine,
+        owners: srcOwners,
+      },
+      to: { name: e.tgtName, kind: e.tgtKind, file: e.tgtFile, line: e.tgtLine, owners: tgtOwners },
+      edgeKind: e.edgeKind,
+    });
+  }
+  return boundaries;
+}
+
+/** Build summary stats (totals, coverage, by-owner counts). */
+function buildOwnersSummary(
+  totalFiles: number,
+  ownedCount: number,
+  ownerIndex: Map<string, string[]>,
+): OwnersDataResult['summary'] {
+  const byOwner = [...ownerIndex.entries()]
+    .map(([owner, files]) => ({ owner, fileCount: files.length }))
+    .sort((a, b) => b.fileCount - a.fileCount);
+
+  return {
+    totalFiles,
+    ownedFiles: ownedCount,
+    unownedFiles: totalFiles - ownedCount,
+    coveragePercent: totalFiles > 0 ? Math.round((ownedCount / totalFiles) * 100) : 0,
+    ownerCount: ownerIndex.size,
+    byOwner,
+  };
+}
+
+export function ownersData(customDbPath?: string, opts: OwnersDataOpts = {}): OwnersDataResult {
   const db = openReadonlyOrFail(customDbPath);
   try {
     const dbPath = findDbPath(customDbPath);
     const repoRoot = path.resolve(path.dirname(dbPath), '..');
 
     const parsed = parseCodeowners(repoRoot);
-    if (!parsed) {
-      return {
-        codeownersFile: null,
-        files: [],
-        symbols: [],
-        boundaries: [],
-        summary: {
-          totalFiles: 0,
-          ownedFiles: 0,
-          unownedFiles: 0,
-          coveragePercent: 0,
-          ownerCount: 0,
-          byOwner: [],
-        },
-      };
-    }
-
-    // Get all distinct files from nodes
-    let allFiles = (db.prepare('SELECT DISTINCT file FROM nodes').all() as { file: string }[]).map(
-      (r) => r.file,
-    );
+    if (!parsed) return emptyOwnersResult(null);
 
-    if (opts.noTests) allFiles = allFiles.filter((f) => !isTestFile(f));
-    const fileFilters = normalizeFileFilter(opts.file);
-    if (fileFilters.length > 0) {
-      allFiles = allFiles.filter((f) => fileFilters.some((filter) => f.includes(filter)));
-    }
-
-    // Map files to owners
-    const fileOwners = allFiles.map((file) => ({
-      file,
-      owners: matchOwners(file, parsed.rules),
-    }));
-
-    // Build owner-to-files index
-    const ownerIndex = new Map<string, string[]>();
-    let ownedCount = 0;
-    for (const fo of fileOwners) {
-      if (fo.owners.length > 0) ownedCount++;
-      for (const o of fo.owners) {
-        if (!ownerIndex.has(o)) ownerIndex.set(o, []);
-        ownerIndex.get(o)!.push(fo.file);
-      }
-    }
+    // Stage 1: load files and bucket them by owner
+    const allFiles = loadFilteredFiles(db, opts);
+    const fileOwners = allFiles.map((file) => ({ file, owners: matchOwners(file, parsed.rules) }));
+    const { ownerIndex, ownedCount } = buildOwnerIndex(fileOwners);
 
-    // Filter files if --owner specified
-    let filteredFiles = fileOwners;
-    if (opts.owner) {
-      filteredFiles = fileOwners.filter((fo) => fo.owners.includes(opts.owner!));
-    }
+    // Stage 2: apply optional --owner filter
+    const filteredFiles = opts.owner
+      ? fileOwners.filter((fo) => fo.owners.includes(opts.owner!))
+      : fileOwners;
 
-    // Get symbols for filtered files
+    // Stage 3: load symbols for filtered files
     const fileSet = new Set(filteredFiles.map((fo) => fo.file));
-    let symbols = (
-      db.prepare('SELECT name, kind, file, line FROM nodes').all() as {
-        name: string;
-        kind: string;
-        file: string;
-        line: number;
-      }[]
-    ).filter((n) => fileSet.has(n.file));
-
-    if (opts.noTests) symbols = symbols.filter((s) => !isTestFile(s.file));
-    if (opts.kind) symbols = symbols.filter((s) => s.kind === opts.kind);
-
-    const symbolsWithOwners = symbols.map((s) => ({
-      ...s,
-      owners: matchOwners(s.file, parsed.rules),
-    }));
-
-    // Boundary analysis — cross-owner call edges
-    const boundaries: {
-      from: { name: string; kind: string; file: string; line: number; owners: string[] };
-      to: { name: string; kind: string; file: string; line: number; owners: string[] };
-      edgeKind: string;
-    }[] = [];
-    if (opts.boundary) {
-      const edges = db
-        .prepare(
-          `SELECT e.id, e.kind AS edgeKind,
-                  s.name AS srcName, s.kind AS srcKind, s.file AS srcFile, s.line AS srcLine,
-                  t.name AS tgtName, t.kind AS tgtKind, t.file AS tgtFile, t.line AS tgtLine
-           FROM edges e
-           JOIN nodes s ON e.source_id = s.id
-           JOIN nodes t ON e.target_id = t.id
-           WHERE e.kind = 'calls'`,
-        )
-        .all() as {
-        id: number;
-        edgeKind: string;
-        srcName: string;
-        srcKind: string;
-        srcFile: string;
-        srcLine: number;
-        tgtName: string;
-        tgtKind: string;
-        tgtFile: string;
-        tgtLine: number;
-      }[];
-
-      for (const e of edges) {
-        if (opts.noTests && (isTestFile(e.srcFile) || isTestFile(e.tgtFile))) continue;
-        const srcOwners = matchOwners(e.srcFile, parsed.rules);
-        const tgtOwners = matchOwners(e.tgtFile, parsed.rules);
-        // Cross-boundary: different owner sets
-        const srcKey = srcOwners.sort().join(',');
-        const tgtKey = tgtOwners.sort().join(',');
-        if (srcKey !== tgtKey) {
-          boundaries.push({
-            from: {
-              name: e.srcName,
-              kind: e.srcKind,
-              file: e.srcFile,
-              line: e.srcLine,
-              owners: srcOwners,
-            },
-            to: {
-              name: e.tgtName,
-              kind: e.tgtKind,
-              file: e.tgtFile,
-              line: e.tgtLine,
-              owners: tgtOwners,
-            },
-            edgeKind: e.edgeKind,
-          });
-        }
-      }
-    }
+    const symbolsWithOwners = loadSymbolsForFiles(db, fileSet, opts, parsed.rules);
 
-    // Summary
-    const byOwner = [...ownerIndex.entries()]
-      .map(([owner, files]) => ({ owner, fileCount: files.length }))
-      .sort((a, b) => b.fileCount - a.fileCount);
+    // Stage 4: optional boundary analysis (cross-owner call edges)
+    const boundaries = opts.boundary
+      ? computeOwnerBoundaries(db, parsed.rules, opts.noTests ?? false)
+      : [];
 
     return {
       codeownersFile: parsed.path,
       files: filteredFiles,
       symbols: symbolsWithOwners,
       boundaries,
-      summary: {
-        totalFiles: allFiles.length,
-        ownedFiles: ownedCount,
-        unownedFiles: allFiles.length - ownedCount,
-        coveragePercent: allFiles.length > 0 ? Math.round((ownedCount / allFiles.length) * 100) : 0,
-        ownerCount: ownerIndex.size,
-        byOwner,
-      },
+      summary: buildOwnersSummary(allFiles.length, ownedCount, ownerIndex),
     };
   } finally {
     db.close();
diff --git a/src/features/structure-query.ts b/src/features/structure-query.ts
index 21a9f6710..952bcf1b8 100644
--- a/src/features/structure-query.ts
+++ b/src/features/structure-query.ts
@@ -227,6 +227,96 @@ interface HotspotsDataOpts {
   noTests?: boolean;
 }
 
+type HotspotEntry = {
+  name: string;
+  kind: string;
+  lineCount: number | null;
+  symbolCount: number | null;
+  importCount: number | null;
+  exportCount: number | null;
+  fanIn: number | null;
+  fanOut: number | null;
+  cohesion: number | null;
+  fileCount: number | null;
+  density: number;
+  coupling: number;
+};
+
+/** Density: symbols per file when fileCount > 0, else symbols per line, else 0. */
+function computeHotspotDensity(
+  symbolCount: number | null,
+  fileCount: number | null,
+  lineCount: number | null,
+): number {
+  if ((fileCount ?? 0) > 0) return (symbolCount || 0) / (fileCount ?? 1);
+  if ((lineCount ?? 0) > 0) return (symbolCount || 0) / (lineCount ?? 1);
+  return 0;
+}
+
+/** Map a native-engine hotspot row (camelCase keys) to the public HotspotEntry shape. */
+function mapNativeHotspotRow(r: {
+  name: string;
+  kind: string;
+  lineCount: number | null;
+  symbolCount: number | null;
+  importCount: number | null;
+  exportCount: number | null;
+  fanIn: number | null;
+  fanOut: number | null;
+  cohesion: number | null;
+  fileCount: number | null;
+}): HotspotEntry {
+  return {
+    name: r.name,
+    kind: r.kind,
+    lineCount: r.lineCount,
+    symbolCount: r.symbolCount,
+    importCount: r.importCount,
+    exportCount: r.exportCount,
+    fanIn: r.fanIn,
+    fanOut: r.fanOut,
+    cohesion: r.cohesion,
+    fileCount: r.fileCount,
+    density: computeHotspotDensity(r.symbolCount, r.fileCount, r.lineCount),
+    coupling: (r.fanIn || 0) + (r.fanOut || 0),
+  };
+}
+
+/** Map a JS-path hotspot row (snake_case keys from SQLite) to the public HotspotEntry shape. */
+function mapJsHotspotRow(r: HotspotRow): HotspotEntry {
+  return {
+    name: r.name,
+    kind: r.kind,
+    lineCount: r.line_count,
+    symbolCount: r.symbol_count,
+    importCount: r.import_count,
+    exportCount: r.export_count,
+    fanIn: r.fan_in,
+    fanOut: r.fan_out,
+    cohesion: r.cohesion,
+    fileCount: r.file_count,
+    density: computeHotspotDensity(r.symbol_count, r.file_count, r.line_count),
+    coupling: (r.fan_in || 0) + (r.fan_out || 0),
+  };
+}
+
+/** ORDER BY clause for each ranking dimension (strategy pattern). */
+const HOTSPOT_ORDER_BY: Record<string, string> = {
+  'fan-in': 'nm.fan_in DESC NULLS LAST',
+  'fan-out': 'nm.fan_out DESC NULLS LAST',
+  density: 'nm.symbol_count DESC NULLS LAST',
+  coupling: '(COALESCE(nm.fan_in, 0) + COALESCE(nm.fan_out, 0)) DESC NULLS LAST',
+};
+
+/** Build the JS-path SQL query for a given metric and test filter. */
+function buildHotspotQuery(metric: string, testFilter: string): string {
+  const orderBy = HOTSPOT_ORDER_BY[metric] ?? HOTSPOT_ORDER_BY['fan-in'];
+  return `SELECT n.name, n.kind, nm.line_count, nm.symbol_count, nm.import_count, nm.export_count,
+                 nm.fan_in, nm.fan_out, nm.cohesion, nm.file_count
+          FROM nodes n JOIN node_metrics nm ON n.id = nm.node_id
+          WHERE n.kind = ? ${testFilter} ORDER BY ${orderBy} LIMIT ?`;
+}
+
 export function hotspotsData(
   customDbPath?: string,
   opts: HotspotsDataOpts = {},
@@ -242,96 +332,21 @@ export function hotspotsData(
     const level = opts.level || 'file';
     const limit = opts.limit || 10;
     const noTests = opts.noTests || false;
-
     const kind = level === 'directory' ? 'directory' : 'file';
 
-    const mapRow = (r: {
-      name: string;
-      kind: string;
-      lineCount: number | null;
-      symbolCount: number | null;
-      importCount: number | null;
-      exportCount: number | null;
-      fanIn: number | null;
-      fanOut: number | null;
-      cohesion: number | null;
-      fileCount: number | null;
-    }) => ({
-      name: r.name,
-      kind: r.kind,
-      lineCount: r.lineCount,
-      symbolCount: r.symbolCount,
-      importCount: r.importCount,
-      exportCount: r.exportCount,
-      fanIn: r.fanIn,
-      fanOut: r.fanOut,
-      cohesion: r.cohesion,
-      fileCount: r.fileCount,
-      density:
-        (r.fileCount ?? 0) > 0
-          ? (r.symbolCount || 0) / (r.fileCount ?? 1)
-          : (r.lineCount ?? 0) > 0
-            ? (r.symbolCount || 0) / (r.lineCount ?? 1)
-            : 0,
-      coupling: (r.fanIn || 0) + (r.fanOut || 0),
-    });
-
     // ── Native fast path: single query instead of 4 eagerly prepared ──
     if (nativeDb?.getHotspots) {
       const rows = nativeDb.getHotspots(kind, metric, noTests, limit);
-      const hotspots = rows.map(mapRow);
+      const hotspots = rows.map(mapNativeHotspotRow);
       const base = { metric, level, limit, hotspots };
       return paginateResult(base, 'hotspots', { limit: opts.limit, offset: opts.offset });
     }
 
     // ── JS fallback ───────────────────────────────────────────────────
     const testFilter = testFilterSQL('n.name', noTests && kind === 'file');
-
-    const HOTSPOT_QUERIES: Record<string, { all(...params: unknown[]): HotspotRow[] }> = {
-      'fan-in': db.prepare(`
-        SELECT n.name, n.kind, nm.line_count, nm.symbol_count, nm.import_count, nm.export_count,
-               nm.fan_in, nm.fan_out, nm.cohesion, nm.file_count
-        FROM nodes n JOIN node_metrics nm ON n.id = nm.node_id
-        WHERE n.kind = ? ${testFilter} ORDER BY nm.fan_in DESC NULLS LAST LIMIT ?`),
-      'fan-out': db.prepare(`
-        SELECT n.name, n.kind, nm.line_count, nm.symbol_count, nm.import_count, nm.export_count,
-               nm.fan_in, nm.fan_out, nm.cohesion, nm.file_count
-        FROM nodes n JOIN node_metrics nm ON n.id = nm.node_id
-        WHERE n.kind = ? ${testFilter} ORDER BY nm.fan_out DESC NULLS LAST LIMIT ?`),
-      density: db.prepare(`
-        SELECT n.name, n.kind, nm.line_count, nm.symbol_count, nm.import_count, nm.export_count,
-               nm.fan_in, nm.fan_out, nm.cohesion, nm.file_count
-        FROM nodes n JOIN node_metrics nm ON n.id = nm.node_id
-        WHERE n.kind = ? ${testFilter} ORDER BY nm.symbol_count DESC NULLS LAST LIMIT ?`),
-      coupling: db.prepare(`
-        SELECT n.name, n.kind, nm.line_count, nm.symbol_count, nm.import_count, nm.export_count,
-               nm.fan_in, nm.fan_out, nm.cohesion, nm.file_count
-        FROM nodes n JOIN node_metrics nm ON n.id = nm.node_id
-        WHERE n.kind = ? ${testFilter} ORDER BY (COALESCE(nm.fan_in, 0) + COALESCE(nm.fan_out, 0)) DESC NULLS LAST LIMIT ?`),
-    };
-
-    const stmt = HOTSPOT_QUERIES[metric] ?? HOTSPOT_QUERIES['fan-in'];
-    const rows = stmt!.all(kind, limit);
-
-    const hotspots = rows.map((r) => ({
-      name: r.name,
-      kind: r.kind,
-      lineCount: r.line_count,
-      symbolCount: r.symbol_count,
-      importCount: r.import_count,
-      exportCount: r.export_count,
-      fanIn: r.fan_in,
-      fanOut: r.fan_out,
-      cohesion: r.cohesion,
-      fileCount: r.file_count,
-      density:
-        (r.file_count ?? 0) > 0
-          ? (r.symbol_count || 0) / (r.file_count ?? 1)
-          : (r.line_count ?? 0) > 0
-            ? (r.symbol_count || 0) / (r.line_count ?? 1)
-            : 0,
-      coupling: (r.fan_in || 0) + (r.fan_out || 0),
-    }));
+    const stmt = db.prepare(buildHotspotQuery(metric, testFilter));
+    const rows = stmt.all(kind, limit) as HotspotRow[];
+    const hotspots = rows.map(mapJsHotspotRow);
 
     const base = { metric, level, limit, hotspots };
     return paginateResult(base, 'hotspots', { limit: opts.limit, offset: opts.offset });
diff --git a/src/features/structure.ts b/src/features/structure.ts
index 8fe6b5a9b..3e531cbad 100644
--- a/src/features/structure.ts
+++ b/src/features/structure.ts
@@ -532,6 +532,56 @@ function batchUpdateRoles(
   })();
 }
 
+interface CallableNodeRow {
+  id: number; // node id; stringified before it reaches the classifier
+  name: string;
+  kind: string; // 'constant' rows are handled specially when computing active files
+  file: string;
+  fan_in: number; // snake_case as returned by the query; exposed to the classifier as fanIn
+  fan_out: number; // exposed to the classifier as fanOut
+}
+
+/** Build the activeFiles set: files with at least one callable (non-constant) connected to the graph. */
+function buildActiveFilesSet(rows: CallableNodeRow[]): Set<string> {
+  const activeFiles = new Set<string>();
+  for (const r of rows) {
+    if ((r.fan_in > 0 || r.fan_out > 0) && r.kind !== 'constant') { // "connected" = any fan-in or fan-out
+      activeFiles.add(r.file); // constants in these files are likely consumed locally via identifier reference
+    }
+  }
+  return activeFiles;
+}
+
+/** Map callable rows to classifier input objects (one per row, same order), attaching exported/prod-fan-in/active-file metadata. */
+function buildClassifierInput(
+  rows: CallableNodeRow[],
+  exportedIds: Set<number>, // node ids treated as exported
+  prodFanInMap: Map<number, number>, // node id -> production fan-in count
+  activeFiles: Set<string>, // file paths, as produced by buildActiveFilesSet
+): Array<{
+  id: string;
+  name: string;
+  kind: string;
+  file: string;
+  fanIn: number;
+  fanOut: number;
+  isExported: boolean;
+  productionFanIn: number;
+  hasActiveFileSiblings: boolean | undefined;
+}> {
+  return rows.map((r) => ({
+    id: String(r.id), // classifier ids are strings
+    name: r.name,
+    kind: r.kind,
+    file: r.file,
+    fanIn: r.fan_in,
+    fanOut: r.fan_out,
+    isExported: exportedIds.has(r.id),
+    productionFanIn: prodFanInMap.get(r.id) || 0, // nodes absent from the map count as 0
+    hasActiveFileSiblings: r.kind === 'constant' ? activeFiles.has(r.file) : undefined, // only evaluated for constants
+  }));
+}
+
 function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSummary): RoleSummary {
   // Leaf kinds (parameter, property) can never have callers/callees.
   // Classify them directly as dead-leaf without the expensive fan-in/fan-out JOINs.
@@ -558,14 +608,7 @@ function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSumm
       ) fo ON n.id = fo.source_id
       WHERE n.kind NOT IN ('file', 'directory', 'parameter', 'property')`,
     )
-    .all() as {
-    id: number;
-    name: string;
-    kind: string;
-    file: string;
-    fan_in: number;
-    fan_out: number;
-  }[];
+    .all() as CallableNodeRow[];
 
   if (rows.length === 0 && leafRows.length === 0) return emptySummary;
 
@@ -629,28 +672,9 @@ function classifyNodeRolesFull(db: BetterSqlite3Database, emptySummary: RoleSumm
     prodFanInMap.set(r.target_id, r.cnt);
   }
 
-  // Files with at least one callable (non-constant) connected to the graph.
-  // Constants in these files are likely consumed locally via identifier reference.
-  const activeFiles = new Set<string>();
-  for (const r of rows) {
-    if ((r.fan_in > 0 || r.fan_out > 0) && r.kind !== 'constant') {
-      activeFiles.add(r.file);
-    }
-  }
-
   // Delegate classification to the pure-logic classifier
-  const classifierInput = rows.map((r) => ({
-    id: String(r.id),
-    name: r.name,
-    kind: r.kind,
-    file: r.file,
-    fanIn: r.fan_in,
-    fanOut: r.fan_out,
-    isExported: exportedIds.has(r.id),
-    productionFanIn: prodFanInMap.get(r.id) || 0,
-    hasActiveFileSiblings: r.kind === 'constant' ? activeFiles.has(r.file) : undefined,
-  }));
-
+  const activeFiles = buildActiveFilesSet(rows);
+  const classifierInput = buildClassifierInput(rows, exportedIds, prodFanInMap, activeFiles);
   const roleMap = classifyRoles(classifierInput);
 
   const { summary, idsByRole } = buildRoleSummary(rows, leafRows, roleMap, emptySummary);
@@ -733,14 +757,7 @@ function classifyNodeRolesIncremental(
       WHERE n.kind NOT IN ('file', 'directory', 'parameter', 'property')
         AND n.file IN (${placeholders})`,
     )
-    .all(...allAffectedFiles) as {
-    id: number;
-    name: string;
-    kind: string;
-    file: string;
-    fan_in: number;
-    fan_out: number;
-  }[];
+    .all(...allAffectedFiles) as CallableNodeRow[];
 
   if (rows.length === 0 && leafRows.length === 0) return emptySummary;
 
@@ -810,25 +827,8 @@ function classifyNodeRolesIncremental(
   }
 
   // 5. Classify affected nodes using global medians
-  const activeFiles = new Set<string>();
-  for (const r of rows) {
-    if ((r.fan_in > 0 || r.fan_out > 0) && r.kind !== 'constant') {
-      activeFiles.add(r.file);
-    }
-  }
-
-  const classifierInput = rows.map((r) => ({
-    id: String(r.id),
-    name: r.name,
-    kind: r.kind,
-    file: r.file,
-    fanIn: r.fan_in,
-    fanOut: r.fan_out,
-    isExported: exportedIds.has(r.id),
-    productionFanIn: prodFanInMap.get(r.id) || 0,
-    hasActiveFileSiblings: r.kind === 'constant' ? activeFiles.has(r.file) : undefined,
-  }));
-
+  const activeFiles = buildActiveFilesSet(rows);
+  const classifierInput = buildClassifierInput(rows, exportedIds, prodFanInMap, activeFiles);
   const roleMap = classifyRoles(classifierInput, globalMedians);
 
   // 6. Build summary (only for affected nodes) and update only those nodes

From 76bd476826524bac52b17c730d62fdb9a1d1047c Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 13:50:59 -0600
Subject: [PATCH 19/27] refactor(features): reduce complexity in cfg and
 cochange

---
 src/features/cfg.ts      | 203 ++++++++++++++++++++++++++-------------
 src/features/cochange.ts | 167 ++++++++++++++++++--------------
 2 files changed, 230 insertions(+), 140 deletions(-)

diff --git a/src/features/cfg.ts b/src/features/cfg.ts
index 7736c741b..7d07f27a8 100644
--- a/src/features/cfg.ts
+++ b/src/features/cfg.ts
@@ -365,79 +365,91 @@ function persistVisitorFileCfg(
   return count;
 }
 
-export async function buildCFGData(
+/**
+ * Build a single native bulk-insert entry (nodeId, blocks, edges) for one definition.
+ * Returns null unless def is a function/method with a line, a node row, and at least one CFG block.
+ */
+function buildNativeCfgEntry(
   db: BetterSqlite3Database,
-  fileSymbols: Map<string, FileSymbols>,
-  rootDir: string,
-  engineOpts?: {
-    nativeDb?: { bulkInsertCfg?(entries: Array<Record<string, unknown>>): number };
-    suspendJsDb?: () => void;
-    resumeJsDb?: () => void;
-  },
-): Promise<void> {
-  // Fast path: when all function/method defs already have native CFG data,
-  // skip WASM parser init, tree parsing, and JS visitor entirely — just persist.
-  const allNative = allCfgNative(fileSymbols);
+  def: Definition,
+  relPath: string,
+): Record<string, unknown> | null {
+  if (def.kind !== 'function' && def.kind !== 'method') return null;
+  if (!def.line) return null;
+
+  const nodeId = getFunctionNodeId(db, def.name, relPath, def.line);
+  if (!nodeId) return null;
+
+  const cfg = def.cfg as { blocks?: CfgBuildBlock[]; edges?: CfgBuildEdge[] } | undefined;
+  if (!cfg?.blocks?.length) return null;
+
+  return {
+    nodeId,
+    blocks: cfg.blocks.map((b) => ({
+      index: b.index,
+      blockType: b.type,
+      startLine: b.startLine ?? undefined, // null is normalised to undefined
+      endLine: b.endLine ?? undefined,
+      label: b.label ?? undefined,
+    })),
+    edges: (cfg.edges || []).map((e) => ({ // a missing edge list is treated as empty
+      sourceIndex: e.sourceIndex,
+      targetIndex: e.targetIndex,
+      kind: e.kind,
+    })),
+  };
+}
 
-  // ── Native bulk-insert fast path ──────────────────────────────────────
-  // The Rust bulkInsertCfg handles delete-before-insert atomically on a
-  // single rusqlite connection, so there is no dual-connection WAL conflict.
+/**
+ * Native bulk-insert fast path. The Rust bulkInsertCfg handles delete-before-insert
+ * atomically on a single rusqlite connection, so there is no dual-connection WAL conflict.
+ * Returns false (fall through to WASM/JS) only when nativeDb.bulkInsertCfg is unavailable;
+ * otherwise true (caller should return early), even when no entries were built or inserted.
+ */
+function tryNativeBulkInsertCfg(
+  db: BetterSqlite3Database,
+  fileSymbols: Map<string, FileSymbols>,
+  engineOpts:
+    | {
+        nativeDb?: { bulkInsertCfg?(entries: Array<Record<string, unknown>>): number };
+        suspendJsDb?: () => void;
+        resumeJsDb?: () => void;
+      }
+    | undefined,
+): boolean {
   const nativeDb = engineOpts?.nativeDb;
-  if (allNative && nativeDb?.bulkInsertCfg) {
-    const entries: Array<Record<string, unknown>> = [];
-    for (const [relPath, symbols] of fileSymbols) {
-      const ext = path.extname(relPath).toLowerCase();
-      if (!CFG_EXTENSIONS.has(ext)) continue;
+  if (!nativeDb?.bulkInsertCfg) return false; // no native bulk API: let the caller fall through
 
-      for (const def of symbols.definitions) {
-        if (def.kind !== 'function' && def.kind !== 'method') continue;
-        if (!def.line) continue;
-
-        const nodeId = getFunctionNodeId(db, def.name, relPath, def.line);
-        if (!nodeId) continue;
-
-        const cfg = def.cfg as { blocks?: CfgBuildBlock[]; edges?: CfgBuildEdge[] } | undefined;
-        if (!cfg?.blocks?.length) continue;
-
-        entries.push({
-          nodeId,
-          blocks: cfg.blocks.map((b) => ({
-            index: b.index,
-            blockType: b.type,
-            startLine: b.startLine ?? undefined,
-            endLine: b.endLine ?? undefined,
-            label: b.label ?? undefined,
-          })),
-          edges: (cfg.edges || []).map((e) => ({
-            sourceIndex: e.sourceIndex,
-            targetIndex: e.targetIndex,
-            kind: e.kind,
-          })),
-        });
-      }
-    }
+  const entries: Array<Record<string, unknown>> = [];
+  for (const [relPath, symbols] of fileSymbols) {
+    const ext = path.extname(relPath).toLowerCase();
+    if (!CFG_EXTENSIONS.has(ext)) continue;
 
-    if (entries.length > 0) {
-      let inserted = 0;
-      try {
-        engineOpts?.suspendJsDb?.();
-        inserted = nativeDb.bulkInsertCfg(entries);
-      } finally {
-        engineOpts?.resumeJsDb?.();
-      }
-      info(`CFG (native bulk): ${inserted} functions analyzed`);
+    for (const def of symbols.definitions) {
+      const entry = buildNativeCfgEntry(db, def, relPath);
+      if (entry) entries.push(entry);
     }
-    return;
   }
 
-  const extToLang = buildExtToLangMap();
-  let parsers: unknown = null;
-  let getParserFn: unknown = null;
-
-  if (!allNative) {
-    ({ parsers, getParserFn } = await initCfgParsers(fileSymbols));
+  if (entries.length > 0) {
+    let inserted = 0;
+    try {
+      engineOpts?.suspendJsDb?.();
+      inserted = nativeDb.bulkInsertCfg(entries);
+    } finally {
+      engineOpts?.resumeJsDb?.(); // always resume, even if the bulk insert throws
+    }
+    info(`CFG (native bulk): ${inserted} functions analyzed`);
   }
+  return true; // handled, even when there were no entries to insert
+}
 
+interface CfgInsertStatements {
+  insertBlock: ReturnType<BetterSqlite3Database['prepare']>; // prepared INSERT INTO cfg_blocks
+  insertEdge: ReturnType<BetterSqlite3Database['prepare']>; // prepared INSERT INTO cfg_edges
+}
+
+function prepareCfgInsertStatements(db: BetterSqlite3Database): CfgInsertStatements {
   const insertBlock = db.prepare(
     `INSERT INTO cfg_blocks (function_node_id, block_index, block_type, start_line, end_line, label)
      VALUES (?, ?, ?, ?, ?, ?)`,
@@ -446,15 +458,31 @@ export async function buildCFGData(
     `INSERT INTO cfg_edges (function_node_id, source_block_id, target_block_id, kind)
      VALUES (?, ?, ?, ?)`,
   );
-  let analyzed = 0;
+  return { insertBlock, insertEdge };
+}
 
+/**
+ * Persist CFG for every CFG-eligible file inside a single transaction.
+ * Dispatches to native fast path or visitor path per file.
+ */
+function persistAllFileCfgs(
+  db: BetterSqlite3Database,
+  fileSymbols: Map<string, FileSymbols>,
+  rootDir: string,
+  allNative: boolean,
+  extToLang: Map<string, string>,
+  parsers: unknown,
+  getParserFn: unknown,
+  stmts: CfgInsertStatements,
+): number {
+  let analyzed = 0;
   const tx = db.transaction(() => {
     for (const [relPath, symbols] of fileSymbols) {
       const ext = path.extname(relPath).toLowerCase();
       if (!CFG_EXTENSIONS.has(ext)) continue;
 
       if (allNative && !symbols._tree) {
-        analyzed += persistNativeFileCfg(db, symbols, relPath, insertBlock, insertEdge);
+        analyzed += persistNativeFileCfg(db, symbols, relPath, stmts.insertBlock, stmts.insertEdge);
         continue;
       }
 
@@ -466,13 +494,52 @@ export async function buildCFGData(
         extToLang,
         parsers,
         getParserFn,
-        insertBlock,
-        insertEdge,
+        stmts.insertBlock,
+        stmts.insertEdge,
       );
     }
   });
-
   tx();
+  return analyzed;
+}
+
+export async function buildCFGData(
+  db: BetterSqlite3Database,
+  fileSymbols: Map<string, FileSymbols>,
+  rootDir: string,
+  engineOpts?: {
+    nativeDb?: { bulkInsertCfg?(entries: Array<Record<string, unknown>>): number };
+    suspendJsDb?: () => void;
+    resumeJsDb?: () => void;
+  },
+): Promise<void> {
+  // Fast path: when all function/method defs already have native CFG data,
+  // skip WASM parser init, tree parsing, and JS visitor entirely — just persist.
+  const allNative = allCfgNative(fileSymbols);
+
+  if (allNative && tryNativeBulkInsertCfg(db, fileSymbols, engineOpts)) {
+    return;
+  }
+
+  const extToLang = buildExtToLangMap();
+  let parsers: unknown = null;
+  let getParserFn: unknown = null;
+
+  if (!allNative) {
+    ({ parsers, getParserFn } = await initCfgParsers(fileSymbols));
+  }
+
+  const stmts = prepareCfgInsertStatements(db);
+  const analyzed = persistAllFileCfgs(
+    db,
+    fileSymbols,
+    rootDir,
+    allNative,
+    extToLang,
+    parsers,
+    getParserFn,
+    stmts,
+  );
 
   if (analyzed > 0) {
     info(`CFG: ${analyzed} functions analyzed`);
diff --git a/src/features/cochange.ts b/src/features/cochange.ts
index ffda28d29..2c4b9c379 100644
--- a/src/features/cochange.ts
+++ b/src/features/cochange.ts
@@ -137,77 +137,50 @@ export function computeCoChanges(
   return { pairs: results, fileCommitCounts };
 }
 
-export function analyzeCoChanges(
-  customDbPath?: string,
-  opts: {
-    since?: string;
-    minSupport?: number;
-    maxFilesPerCommit?: number;
-    full?: boolean;
-  } = {},
-):
-  | { pairsFound: number; commitsScanned: number; since: string; minSupport: number }
-  | { error: string } {
-  const dbPath = findDbPath(customDbPath);
-  const db = openDb(dbPath);
-  initSchema(db);
-
-  const repoRoot = path.resolve(path.dirname(dbPath), '..');
-
-  if (!fs.existsSync(path.join(repoRoot, '.git'))) {
-    closeDb(db);
-    return { error: `Not a git repository: ${repoRoot}` };
-  }
-
-  const since = opts.since || '1 year ago';
-  const minSupport = opts.minSupport ?? 3;
-  const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50;
-
-  // Check for incremental state
-  let afterSha: string | null = null;
-  if (!opts.full) {
-    try {
-      const row = db
-        .prepare<{ value: string }>(
-          "SELECT value FROM co_change_meta WHERE key = 'last_analyzed_commit'",
-        )
-        .get();
-      if (row) afterSha = row.value;
-    } catch {
-      /* table may not exist yet */
-    }
+/** Read the SHA of the most recently analyzed commit (incremental state); null when no row is stored or the lookup throws. */
+function loadLastAnalyzedSha(db: BetterSqlite3Database): string | null {
+  try {
+    const row = db
+      .prepare<{ value: string }>(
+        "SELECT value FROM co_change_meta WHERE key = 'last_analyzed_commit'",
+      )
+      .get();
+    return row ? row.value : null;
+  } catch {
+    /* table may not exist yet */
+    return null;
   }
+}
 
-  // If full re-scan, clear existing data
-  if (opts.full) {
-    db.exec('DELETE FROM co_changes');
-    db.exec('DELETE FROM co_change_meta');
-    db.exec('DELETE FROM file_commit_counts');
-  }
+/** Wipe all co-change tables (co_changes, co_change_meta, file_commit_counts) for a full re-scan, as three separate exec calls. */
+function clearCoChangeTables(db: BetterSqlite3Database): void {
+  db.exec('DELETE FROM co_changes');
+  db.exec('DELETE FROM co_change_meta'); // also drops the stored last_analyzed_commit
+  db.exec('DELETE FROM file_commit_counts');
+}
 
-  // Collect known files from the graph for filtering
-  let knownFiles: Set<string> | null = null;
+/** Collect the distinct file paths of all graph nodes, used to filter co-change input; null when the query throws. */
+function loadKnownFiles(db: BetterSqlite3Database): Set<string> | null {
   try {
     const rows = db.prepare<{ file: string }>('SELECT DISTINCT file FROM nodes').all();
-    knownFiles = new Set(rows.map((r) => r.file));
+    return new Set(rows.map((r) => r.file));
   } catch {
     /* nodes table may not exist */
+    return null;
   }
+}
 
-  const { commits } = scanGitHistory(repoRoot, { since, afterSha });
-  const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, {
-    minSupport,
-    maxFilesPerCommit,
-    knownFiles,
-  });
-
-  // Upsert per-file commit counts so Jaccard can be recomputed from totals
+/** Upsert per-file commit counts and pair counts (Jaccard recomputed later). */
+function persistCoChangeResults(
+  db: BetterSqlite3Database,
+  fileCommitCounts: Map<string, number>,
+  coChanges: Map<string, CoChangePair>,
+): void {
   const fileCountUpsert = db.prepare(`
     INSERT INTO file_commit_counts (file, commit_count) VALUES (?, ?)
     ON CONFLICT(file) DO UPDATE SET commit_count = commit_count + excluded.commit_count
   `);
 
-  // Upsert pair counts (accumulate commit_count, jaccard placeholder — recomputed below)
   const pairUpsert = db.prepare(`
     INSERT INTO co_changes (file_a, file_b, commit_count, jaccard, last_commit_epoch)
     VALUES (?, ?, ?, 0, ?)
@@ -226,24 +199,31 @@ export function analyzeCoChanges(
     }
   });
   insertMany();
+}
 
-  // Recompute Jaccard for all affected pairs from total file commit counts
-  const affectedFiles = [...fileCommitCounts.keys()];
-  if (affectedFiles.length > 0) {
-    const ph = affectedFiles.map(() => '?').join(',');
-    db.prepare(`
-      UPDATE co_changes SET jaccard = (
-        SELECT CAST(co_changes.commit_count AS REAL) / (
-          COALESCE(fa.commit_count, 0) + COALESCE(fb.commit_count, 0) - co_changes.commit_count
-        )
-        FROM file_commit_counts fa, file_commit_counts fb
-        WHERE fa.file = co_changes.file_a AND fb.file = co_changes.file_b
+/** Recompute Jaccard (pair commits / (commits of A + commits of B - pair commits)) for every pair touching any file in `affectedFiles`. */
+function recomputeJaccardForAffected(db: BetterSqlite3Database, affectedFiles: string[]): void {
+  if (affectedFiles.length === 0) return; // nothing changed: skip the UPDATE
+  const ph = affectedFiles.map(() => '?').join(','); // one '?' per file; the list is bound twice below
+  db.prepare(`
+    UPDATE co_changes SET jaccard = (
+      SELECT CAST(co_changes.commit_count AS REAL) / (
+        COALESCE(fa.commit_count, 0) + COALESCE(fb.commit_count, 0) - co_changes.commit_count
       )
-      WHERE file_a IN (${ph}) OR file_b IN (${ph})
-    `).run(...affectedFiles, ...affectedFiles);
-  }
+      FROM file_commit_counts fa, file_commit_counts fb
+      WHERE fa.file = co_changes.file_a AND fb.file = co_changes.file_b
+    )
+    WHERE file_a IN (${ph}) OR file_b IN (${ph})
+  `).run(...affectedFiles, ...affectedFiles); // NOTE(review): binds 2 x affectedFiles.length parameters — confirm this stays under SQLite's bound-parameter limit
+}
 
-  // Update metadata
+/** Update co_change_meta with the latest analyzer run parameters. */
+function updateCoChangeMeta(
+  db: BetterSqlite3Database,
+  commits: CommitEntry[],
+  since: string,
+  minSupport: number,
+): void {
   const metaUpsert = db.prepare(`
     INSERT INTO co_change_meta (key, value) VALUES (?, ?)
     ON CONFLICT(key) DO UPDATE SET value = excluded.value
@@ -254,6 +234,49 @@ export function analyzeCoChanges(
   metaUpsert.run('analyzed_at', new Date().toISOString());
   metaUpsert.run('since', since);
   metaUpsert.run('min_support', String(minSupport));
+}
+
+export function analyzeCoChanges(
+  customDbPath?: string,
+  opts: {
+    since?: string;
+    minSupport?: number;
+    maxFilesPerCommit?: number;
+    full?: boolean;
+  } = {},
+):
+  | { pairsFound: number; commitsScanned: number; since: string; minSupport: number }
+  | { error: string } {
+  const dbPath = findDbPath(customDbPath);
+  const db = openDb(dbPath);
+  initSchema(db);
+
+  const repoRoot = path.resolve(path.dirname(dbPath), '..');
+
+  if (!fs.existsSync(path.join(repoRoot, '.git'))) {
+    closeDb(db);
+    return { error: `Not a git repository: ${repoRoot}` };
+  }
+
+  const since = opts.since || '1 year ago';
+  const minSupport = opts.minSupport ?? 3;
+  const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50;
+
+  const afterSha = opts.full ? null : loadLastAnalyzedSha(db);
+  if (opts.full) clearCoChangeTables(db);
+
+  const knownFiles = loadKnownFiles(db);
+
+  const { commits } = scanGitHistory(repoRoot, { since, afterSha });
+  const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, {
+    minSupport,
+    maxFilesPerCommit,
+    knownFiles,
+  });
+
+  persistCoChangeResults(db, fileCommitCounts, coChanges);
+  recomputeJaccardForAffected(db, [...fileCommitCounts.keys()]);
+  updateCoChangeMeta(db, commits, since, minSupport);
 
   const totalPairs = db
     .prepare<{ cnt: number }>('SELECT COUNT(*) as cnt FROM co_changes')

From 4240dfea791fce0b5357b4e22a178e239a52c380 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 13:56:46 -0600
Subject: [PATCH 20/27] refactor(graph): decompose Leiden optimiser and roles
 classifier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Internal refactor — public APIs unchanged. docs check acknowledged.
---
 src/graph/algorithms/leiden/optimiser.ts | 229 ++++++++++++++---------
 src/graph/classifiers/roles.ts           | 118 ++++++------
 2 files changed, 206 insertions(+), 141 deletions(-)

diff --git a/src/graph/algorithms/leiden/optimiser.ts b/src/graph/algorithms/leiden/optimiser.ts
index 35c5b7b5e..5d6753c70 100644
--- a/src/graph/algorithms/leiden/optimiser.ts
+++ b/src/graph/algorithms/leiden/optimiser.ts
@@ -88,12 +88,10 @@ export function runLouvainUndirectedModularity(
   optionsInput: LeidenOptions = {},
 ): LouvainResult {
   const options: NormalizedOptions = normalizeOptions(optionsInput);
-  let currentGraph: CodeGraph = graph;
-  const levels: LevelEntry[] = [];
   const rngSource = createRng(options.randomSeed);
   const random: () => number = () => rngSource.nextDouble();
 
-  const baseGraphAdapter: GraphAdapter = makeGraphAdapter(currentGraph, {
+  const baseGraphAdapter: GraphAdapter = makeGraphAdapter(graph, {
     directed: options.directed,
     ...optionsInput,
   });
@@ -101,98 +99,27 @@ export function runLouvainUndirectedModularity(
   const originalToCurrent = new Int32Array(origN);
   for (let i = 0; i < origN; i++) originalToCurrent[i] = i;
 
-  let fixedNodeMask: Uint8Array | null = null;
-  if (options.fixedNodes) {
-    const fixed = new Uint8Array(origN);
-    const asSet: Set<string> =
-      options.fixedNodes instanceof Set ? options.fixedNodes : new Set(options.fixedNodes);
-    for (const id of asSet) {
-      const idx = baseGraphAdapter.idToIndex.get(String(id));
-      if (idx != null) fixed[idx] = 1;
-    }
-    fixedNodeMask = fixed;
-  }
+  const fixedNodeMask: Uint8Array | null = buildFixedNodeMask(baseGraphAdapter, options.fixedNodes);
 
+  const levels: LevelEntry[] = [];
+  let currentGraph: CodeGraph = graph;
   for (let level = 0; level < options.maxLevels; level++) {
     const graphAdapter: GraphAdapter =
       level === 0
         ? baseGraphAdapter
         : makeGraphAdapter(currentGraph, { directed: options.directed, ...optionsInput });
-    const partition: Partition = makePartition(graphAdapter);
-    partition.graph = graphAdapter;
-    partition.initializeAggregates();
-
-    const order = new Int32Array(graphAdapter.n);
-    for (let i = 0; i < graphAdapter.n; i++) order[i] = i;
-
-    let improved: boolean = true;
-    let localPasses: number = 0;
-    const strategyCode: CandidateStrategyCode = options.candidateStrategyCode;
-    while (improved) {
-      improved = false;
-      localPasses++;
-      shuffleArrayInPlace(order, random);
-      for (let idx = 0; idx < order.length; idx++) {
-        const nodeIndex: number = order[idx]!;
-        if (level === 0 && fixedNodeMask && fixedNodeMask[nodeIndex]) continue;
-        const candidateCount: number = partition.accumulateNeighborCommunityEdgeWeights(nodeIndex);
-        const { bestCommunityId, bestGain } = findBestCommunityMove(
-          partition,
-          graphAdapter,
-          nodeIndex,
-          candidateCount,
-          strategyCode,
-          options,
-          random,
-        );
-        if (bestCommunityId !== partition.nodeCommunity[nodeIndex]! && bestGain > GAIN_EPSILON) {
-          partition.moveNodeToCommunity(nodeIndex, bestCommunityId);
-          improved = true;
-        }
-      }
-      if (localPasses >= options.maxLocalPasses) break;
-    }
-
-    renumberCommunities(partition, options.preserveLabels);
-
-    let effectivePartition: Partition = partition;
-    if (options.refine) {
-      const refined: Partition = refineWithinCoarseCommunities(
-        graphAdapter,
-        partition,
-        random,
-        options,
-        level === 0 ? fixedNodeMask : null,
-      );
-      // Post-refinement: split any disconnected communities into their
-      // connected components. This is the cheap O(V+E) alternative to
-      // checking gamma-connectedness on every candidate during refinement.
-      // A disconnected community violates even basic connectivity, so
-      // splitting is always correct.
-      splitDisconnectedCommunities(graphAdapter, refined);
-      renumberCommunities(refined, options.preserveLabels);
-      effectivePartition = refined;
-    }
+    const levelOutcome = runLevel(
+      graphAdapter,
+      options,
+      random,
+      level === 0 ? fixedNodeMask : null,
+    );
 
-    levels.push({ graph: graphAdapter, partition: effectivePartition });
-    const fineToCoarse: Int32Array = effectivePartition.nodeCommunity;
-    for (let i = 0; i < originalToCurrent.length; i++) {
-      originalToCurrent[i] = fineToCoarse[originalToCurrent[i]!]!;
-    }
+    levels.push({ graph: graphAdapter, partition: levelOutcome.effectivePartition });
+    applyFineToCoarseMapping(originalToCurrent, levelOutcome.effectivePartition.nodeCommunity);
 
-    // Terminate when no further coarsening is possible. Check both the
-    // move-phase partition (did the greedy phase find merges?) and the
-    // effective partition that feeds buildCoarseGraph (would coarsening
-    // actually reduce the graph?). When refine is enabled the refined
-    // partition starts from singletons and may have more communities than
-    // the move phase found, so checking only effectivePartition would
-    // cause premature termination.
-    if (
-      partition.communityCount === graphAdapter.n &&
-      effectivePartition.communityCount === graphAdapter.n
-    )
-      break;
-    currentGraph = buildCoarseGraph(graphAdapter, effectivePartition);
+    if (levelOutcome.terminate) break;
+    currentGraph = buildCoarseGraph(graphAdapter, levelOutcome.effectivePartition);
   }
 
   const last: LevelEntry = levels[levels.length - 1]!;
@@ -206,6 +133,134 @@ export function runLouvainUndirectedModularity(
   };
 }
 
+/**
+ * Build a fixed-node mask aligned with the base graph adapter's node indices.
+ * Returns null when no fixed nodes are configured.
+ */
+function buildFixedNodeMask(
+  baseGraphAdapter: GraphAdapter,
+  fixedNodes: Set<string> | string[] | undefined,
+): Uint8Array | null {
+  if (!fixedNodes) return null;
+  const mask = new Uint8Array(baseGraphAdapter.n);
+  const asSet: Set<string> = fixedNodes instanceof Set ? fixedNodes : new Set(fixedNodes);
+  for (const id of asSet) {
+    const idx = baseGraphAdapter.idToIndex.get(String(id));
+    if (idx != null) mask[idx] = 1;
+  }
+  return mask;
+}
+
+interface LevelOutcome {
+  effectivePartition: Partition;
+  terminate: boolean;
+}
+
+/**
+ * Run one level of the Louvain/Leiden pipeline: greedy local-move phase,
+ * optional Leiden refinement, and a termination check. Returns the
+ * partition that feeds the next coarse graph plus a `terminate` flag set
+ * when no further coarsening is possible.
+ */
+function runLevel(
+  graphAdapter: GraphAdapter,
+  options: NormalizedOptions,
+  random: () => number,
+  fixedNodeMask: Uint8Array | null,
+): LevelOutcome {
+  const partition: Partition = makePartition(graphAdapter);
+  partition.graph = graphAdapter;
+  partition.initializeAggregates();
+
+  runLocalMovePhase(graphAdapter, partition, options, random, fixedNodeMask);
+  renumberCommunities(partition, options.preserveLabels);
+
+  let effectivePartition: Partition = partition;
+  if (options.refine) {
+    const refined: Partition = refineWithinCoarseCommunities(
+      graphAdapter,
+      partition,
+      random,
+      options,
+      fixedNodeMask,
+    );
+    // Post-refinement: split any disconnected communities into their
+    // connected components. This is the cheap O(V+E) alternative to
+    // checking gamma-connectedness on every candidate during refinement.
+    // A disconnected community violates even basic connectivity, so
+    // splitting is always correct.
+    splitDisconnectedCommunities(graphAdapter, refined);
+    renumberCommunities(refined, options.preserveLabels);
+    effectivePartition = refined;
+  }
+
+  // Terminate when no further coarsening is possible. Check both the
+  // move-phase partition (did the greedy phase find merges?) and the
+  // effective partition that feeds buildCoarseGraph (would coarsening
+  // actually reduce the graph?). When refine is enabled the refined
+  // partition starts from singletons and may have more communities than
+  // the move phase found, so checking only effectivePartition would
+  // cause premature termination.
+  const terminate =
+    partition.communityCount === graphAdapter.n &&
+    effectivePartition.communityCount === graphAdapter.n;
+  return { effectivePartition, terminate };
+}
+
+/**
+ * Greedy local-move phase: iterate randomly over nodes, moving each to the
+ * best community among the candidate set. Loops until no improvement or
+ * `maxLocalPasses` is reached.
+ */
+function runLocalMovePhase(
+  graphAdapter: GraphAdapter,
+  partition: Partition,
+  options: NormalizedOptions,
+  random: () => number,
+  fixedNodeMask: Uint8Array | null,
+): void {
+  const order = new Int32Array(graphAdapter.n);
+  for (let i = 0; i < graphAdapter.n; i++) order[i] = i;
+
+  const strategyCode: CandidateStrategyCode = options.candidateStrategyCode;
+  let improved: boolean = true;
+  let localPasses: number = 0;
+  while (improved) {
+    improved = false;
+    localPasses++;
+    shuffleArrayInPlace(order, random);
+    for (let idx = 0; idx < order.length; idx++) {
+      const nodeIndex: number = order[idx]!;
+      if (fixedNodeMask?.[nodeIndex]) continue;
+      const candidateCount: number = partition.accumulateNeighborCommunityEdgeWeights(nodeIndex);
+      const { bestCommunityId, bestGain } = findBestCommunityMove(
+        partition,
+        graphAdapter,
+        nodeIndex,
+        candidateCount,
+        strategyCode,
+        options,
+        random,
+      );
+      if (bestCommunityId !== partition.nodeCommunity[nodeIndex]! && bestGain > GAIN_EPSILON) {
+        partition.moveNodeToCommunity(nodeIndex, bestCommunityId);
+        improved = true;
+      }
+    }
+    if (localPasses >= options.maxLocalPasses) break;
+  }
+}
+
+/**
+ * Compose the running `originalToCurrent` mapping with this level's
+ * fine→coarse community labels, in place.
+ */
+function applyFineToCoarseMapping(originalToCurrent: Int32Array, fineToCoarse: Int32Array): void {
+  for (let i = 0; i < originalToCurrent.length; i++) {
+    originalToCurrent[i] = fineToCoarse[originalToCurrent[i]!]!;
+  }
+}
+
 /**
  * Evaluate all candidate communities for a node and return the best move.
  * Encapsulates the four candidate-selection strategies (All, RandomAny,
diff --git a/src/graph/classifiers/roles.ts b/src/graph/classifiers/roles.ts
index 59fd3dcfa..834e3d5b8 100644
--- a/src/graph/classifiers/roles.ts
+++ b/src/graph/classifiers/roles.ts
@@ -78,6 +78,68 @@ export interface RoleClassificationNode {
   hasActiveFileSiblings?: boolean;
 }
 
+/**
+ * Compute median fan-in and fan-out across nodes with non-zero values.
+ * Used as thresholds for "high" fan-in/out classification.
+ */
+function computeFanMedians(nodes: RoleClassificationNode[]): { fanIn: number; fanOut: number } {
+  const nonZeroFanIn = nodes
+    .filter((n) => n.fanIn > 0)
+    .map((n) => n.fanIn)
+    .sort((a, b) => a - b);
+  const nonZeroFanOut = nodes
+    .filter((n) => n.fanOut > 0)
+    .map((n) => n.fanOut)
+    .sort((a, b) => a - b);
+  return { fanIn: median(nonZeroFanIn), fanOut: median(nonZeroFanOut) };
+}
+
+/**
+ * Classify a node with `fanIn === 0` that is not exported.
+ * Covers constants with active file siblings (→ leaf), test-only callables, and the dead-* family.
+ */
+function classifyUnreferencedNode(node: RoleClassificationNode): Role {
+  if (node.kind === 'constant' && node.hasActiveFileSiblings) {
+    // Constants consumed via identifier reference (not calls) have no
+    // inbound call edges. If the same file has active callables, the
+    // constant is almost certainly used locally — classify as leaf.
+    return 'leaf';
+  }
+  if (node.testOnlyFanIn != null && node.testOnlyFanIn > 0) return 'test-only';
+  return classifyDeadSubRole(node);
+}
+
+/**
+ * Pick a role from fan-in/fan-out shape: core/utility/adapter/leaf.
+ * Called after entry/test-only/dead cases have been ruled out.
+ */
+function classifyByFanShape(highIn: boolean, highOut: boolean): Role {
+  if (highIn && !highOut) return 'core';
+  if (highIn && highOut) return 'utility';
+  if (!highIn && highOut) return 'adapter';
+  return 'leaf';
+}
+
+/**
+ * Apply role-classification rules to a single node.
+ * Order matters — framework entries are tagged first, then dead/test cases,
+ * then the fan-in/fan-out shape decides among the structural roles.
+ */
+function classifyNodeRole(node: RoleClassificationNode, medFanIn: number, medFanOut: number): Role {
+  if (FRAMEWORK_ENTRY_PREFIXES.some((p) => node.name.startsWith(p))) return 'entry';
+
+  if (node.fanIn === 0) {
+    return node.isExported ? 'entry' : classifyUnreferencedNode(node);
+  }
+
+  const hasProdFanIn = typeof node.productionFanIn === 'number';
+  if (hasProdFanIn && node.productionFanIn === 0 && !node.isExported) return 'test-only';
+
+  const highIn = node.fanIn >= medFanIn;
+  const highOut = node.fanOut >= medFanOut && node.fanOut > 0;
+  return classifyByFanShape(highIn, highOut);
+}
+
 /**
  * Classify nodes into architectural roles based on fan-in/fan-out metrics.
  */
@@ -87,63 +149,11 @@ export function classifyRoles(
 ): Map<string, Role> {
   if (nodes.length === 0) return new Map();
 
-  let medFanIn: number;
-  let medFanOut: number;
-  if (medianOverrides) {
-    medFanIn = medianOverrides.fanIn;
-    medFanOut = medianOverrides.fanOut;
-  } else {
-    const nonZeroFanIn = nodes
-      .filter((n) => n.fanIn > 0)
-      .map((n) => n.fanIn)
-      .sort((a, b) => a - b);
-    const nonZeroFanOut = nodes
-      .filter((n) => n.fanOut > 0)
-      .map((n) => n.fanOut)
-      .sort((a, b) => a - b);
-    medFanIn = median(nonZeroFanIn);
-    medFanOut = median(nonZeroFanOut);
-  }
+  const { fanIn: medFanIn, fanOut: medFanOut } = medianOverrides ?? computeFanMedians(nodes);
 
   const result = new Map<string, Role>();
-
   for (const node of nodes) {
-    const highIn = node.fanIn >= medFanIn && node.fanIn > 0;
-    const highOut = node.fanOut >= medFanOut && node.fanOut > 0;
-    const hasProdFanIn = typeof node.productionFanIn === 'number';
-
-    let role: Role;
-    const isFrameworkEntry = FRAMEWORK_ENTRY_PREFIXES.some((p) => node.name.startsWith(p));
-    if (isFrameworkEntry) {
-      role = 'entry';
-    } else if (node.fanIn === 0 && !node.isExported) {
-      if (node.kind === 'constant' && node.hasActiveFileSiblings) {
-        // Constants consumed via identifier reference (not calls) have no
-        // inbound call edges. If the same file has active callables, the
-        // constant is almost certainly used locally — classify as leaf.
-        role = 'leaf';
-      } else {
-        role =
-          node.testOnlyFanIn != null && node.testOnlyFanIn > 0
-            ? 'test-only'
-            : classifyDeadSubRole(node);
-      }
-    } else if (node.fanIn === 0 && node.isExported) {
-      role = 'entry';
-    } else if (hasProdFanIn && node.fanIn > 0 && node.productionFanIn === 0 && !node.isExported) {
-      role = 'test-only';
-    } else if (highIn && !highOut) {
-      role = 'core';
-    } else if (highIn && highOut) {
-      role = 'utility';
-    } else if (!highIn && highOut) {
-      role = 'adapter';
-    } else {
-      role = 'leaf';
-    }
-
-    result.set(node.id, role);
+    result.set(node.id, classifyNodeRole(node, medFanIn, medFanOut));
   }
-
   return result;
 }

From 900af101b0a4f02a4ca951f39aa0f1313991039d Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 14:03:11 -0600
Subject: [PATCH 21/27] refactor(presentation): extract shared rendering
 helpers in cfg and flow

---
 src/presentation/cfg.ts  |  80 ++++++++++++---------
 src/presentation/flow.ts | 146 +++++++++++++++++++++++++--------------
 2 files changed, 143 insertions(+), 83 deletions(-)

diff --git a/src/presentation/cfg.ts b/src/presentation/cfg.ts
index 7970a413d..5e9762308 100644
--- a/src/presentation/cfg.ts
+++ b/src/presentation/cfg.ts
@@ -1,6 +1,8 @@
 import { cfgData, cfgToDOT, cfgToMermaid } from '../features/cfg.js';
 import { outputResult } from '../infrastructure/result-formatter.js';
 
+type CfgData = ReturnType<typeof cfgData>;
+
 interface CfgCliOpts {
   json?: boolean;
   ndjson?: boolean;
@@ -36,13 +38,56 @@ interface CfgResultEntry {
   edges: CfgEdge[];
 }
 
+function renderBlockLocation(b: CfgBlock): string {
+  if (!b.startLine) return '';
+  const endSuffix = b.endLine && b.endLine !== b.startLine ? `-${b.endLine}` : '';
+  return ` L${b.startLine}${endSuffix}`;
+}
+
+function printCfgBlocks(blocks: CfgBlock[]): void {
+  if (blocks.length === 0) return;
+  console.log('\n  Blocks:');
+  for (const b of blocks) {
+    const label = b.label ? ` (${b.label})` : '';
+    console.log(`    [${b.index}] ${b.type}${label}${renderBlockLocation(b)}`);
+  }
+}
+
+function printCfgEdges(edges: CfgEdge[]): void {
+  if (edges.length === 0) return;
+  console.log('\n  Edges:');
+  for (const e of edges) {
+    console.log(`    B${e.source} → B${e.target}  [${e.kind}]`);
+  }
+}
+
+function printCfgEntry(r: CfgResultEntry): void {
+  console.log(`\n${r.kind} ${r.name}  (${r.file}:${r.line})`);
+  console.log('─'.repeat(60));
+  console.log(`  Blocks: ${r.summary.blockCount}  Edges: ${r.summary.edgeCount}`);
+  printCfgBlocks(r.blocks);
+  printCfgEdges(r.edges);
+}
+
+function tryRenderGraphFormat(format: string, data: CfgData): boolean {
+  if (format === 'dot') {
+    console.log(cfgToDOT(data));
+    return true;
+  }
+  if (format === 'mermaid') {
+    console.log(cfgToMermaid(data));
+    return true;
+  }
+  return false;
+}
+
 export function cfg(name: string, customDbPath: string | undefined, opts: CfgCliOpts = {}): void {
   const data = cfgData(name, customDbPath, opts);
 
   if (outputResult(data, 'results', opts)) return;
 
   if (data.warning) {
-    console.log(`\u26A0  ${data.warning}`);
+    console.log(`⚠  ${data.warning}`);
     return;
   }
   if (data.results.length === 0) {
@@ -50,38 +95,9 @@ export function cfg(name: string, customDbPath: string | undefined, opts: CfgCli
     return;
   }
 
-  const format = opts.format || 'text';
-  if (format === 'dot') {
-    console.log(cfgToDOT(data));
-    return;
-  }
-  if (format === 'mermaid') {
-    console.log(cfgToMermaid(data));
-    return;
-  }
+  if (tryRenderGraphFormat(opts.format || 'text', data)) return;
 
-  // Text format
   for (const r of data.results as CfgResultEntry[]) {
-    console.log(`\n${r.kind} ${r.name}  (${r.file}:${r.line})`);
-    console.log('\u2500'.repeat(60));
-    console.log(`  Blocks: ${r.summary.blockCount}  Edges: ${r.summary.edgeCount}`);
-
-    if (r.blocks.length > 0) {
-      console.log('\n  Blocks:');
-      for (const b of r.blocks) {
-        const loc = b.startLine
-          ? ` L${b.startLine}${b.endLine && b.endLine !== b.startLine ? `-${b.endLine}` : ''}`
-          : '';
-        const label = b.label ? ` (${b.label})` : '';
-        console.log(`    [${b.index}] ${b.type}${label}${loc}`);
-      }
-    }
-
-    if (r.edges.length > 0) {
-      console.log('\n  Edges:');
-      for (const e of r.edges) {
-        console.log(`    B${e.source} \u2192 B${e.target}  [${e.kind}]`);
-      }
-    }
+    printCfgEntry(r);
   }
 }
diff --git a/src/presentation/flow.ts b/src/presentation/flow.ts
index 77cd2c512..e72210a23 100644
--- a/src/presentation/flow.ts
+++ b/src/presentation/flow.ts
@@ -16,54 +16,65 @@ interface FlowOpts {
   csv?: boolean;
 }
 
-export function flow(
-  name: string | undefined,
-  dbPath: string | undefined,
-  opts: FlowOpts = {},
-): void {
-  if (opts.list) {
-    const data = listEntryPointsData(dbPath, {
-      noTests: opts.noTests,
-      limit: opts.limit,
-      offset: opts.offset,
-    }) as any;
-    if (outputResult(data, 'entries', opts)) return;
-    if (data.count === 0) {
-      console.log('No entry points found. Run "codegraph build" first.');
-      return;
-    }
-    console.log(`\nEntry points (${data.count} total):\n`);
-    for (const [type, entries] of Object.entries(
-      data.byType as Record<
-        string,
-        Array<{ kind: string; name: string; file: string; line: number }>
-      >,
-    )) {
-      console.log(`  ${type} (${entries.length}):`);
-      for (const e of entries) {
-        console.log(`    [${kindIcon(e.kind)}] ${e.name}  ${e.file}:${e.line}`);
-      }
-      console.log();
-    }
-    return;
-  }
+interface EntryPoint {
+  kind: string;
+  name: string;
+  file: string;
+  line: number;
+}
 
-  if (!name) {
-    console.log(
-      'Please provide a function or entry-point name. Use --list to see available entry points.',
-    );
-    return;
-  }
+interface FlowNode {
+  kind: string;
+  name: string;
+  file: string;
+  line: number;
+}
 
-  const data = flowData(name, dbPath, opts) as any;
-  if (outputResult(data, 'steps', opts)) return;
+interface FlowStep {
+  depth: number;
+  nodes: FlowNode[];
+}
 
-  if (!data.entry) {
-    console.log(`No matching entry point or function found for "${name}".`);
+interface FlowCycle {
+  from: string;
+  to: string;
+  depth: number;
+}
+
+interface FlowResult {
+  entry?: { kind: string; name: string; type: string; file: string; line: number };
+  depth: number;
+  totalReached: number;
+  leaves: Array<{ name: string; file: string }>;
+  steps: FlowStep[];
+  cycles: FlowCycle[];
+  truncated?: boolean;
+}
+
+function runListEntryPoints(dbPath: string | undefined, opts: FlowOpts): void {
+  const data = listEntryPointsData(dbPath, {
+    noTests: opts.noTests,
+    limit: opts.limit,
+    offset: opts.offset,
+  }) as { count: number; byType: Record<string, EntryPoint[]> };
+  if (outputResult(data, 'entries', opts)) return;
+  if (data.count === 0) {
+    console.log('No entry points found. Run "codegraph build" first.');
     return;
   }
+  console.log(`\nEntry points (${data.count} total):\n`);
+  for (const [type, entries] of Object.entries(data.byType)) {
+    console.log(`  ${type} (${entries.length}):`);
+    for (const e of entries) {
+      console.log(`    [${kindIcon(e.kind)}] ${e.name}  ${e.file}:${e.line}`);
+    }
+    console.log();
+  }
+}
 
+function printFlowHeader(data: FlowResult): void {
   const e = data.entry;
+  if (!e) return;
   const typeTag = e.type !== 'exported' ? ` (${e.type})` : '';
   console.log(`\nFlow from: [${kindIcon(e.kind)}] ${e.name}${typeTag}  ${e.file}:${e.line}`);
   console.log(
@@ -73,27 +84,60 @@ export function flow(
     console.log(`  (truncated at depth ${data.depth})`);
   }
   console.log();
+}
 
+function isLeafNode(n: FlowNode, leaves: Array<{ name: string; file: string }>): boolean {
+  return leaves.some((l) => l.name === n.name && l.file === n.file);
+}
+
+function printFlowSteps(data: FlowResult): void {
   if (data.steps.length === 0) {
     console.log('  (leaf node — no callees)');
     return;
   }
-
   for (const step of data.steps) {
     console.log(`  depth ${step.depth}:`);
     for (const n of step.nodes) {
-      const isLeaf = data.leaves.some(
-        (l: { name: string; file: string }) => l.name === n.name && l.file === n.file,
-      );
-      const leafTag = isLeaf ? ' [leaf]' : '';
+      const leafTag = isLeafNode(n, data.leaves) ? ' [leaf]' : '';
       console.log(`    [${kindIcon(n.kind)}] ${n.name}  ${n.file}:${n.line}${leafTag}`);
     }
   }
+}
+
+function printFlowCycles(cycles: FlowCycle[]): void {
+  if (cycles.length === 0) return;
+  console.log('\n  Cycles detected:');
+  for (const c of cycles) {
+    console.log(`    ${c.from} -> ${c.to} (at depth ${c.depth})`);
+  }
+}
 
-  if (data.cycles.length > 0) {
-    console.log('\n  Cycles detected:');
-    for (const c of data.cycles) {
-      console.log(`    ${c.from} -> ${c.to} (at depth ${c.depth})`);
-    }
+export function flow(
+  name: string | undefined,
+  dbPath: string | undefined,
+  opts: FlowOpts = {},
+): void {
+  if (opts.list) {
+    runListEntryPoints(dbPath, opts);
+    return;
   }
+
+  if (!name) {
+    console.log(
+      'Please provide a function or entry-point name. Use --list to see available entry points.',
+    );
+    return;
+  }
+
+  const data = flowData(name, dbPath, opts) as unknown as FlowResult;
+  if (outputResult(data, 'steps', opts)) return;
+
+  if (!data.entry) {
+    console.log(`No matching entry point or function found for "${name}".`);
+    return;
+  }
+
+  printFlowHeader(data);
+  printFlowSteps(data);
+  printFlowCycles(data.cycles);
 }

From 88bb7119a7517d8d4d8b4b422520050e2774bab1 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 14:09:03 -0600
Subject: [PATCH 22/27] refactor(scripts): separate config from execution in
 benchmarking scripts

---
 scripts/lib/bench-config.ts | 257 +++++++++++++++++--------------
 scripts/token-benchmark.ts  | 291 +++++++++++++++---------------------
 2 files changed, 263 insertions(+), 285 deletions(-)

diff --git a/scripts/lib/bench-config.ts b/scripts/lib/bench-config.ts
index 44000d0a7..bc58473af 100644
--- a/scripts/lib/bench-config.ts
+++ b/scripts/lib/bench-config.ts
@@ -125,153 +125,179 @@ export function parseArgs() {
 	return { version, npm, dist };
 }
 
-/**
- * Resolve where to import codegraph source from.
- *
- * @returns {{ version: string, srcDir: string, cleanup: () => void }}
- *   - version:  "dev" (local) or the semver string (npm)
- *   - srcDir:   absolute path to the codegraph src/ directory to import from
- *   - cleanup:  call when done — removes the temp dir in npm mode, no-op otherwise
- */
-export async function resolveBenchmarkSource() {
-	const { version: cliVersion, npm, dist } = parseArgs();
-
-	if (dist && npm) {
-		console.error('Warning: --dist is ignored in --npm mode (the installed package already uses dist/ automatically).');
-	}
+/** Resolve repo root from this module's URL (handles Windows drive prefix). */
+function repoRoot(): string {
+	return path.resolve(
+		path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')),
+		'..',
+		'..',
+	);
+}
 
-	if (!npm) {
-		// Local mode — use repo src/ (or dist/ when --dist), version from git state
-		const root = path.resolve(path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')), '..', '..');
-		const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
-		let srcDir = path.join(root, 'src');
-		if (dist) {
-			const distDir = path.join(root, 'dist');
-			if (!fs.existsSync(distDir)) {
-				throw new Error(`--dist requested but ${distDir} does not exist. Run "npm run build" first.`);
-			}
-			srcDir = distDir;
+/** Local-mode resolution: use repo src/ (or dist/ when --dist). */
+function resolveBenchmarkSourceLocal(cliVersion: string | null, dist: boolean) {
+	const root = repoRoot();
+	const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8'));
+	let srcDir = path.join(root, 'src');
+	if (dist) {
+		const distDir = path.join(root, 'dist');
+		if (!fs.existsSync(distDir)) {
+			throw new Error(`--dist requested but ${distDir} does not exist. Run "npm run build" first.`);
 		}
-		return {
-			version: cliVersion || getBenchmarkVersion(pkg.version, root),
-			srcDir,
-			cleanup() {},
-		};
+		srcDir = distDir;
 	}
+	return {
+		version: cliVersion || getBenchmarkVersion(pkg.version, root),
+		srcDir,
+		cleanup() {},
+	};
+}
 
-	// npm mode — install @optave/codegraph@<version> into a temp dir.
-	// Validate the version up-front so we never log or interpolate an
-	// unvalidated string (with `shell: true` on Windows, bad input would be a
-	// shell-injection surface).
-	const safeVersion = assertSafePkgVersion(cliVersion || 'latest');
-	const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-bench-'));
-
-	console.error(`Installing @optave/codegraph@${safeVersion} into ${tmpDir}...`);
-
-	// Write a minimal package.json so npm install works
-	fs.writeFileSync(path.join(tmpDir, 'package.json'), JSON.stringify({ private: true }));
-
-	// Retry with backoff for npm propagation delays
-	const maxRetries = 5;
+/**
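+ * Run `npm install <spec>` in `cwd`, making up to `maxRetries` attempts with linearly growing delays (15s × attempt).
+ * `label` only prefixes the "<label> attempt N failed" retry log line; the final failure is rethrown to the caller.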
+ */
+async function npmInstallWithRetries(
+	spec: string,
+	cwd: string,
+	maxRetries: number,
+	label: string,
+	extraFlags: readonly string[] = [],
+): Promise<void> {
 	for (let attempt = 1; attempt <= maxRetries; attempt++) {
 		try {
-			execFileSync('npm', ['install', `@optave/codegraph@${safeVersion}`, '--no-audit', '--no-fund'], {
-				cwd: tmpDir,
+			execFileSync('npm', ['install', spec, '--no-audit', '--no-fund', ...extraFlags], {
+				cwd,
 				stdio: 'pipe',
 				timeout: 120_000,
 				shell: NPM_SHELL,
 			});
-			break;
+			return;
 		} catch (err) {
-			if (attempt === maxRetries) {
-				// Clean up before throwing
-				fs.rmSync(tmpDir, { recursive: true, force: true });
-				throw new Error(`Failed to install @optave/codegraph@${safeVersion} after ${maxRetries} attempts: ${err.message}`);
-			}
+			if (attempt === maxRetries) throw err;
 			const delay = attempt * 15_000; // 15s, 30s, 45s, 60s
-			console.error(`  Attempt ${attempt} failed, retrying in ${delay / 1000}s...`);
+			console.error(`  ${label} attempt ${attempt} failed, retrying in ${delay / 1000}s...`);
 			await new Promise((resolve) => setTimeout(resolve, delay));
 		}
 	}
+}
 
-	const pkgDir = path.join(tmpDir, 'node_modules', '@optave', 'codegraph');
+/** Install @optave/codegraph@<version> (default `latest`) into a fresh tmp dir, removing that dir if the install fails; returns the paths plus the parsed package.json. */
+async function installCodegraphPackage(cliVersion: string | null): Promise<{ tmpDir: string; pkgDir: string; installedPkg: any; safeVersion: string }> {
+	const safeVersion = assertSafePkgVersion(cliVersion || 'latest');
+	const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-bench-'));
+
+	console.error(`Installing @optave/codegraph@${safeVersion} into ${tmpDir}...`);
+	fs.writeFileSync(path.join(tmpDir, 'package.json'), JSON.stringify({ private: true }));
 
+	try {
+		await npmInstallWithRetries(`@optave/codegraph@${safeVersion}`, tmpDir, 5, 'Install'); // logs "Install attempt N failed"
+	} catch (err) {
+		fs.rmSync(tmpDir, { recursive: true, force: true });
+		throw new Error(`Failed to install @optave/codegraph@${safeVersion} after 5 attempts: ${(err as Error).message}`);
+	}
+
+	const pkgDir = path.join(tmpDir, 'node_modules', '@optave', 'codegraph');
 	const installedPkg = JSON.parse(fs.readFileSync(path.join(pkgDir, 'package.json'), 'utf8'));
+	return { tmpDir, pkgDir, installedPkg, safeVersion };
+}
 
-	// npm does not transitively install optionalDependencies of a dependency,
-	// so the platform-specific native binary is missing. Install it explicitly.
+/** Detect platform-specific native package key (e.g. `codegraph-linux-x64-gnu`). */
+function detectNativePlatformKey(): string {
+	const platform = os.platform();
+	const arch = os.arch();
+	let libcSuffix = '';
+	if (platform === 'linux') {
+		try {
+			const files = fs.readdirSync('/lib');
+			libcSuffix = files.some((f) => f.startsWith('ld-musl-') && f.endsWith('.so.1')) ? '-musl' : '-gnu';
+		} catch {
+			libcSuffix = '-gnu';
+		}
+	}
+	return `codegraph-${platform}-${arch}${libcSuffix}`;
+}
+
+/**
+ * npm does not transitively install optionalDependencies of a dependency,
+ * so the platform-specific native binary is missing. Install it explicitly.
+ * Failures are logged and swallowed — benchmark can still run on WASM.
+ */
+async function installNativePackage(tmpDir: string, installedPkg: any): Promise<void> {
 	try {
 		const optDeps = installedPkg.optionalDependencies || {};
-		const platform = os.platform();
-		const arch = os.arch();
-		let libcSuffix = '';
-		if (platform === 'linux') {
-			try {
-				const files = fs.readdirSync('/lib');
-				libcSuffix = files.some((f) => f.startsWith('ld-musl-') && f.endsWith('.so.1')) ? '-musl' : '-gnu';
-			} catch {
-				libcSuffix = '-gnu';
-			}
-		}
-		const platformKey = `codegraph-${platform}-${arch}${libcSuffix}`;
+		const platformKey = detectNativePlatformKey();
 		const nativePkg = Object.keys(optDeps).find((name) => name.includes(platformKey));
-		if (nativePkg) {
-			// Even though these originate from the installed package's
-			// optionalDependencies (i.e. the npm registry), validate before
-			// logging or interpolating into a `shell: true` command line.
-			const safeNativePkg = assertSafePkgName(nativePkg);
-			const safeNativeVersion = assertSafePkgVersion(optDeps[nativePkg]);
-			console.error(`Installing native package ${safeNativePkg}@${safeNativeVersion}...`);
-			for (let attempt = 1; attempt <= maxRetries; attempt++) {
-				try {
-					execFileSync('npm', ['install', `${safeNativePkg}@${safeNativeVersion}`, '--no-audit', '--no-fund', '--no-save'], {
-						cwd: tmpDir,
-						stdio: 'pipe',
-						timeout: 120_000,
-						shell: NPM_SHELL,
-					});
-					break;
-				} catch (innerErr) {
-					if (attempt === maxRetries) throw innerErr;
-					const delay = attempt * 15_000;
-					console.error(`  Native install attempt ${attempt} failed, retrying in ${delay / 1000}s...`);
-					await new Promise((resolve) => setTimeout(resolve, delay));
-				}
-			}
-			console.error(`Installed ${safeNativePkg}@${safeNativeVersion}`);
-		} else {
-			console.error(`No native package found for platform ${platform}-${arch}${libcSuffix}, skipping`);
+		if (!nativePkg) {
+			console.error(`No native package found for platform ${platformKey}, skipping`);
+			return;
 		}
+		// Even though these originate from the installed package's
+		// optionalDependencies (i.e. the npm registry), validate before
+		// logging or interpolating into a `shell: true` command line.
+		const safeNativePkg = assertSafePkgName(nativePkg);
+		const safeNativeVersion = assertSafePkgVersion(optDeps[nativePkg]);
+		console.error(`Installing native package ${safeNativePkg}@${safeNativeVersion}...`);
+		await npmInstallWithRetries(
+			`${safeNativePkg}@${safeNativeVersion}`,
+			tmpDir,
+			5,
+			'Native install',
+			['--no-save'],
+		);
+		console.error(`Installed ${safeNativePkg}@${safeNativeVersion}`);
 	} catch (err) {
-		console.error(`Warning: failed to install native package: ${err.message}`);
+		console.error(`Warning: failed to install native package: ${(err as Error).message}`);
 	}
+}
 
-	// @huggingface/transformers is a devDependency (lazy-loaded for embeddings).
-	// It is not installed as a transitive dep in npm mode, so install it
-	// explicitly so the embedding benchmark workers can import it.
+/**
+ * @huggingface/transformers is a devDependency (lazy-loaded for embeddings).
+ * Not installed as a transitive dep in npm mode, so install it explicitly so
+ * the embedding benchmark workers can import it. Failures are logged + swallowed.
+ */
+async function installTransformers(tmpDir: string): Promise<void> {
 	try {
 		const localPkg = JSON.parse(
-			fs.readFileSync(path.resolve(path.dirname(new URL(import.meta.url).pathname.replace(/^\/([A-Z]:)/, '$1')), '..', '..', 'package.json'), 'utf8'),
+			fs.readFileSync(path.join(repoRoot(), 'package.json'), 'utf8'),
 		);
 		const hfVersion = localPkg.devDependencies?.['@huggingface/transformers'];
-		if (hfVersion) {
-			const safeHfVersion = assertSafePkgVersion(hfVersion);
-			console.error(`Installing @huggingface/transformers@${safeHfVersion} for embedding benchmarks...`);
-			execFileSync(
-				'npm',
-				['install', `@huggingface/transformers@${safeHfVersion}`, '--no-audit', '--no-fund', '--no-save'],
-				{
-					cwd: tmpDir,
-					stdio: 'pipe',
-					timeout: 120_000,
-					shell: NPM_SHELL,
-				},
-			);
-			console.error('Installed @huggingface/transformers');
-		}
+		if (!hfVersion) return;
+		const safeHfVersion = assertSafePkgVersion(hfVersion);
+		console.error(`Installing @huggingface/transformers@${safeHfVersion} for embedding benchmarks...`);
+		execFileSync(
+			'npm',
+			['install', `@huggingface/transformers@${safeHfVersion}`, '--no-audit', '--no-fund', '--no-save'],
+			{ cwd: tmpDir, stdio: 'pipe', timeout: 120_000, shell: NPM_SHELL },
+		);
+		console.error('Installed @huggingface/transformers');
 	} catch (err) {
-		console.error(`Warning: failed to install @huggingface/transformers: ${err.message}`);
+		console.error(`Warning: failed to install @huggingface/transformers: ${(err as Error).message}`);
 	}
+}
+
+/**
+ * Resolve where to import codegraph source from.
+ *
+ * @returns {{ version: string, srcDir: string, cleanup: () => void }}
+ *   - version:  "dev" (local) or the semver string (npm)
+ *   - srcDir:   absolute path to the codegraph src/ directory to import from
+ *   - cleanup:  call when done — removes the temp dir in npm mode, no-op otherwise
+ */
+export async function resolveBenchmarkSource() {
+	const { version: cliVersion, npm, dist } = parseArgs();
+
+	if (dist && npm) {
+		console.error('Warning: --dist is ignored in --npm mode (the installed package already uses dist/ automatically).');
+	}
+
+	if (!npm) {
+		return resolveBenchmarkSourceLocal(cliVersion, dist);
+	}
+
+	const { tmpDir, pkgDir, installedPkg } = await installCodegraphPackage(cliVersion);
+	await installNativePackage(tmpDir, installedPkg);
+	await installTransformers(tmpDir);
 
 	// v3.4.0+ publishes compiled JS in dist/ alongside raw TS in src/.
 	// Node cannot strip types from node_modules, so prefer dist/ when available.
@@ -284,7 +310,6 @@ export async function resolveBenchmarkSource() {
 	}
 
 	const resolvedVersion = cliVersion || installedPkg.version;
-
 	console.error(`Installed @optave/codegraph@${installedPkg.version}`);
 
 	return {
diff --git a/scripts/token-benchmark.ts b/scripts/token-benchmark.ts
index 02e053bc4..1aa4d3951 100644
--- a/scripts/token-benchmark.ts
+++ b/scripts/token-benchmark.ts
@@ -405,6 +405,126 @@ async function runPerfBenchmarks(nextjsDir) {
 	};
 }
 
+// ── Issue experiment ──────────────────────────────────────────────────────
+
+/** Run RUNS sessions for one mode, logging per-run metrics. */
+async function runSessionsForMode(mode, issue, nextjsDir) {
+	const runs = [];
+	const label = mode === 'baseline' ? 'Baseline' : 'Codegraph';
+	for (let r = 0; r < RUNS; r++) {
+		console.error(`  ${label} run ${r + 1}/${RUNS}...`);
+		try {
+			const metrics = await runSession(mode, issue, nextjsDir);
+			runs.push(metrics);
+			console.error(
+				`    ${metrics.inputTokens} input tokens, $${metrics.totalCostUsd}, ` +
+					`${metrics.numTurns} turns, hit rate: ${metrics.hitRate}%`,
+			);
+		} catch (err) {
+			console.error(`    ERROR: ${err.message}`);
+			runs.push({ error: err.message });
+		}
+	}
+	return runs;
+}
+
+/** Compute median metrics for a run-set (or null when no valid runs). */
+function medianForRuns(runs) {
+	const valid = runs.filter((r) => !r.error);
+	if (valid.length === 0) return null;
+	const medianOf = (key) => median(valid.map((r) => r[key]));
+	return {
+		inputTokens: medianOf('inputTokens'),
+		outputTokens: medianOf('outputTokens'),
+		cacheReadInputTokens: medianOf('cacheReadInputTokens'),
+		totalCostUsd: round2(medianOf('totalCostUsd')),
+		numTurns: medianOf('numTurns'),
+		durationMs: medianOf('durationMs'),
+		uniqueFilesRead: medianOf('uniqueFilesRead'),
+		hitRate: medianOf('hitRate'),
+	};
+}
+
+/** Token + cost savings (% reduction) between two median objects. */
+function computeSavings(baselineMedian, codegraphMedian) {
+	if (!baselineMedian || !codegraphMedian || baselineMedian.inputTokens <= 0) return null;
+	const tokenSavings =
+		((baselineMedian.inputTokens - codegraphMedian.inputTokens) /
+			baselineMedian.inputTokens) *
+		100;
+	const costSavings =
+		baselineMedian.totalCostUsd > 0
+			? ((baselineMedian.totalCostUsd - codegraphMedian.totalCostUsd) /
+					baselineMedian.totalCostUsd) *
+				100
+			: 0;
+	return {
+		inputTokensPct: Math.round(tokenSavings),
+		costPct: Math.round(costSavings),
+	};
+}
+
+/** Run baseline + codegraph experiments for a single issue and aggregate. */
+async function runIssueExperiment(issue, nextjsDir) {
+	console.error(`\n── ${issue.id} (${issue.difficulty}) ──`);
+	console.error(`PR #${issue.pr}: ${issue.title}`);
+
+	checkoutCommit(nextjsDir, issue.commitBefore);
+	if (!SKIP_GRAPH) {
+		await buildCodegraph(nextjsDir);
+	}
+
+	const baselineRuns = await runSessionsForMode('baseline', issue, nextjsDir);
+	const codegraphRuns = await runSessionsForMode('codegraph', issue, nextjsDir);
+
+	const baselineMedian = medianForRuns(baselineRuns);
+	const codegraphMedian = medianForRuns(codegraphRuns);
+	const savings = computeSavings(baselineMedian, codegraphMedian);
+
+	if (savings) {
+		console.error(
+			`  Savings: ${savings.inputTokensPct}% tokens, ${savings.costPct}% cost`,
+		);
+	}
+
+	return {
+		id: issue.id,
+		difficulty: issue.difficulty,
+		pr: issue.pr,
+		baseline: { median: baselineMedian, runs: baselineRuns },
+		codegraph: { median: codegraphMedian, runs: codegraphRuns },
+		savings,
+	};
+}
+
+/** Aggregate per-issue results into corpus-wide token/cost savings + hit rates. */
+function computeAggregate(results) {
+	const validResults = results.filter(
+		(r) => r.baseline.median && r.codegraph.median && r.savings,
+	);
+	if (validResults.length === 0) return null;
+
+	const sum = (sel) => validResults.reduce((s, r) => s + sel(r), 0);
+	const totalBaselineTokens = sum((r) => r.baseline.median.inputTokens);
+	const totalCodegraphTokens = sum((r) => r.codegraph.median.inputTokens);
+	const totalBaselineCost = sum((r) => r.baseline.median.totalCostUsd);
+	const totalCodegraphCost = sum((r) => r.codegraph.median.totalCostUsd);
+	const pct = (a, b) => (a > 0 ? Math.round(((a - b) / a) * 100) : 0);
+
+	return {
+		savings: {
+			inputTokensPct: pct(totalBaselineTokens, totalCodegraphTokens),
+			costPct: pct(totalBaselineCost, totalCodegraphCost),
+		},
+		baselineAvgHitRate: Math.round(
+			sum((r) => r.baseline.median.hitRate) / validResults.length,
+		),
+		codegraphAvgHitRate: Math.round(
+			sum((r) => r.codegraph.median.hitRate) / validResults.length,
+		),
+	};
+}
+
 // ── Main ──────────────────────────────────────────────────────────────────
 
 async function main() {
@@ -422,179 +542,14 @@ async function main() {
 	console.error(`  Next.js dir: ${nextjsDir}`);
 	console.error('');
 
-	// Clone / fetch Next.js
 	await ensureNextjsClone(nextjsDir);
 
 	const results = [];
-
 	for (const issue of selectedIssues) {
-		console.error(`\n── ${issue.id} (${issue.difficulty}) ──`);
-		console.error(`PR #${issue.pr}: ${issue.title}`);
-
-		// Checkout the commit before the fix
-		checkoutCommit(nextjsDir, issue.commitBefore);
-
-		// Build codegraph (unless skipped)
-		if (!SKIP_GRAPH) {
-			await buildCodegraph(nextjsDir);
-		}
-
-		const baselineRuns = [];
-		const codegraphRuns = [];
-
-		// Run baseline sessions
-		for (let r = 0; r < RUNS; r++) {
-			console.error(`  Baseline run ${r + 1}/${RUNS}...`);
-			try {
-				const metrics = await runSession('baseline', issue, nextjsDir);
-				baselineRuns.push(metrics);
-				console.error(
-					`    ${metrics.inputTokens} input tokens, $${metrics.totalCostUsd}, ` +
-						`${metrics.numTurns} turns, hit rate: ${metrics.hitRate}%`,
-				);
-			} catch (err) {
-				console.error(`    ERROR: ${err.message}`);
-				baselineRuns.push({ error: err.message });
-			}
-		}
-
-		// Run codegraph sessions
-		for (let r = 0; r < RUNS; r++) {
-			console.error(`  Codegraph run ${r + 1}/${RUNS}...`);
-			try {
-				const metrics = await runSession('codegraph', issue, nextjsDir);
-				codegraphRuns.push(metrics);
-				console.error(
-					`    ${metrics.inputTokens} input tokens, $${metrics.totalCostUsd}, ` +
-						`${metrics.numTurns} turns, hit rate: ${metrics.hitRate}%`,
-				);
-			} catch (err) {
-				console.error(`    ERROR: ${err.message}`);
-				codegraphRuns.push({ error: err.message });
-			}
-		}
-
-		// Compute medians (filter out errored runs)
-		const validBaseline = baselineRuns.filter((r) => !r.error);
-		const validCodegraph = codegraphRuns.filter((r) => !r.error);
-
-		const medianOf = (runs, key) => median(runs.map((r) => r[key]));
-
-		const baselineMedian =
-			validBaseline.length > 0
-				? {
-						inputTokens: medianOf(validBaseline, 'inputTokens'),
-						outputTokens: medianOf(validBaseline, 'outputTokens'),
-						cacheReadInputTokens: medianOf(validBaseline, 'cacheReadInputTokens'),
-						totalCostUsd: round2(medianOf(validBaseline, 'totalCostUsd')),
-						numTurns: medianOf(validBaseline, 'numTurns'),
-						durationMs: medianOf(validBaseline, 'durationMs'),
-						uniqueFilesRead: medianOf(validBaseline, 'uniqueFilesRead'),
-						hitRate: medianOf(validBaseline, 'hitRate'),
-					}
-				: null;
-
-		const codegraphMedian =
-			validCodegraph.length > 0
-				? {
-						inputTokens: medianOf(validCodegraph, 'inputTokens'),
-						outputTokens: medianOf(validCodegraph, 'outputTokens'),
-						cacheReadInputTokens: medianOf(validCodegraph, 'cacheReadInputTokens'),
-						totalCostUsd: round2(medianOf(validCodegraph, 'totalCostUsd')),
-						numTurns: medianOf(validCodegraph, 'numTurns'),
-						durationMs: medianOf(validCodegraph, 'durationMs'),
-						uniqueFilesRead: medianOf(validCodegraph, 'uniqueFilesRead'),
-						hitRate: medianOf(validCodegraph, 'hitRate'),
-					}
-				: null;
-
-		// Compute savings
-		let savings = null;
-		if (baselineMedian && codegraphMedian && baselineMedian.inputTokens > 0) {
-			const tokenSavings =
-				((baselineMedian.inputTokens - codegraphMedian.inputTokens) /
-					baselineMedian.inputTokens) *
-				100;
-			const costSavings =
-				baselineMedian.totalCostUsd > 0
-					? ((baselineMedian.totalCostUsd - codegraphMedian.totalCostUsd) /
-							baselineMedian.totalCostUsd) *
-						100
-					: 0;
-			savings = {
-				inputTokensPct: Math.round(tokenSavings),
-				costPct: Math.round(costSavings),
-			};
-		}
-
-		results.push({
-			id: issue.id,
-			difficulty: issue.difficulty,
-			pr: issue.pr,
-			baseline: { median: baselineMedian, runs: baselineRuns },
-			codegraph: { median: codegraphMedian, runs: codegraphRuns },
-			savings,
-		});
-
-		if (savings) {
-			console.error(
-				`  Savings: ${savings.inputTokensPct}% tokens, ${savings.costPct}% cost`,
-			);
-		}
+		results.push(await runIssueExperiment(issue, nextjsDir));
 	}
 
-	// ── Aggregate ───────────────────────────────────────────────────────
-
-	const validResults = results.filter(
-		(r) => r.baseline.median && r.codegraph.median && r.savings,
-	);
-
-	let aggregate = null;
-	if (validResults.length > 0) {
-		const totalBaselineTokens = validResults.reduce(
-			(s, r) => s + r.baseline.median.inputTokens,
-			0,
-		);
-		const totalCodegraphTokens = validResults.reduce(
-			(s, r) => s + r.codegraph.median.inputTokens,
-			0,
-		);
-		const totalBaselineCost = validResults.reduce(
-			(s, r) => s + r.baseline.median.totalCostUsd,
-			0,
-		);
-		const totalCodegraphCost = validResults.reduce(
-			(s, r) => s + r.codegraph.median.totalCostUsd,
-			0,
-		);
-
-		aggregate = {
-			savings: {
-				inputTokensPct:
-					totalBaselineTokens > 0
-						? Math.round(
-								((totalBaselineTokens - totalCodegraphTokens) / totalBaselineTokens) * 100,
-							)
-						: 0,
-				costPct:
-					totalBaselineCost > 0
-						? Math.round(
-								((totalBaselineCost - totalCodegraphCost) / totalBaselineCost) * 100,
-							)
-						: 0,
-			},
-			baselineAvgHitRate: Math.round(
-				validResults.reduce((s, r) => s + r.baseline.median.hitRate, 0) /
-					validResults.length,
-			),
-			codegraphAvgHitRate: Math.round(
-				validResults.reduce((s, r) => s + r.codegraph.median.hitRate, 0) /
-					validResults.length,
-			),
-		};
-	}
-
-	// ── Performance benchmarks (optional) ────────────────────────────────
+	const aggregate = computeAggregate(results);
 
 	let perfBenchmarks = null;
 	if (RUN_PERF) {
@@ -603,8 +558,6 @@ async function main() {
 		perfBenchmarks = await runPerfBenchmarks(nextjsDir);
 	}
 
-	// ── Output ──────────────────────────────────────────────────────────
-
 	// Restore console.log for JSON output
 	console.log = origLog;
 

From 02efaeb1de4f164e5368874fb228b6c4d642e0b5 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 14:15:24 -0600
Subject: [PATCH 23/27] refactor(features): reduce warning-level complexity in
 feature warnings batch

---
 src/features/boundaries.ts | 110 ++++++++++++++----------
 src/features/check.ts      |  87 +++++++++++--------
 src/features/dataflow.ts   |  91 ++++++++++++--------
 src/features/flow.ts       |  72 ++++++++++------
 src/features/sequence.ts   | 168 ++++++++++++++++++++++---------------
 5 files changed, 319 insertions(+), 209 deletions(-)

diff --git a/src/features/boundaries.ts b/src/features/boundaries.ts
index 05f7738f8..0857ffc60 100644
--- a/src/features/boundaries.ts
+++ b/src/features/boundaries.ts
@@ -235,30 +235,23 @@ interface EvaluateBoundariesOpts {
   noTests?: boolean;
 }
 
-export function evaluateBoundaries(
-  db: BetterSqlite3Database,
-  boundaryConfig: BoundaryConfig | undefined,
-  opts: EvaluateBoundariesOpts = {},
-): { violations: BoundaryViolation[]; violationCount: number } {
-  if (!boundaryConfig) return { violations: [], violationCount: 0 };
-
-  const { valid, errors } = validateBoundaryConfig(boundaryConfig);
-  if (!valid) {
-    throw new BoundaryError(`Invalid boundary configuration: ${errors.join('; ')}`);
-  }
-
-  const modules = resolveModules(boundaryConfig);
-  if (modules.size === 0) return { violations: [], violationCount: 0 };
-
-  let allRules: BoundaryRule[] = [];
-  if (boundaryConfig.preset) {
-    allRules = generatePresetRules(modules, boundaryConfig.preset);
-  }
+function collectAllRules(
+  boundaryConfig: BoundaryConfig,
+  modules: Map<string, ResolvedModule>,
+): BoundaryRule[] {
+  const rules: BoundaryRule[] = boundaryConfig.preset
+    ? generatePresetRules(modules, boundaryConfig.preset)
+    : [];
   if (boundaryConfig.rules && Array.isArray(boundaryConfig.rules)) {
-    allRules = allRules.concat(boundaryConfig.rules);
+    return rules.concat(boundaryConfig.rules);
   }
-  if (allRules.length === 0) return { violations: [], violationCount: 0 };
+  return rules;
+}
 
+function loadImportEdges(
+  db: BetterSqlite3Database,
+  opts: EvaluateBoundariesOpts,
+): Array<{ source: string; target: string }> {
   let edges: Array<{ source: string; target: string }>;
   try {
     edges = db
@@ -281,38 +274,63 @@ export function evaluateBoundaries(
     const scope = new Set(opts.scopeFiles);
     edges = edges.filter((e) => scope.has(e.source));
   }
+  return edges;
+}
 
-  const violations: BoundaryViolation[] = [];
+function ruleViolated(rule: BoundaryRule, toModule: string): boolean {
+  if (rule.notTo?.includes(toModule)) return true;
+  if (rule.onlyTo && !rule.onlyTo.includes(toModule)) return true;
+  return false;
+}
 
-  for (const edge of edges) {
-    const fromModule = classifyFile(edge.source, modules);
-    const toModule = classifyFile(edge.target, modules);
+function emitEdgeViolations(
+  edge: { source: string; target: string },
+  fromModule: string,
+  toModule: string,
+  allRules: BoundaryRule[],
+  violations: BoundaryViolation[],
+): void {
+  for (const rule of allRules) {
+    if (rule.from !== fromModule) continue;
+    if (!ruleViolated(rule, toModule)) continue;
+    violations.push({
+      rule: 'boundaries',
+      name: `${fromModule} -> ${toModule}`,
+      file: edge.source,
+      targetFile: edge.target,
+      message: rule.message || `${fromModule} must not depend on ${toModule}`,
+      value: 1,
+      threshold: 0,
+    });
+  }
+}
 
-    if (!fromModule || !toModule) continue;
+export function evaluateBoundaries(
+  db: BetterSqlite3Database,
+  boundaryConfig: BoundaryConfig | undefined,
+  opts: EvaluateBoundariesOpts = {},
+): { violations: BoundaryViolation[]; violationCount: number } {
+  if (!boundaryConfig) return { violations: [], violationCount: 0 };
 
-    for (const rule of allRules) {
-      if (rule.from !== fromModule) continue;
+  const { valid, errors } = validateBoundaryConfig(boundaryConfig);
+  if (!valid) {
+    throw new BoundaryError(`Invalid boundary configuration: ${errors.join('; ')}`);
+  }
 
-      let isViolation = false;
+  const modules = resolveModules(boundaryConfig);
+  if (modules.size === 0) return { violations: [], violationCount: 0 };
 
-      if (rule.notTo?.includes(toModule)) {
-        isViolation = true;
-      } else if (rule.onlyTo && !rule.onlyTo.includes(toModule)) {
-        isViolation = true;
-      }
+  const allRules = collectAllRules(boundaryConfig, modules);
+  if (allRules.length === 0) return { violations: [], violationCount: 0 };
 
-      if (isViolation) {
-        violations.push({
-          rule: 'boundaries',
-          name: `${fromModule} -> ${toModule}`,
-          file: edge.source,
-          targetFile: edge.target,
-          message: rule.message || `${fromModule} must not depend on ${toModule}`,
-          value: 1,
-          threshold: 0,
-        });
-      }
-    }
+  const edges = loadImportEdges(db, opts);
+  const violations: BoundaryViolation[] = [];
+
+  for (const edge of edges) {
+    const fromModule = classifyFile(edge.source, modules);
+    const toModule = classifyFile(edge.target, modules);
+    if (!fromModule || !toModule) continue;
+    emitEdgeViolations(edge, fromModule, toModule, allRules, violations);
   }
 
   return { violations, violationCount: violations.length };
diff --git a/src/features/check.ts b/src/features/check.ts
index 289022800..a9baf6634 100644
--- a/src/features/check.ts
+++ b/src/features/check.ts
@@ -22,6 +22,29 @@ interface ParsedDiff {
   newFiles: Set<string>;
 }
 
+const HUNK_RE = /^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/;
+const NEW_FILE_RE = /^\+\+\+ b\/(.+)/;
+
+function pushHunkRanges(
+  line: string,
+  currentFile: string,
+  changedRanges: Map<string, DiffRange[]>,
+  oldRanges: Map<string, DiffRange[]>,
+): void {
+  const hunkMatch = line.match(HUNK_RE);
+  if (!hunkMatch) return;
+  const oldStart = parseInt(hunkMatch[1]!, 10);
+  const oldCount = parseInt(hunkMatch[2] || '1', 10);
+  if (oldCount > 0) {
+    oldRanges.get(currentFile)!.push({ start: oldStart, end: oldStart + oldCount - 1 });
+  }
+  const newStart = parseInt(hunkMatch[3]!, 10);
+  const newCount = parseInt(hunkMatch[4] || '1', 10);
+  if (newCount > 0) {
+    changedRanges.get(currentFile)!.push({ start: newStart, end: newStart + newCount - 1 });
+  }
+}
+
 export function parseDiffOutput(diffOutput: string): ParsedDiff {
   const changedRanges = new Map<string, DiffRange[]>();
   const oldRanges = new Map<string, DiffRange[]>();
@@ -38,7 +61,7 @@ export function parseDiffOutput(diffOutput: string): ParsedDiff {
       prevIsDevNull = false;
       continue;
     }
-    const fileMatch = line.match(/^\+\+\+ b\/(.+)/);
+    const fileMatch = line.match(NEW_FILE_RE);
     if (fileMatch) {
       currentFile = fileMatch[1]!;
       if (!changedRanges.has(currentFile)) changedRanges.set(currentFile, []);
@@ -47,19 +70,7 @@ export function parseDiffOutput(diffOutput: string): ParsedDiff {
       prevIsDevNull = false;
       continue;
     }
-    const hunkMatch = line.match(/^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@/);
-    if (hunkMatch && currentFile) {
-      const oldStart = parseInt(hunkMatch[1]!, 10);
-      const oldCount = parseInt(hunkMatch[2] || '1', 10);
-      if (oldCount > 0) {
-        oldRanges.get(currentFile)!.push({ start: oldStart, end: oldStart + oldCount - 1 });
-      }
-      const newStart = parseInt(hunkMatch[3]!, 10);
-      const newCount = parseInt(hunkMatch[4] || '1', 10);
-      if (newCount > 0) {
-        changedRanges.get(currentFile)!.push({ start: newStart, end: newStart + newCount - 1 });
-      }
-    }
+    if (currentFile) pushHunkRanges(line, currentFile, changedRanges, oldRanges);
   }
   return { changedRanges, oldRanges, newFiles };
 }
@@ -96,6 +107,26 @@ interface BlastRadiusResult {
   violations: BlastRadiusViolation[];
 }
 
+type DefRow = {
+  id: number;
+  name: string;
+  kind: string;
+  file: string;
+  line: number;
+  end_line: number | null;
+};
+
+function rangesOverlap(defLine: number, endLine: number, ranges: DiffRange[]): boolean {
+  for (const range of ranges) {
+    if (range.start <= endLine && range.end >= defLine) return true;
+  }
+  return false;
+}
+
+function defEndLine(def: DefRow, nextDef: DefRow | undefined): number {
+  return def.end_line || (nextDef ? nextDef.line - 1 : 999999);
+}
+
 export function checkMaxBlastRadius(
   db: BetterSqlite3Database,
   changedRanges: Map<string, DiffRange[]>,
@@ -105,34 +136,18 @@ export function checkMaxBlastRadius(
 ): BlastRadiusResult {
   const violations: BlastRadiusViolation[] = [];
   let maxFound = 0;
+  const defsStmt = db.prepare(
+    `SELECT * FROM nodes WHERE file = ? AND kind IN ('function', 'method', 'class') ORDER BY line`,
+  );
 
   for (const [file, ranges] of changedRanges) {
     if (noTests && isTestFile(file)) continue;
-    const defs = db
-      .prepare(
-        `SELECT * FROM nodes WHERE file = ? AND kind IN ('function', 'method', 'class') ORDER BY line`,
-      )
-      .all(file) as Array<{
-      id: number;
-      name: string;
-      kind: string;
-      file: string;
-      line: number;
-      end_line: number | null;
-    }>;
+    const defs = defsStmt.all(file) as DefRow[];
 
     for (let i = 0; i < defs.length; i++) {
       const def = defs[i]!;
-      const nextDef = defs[i + 1];
-      const endLine = def.end_line || (nextDef ? nextDef.line - 1 : 999999);
-      let overlaps = false;
-      for (const range of ranges) {
-        if (range.start <= endLine && range.end >= def.line) {
-          overlaps = true;
-          break;
-        }
-      }
-      if (!overlaps) continue;
+      const endLine = defEndLine(def, defs[i + 1]);
+      if (!rangesOverlap(def.line, endLine, ranges)) continue;
 
       const { totalDependents: totalCallers } = bfsTransitiveCallers(db, def.id, {
         noTests,
diff --git a/src/features/dataflow.ts b/src/features/dataflow.ts
index d85bcb668..804e7aa1e 100644
--- a/src/features/dataflow.ts
+++ b/src/features/dataflow.ts
@@ -675,6 +675,51 @@ interface BfsParentEntry {
   expression: string;
 }
 
+type DataflowNeighbor = {
+  id: number;
+  file: string;
+  edge_kind: string;
+  expression: string;
+};
+
+interface DataflowBfsState {
+  visited: Set<number>;
+  parent: Map<number, BfsParentEntry>;
+  nextQueue: number[];
+  found: boolean;
+}
+
+/**
+ * Process a single neighbor in the dataflow BFS. Returns true when an unfiltered
+ * neighbor is the target; the BFS loop ignores this and breaks on state.found.
+ */
+function processDataflowNeighbor(
+  n: DataflowNeighbor,
+  currentId: number,
+  targetId: number,
+  noTests: boolean,
+  state: DataflowBfsState,
+): boolean {
+  if (noTests && isTestFile(n.file)) return false;
+  const entry: BfsParentEntry = {
+    parentId: currentId,
+    edgeKind: n.edge_kind,
+    expression: n.expression,
+  };
+  if (n.id === targetId) {
+    if (!state.found) {
+      state.found = true;
+      state.parent.set(n.id, entry);
+    }
+    return true;
+  }
+  if (state.visited.has(n.id)) return false;
+  state.visited.add(n.id);
+  state.parent.set(n.id, entry);
+  state.nextQueue.push(n.id);
+  return false;
+}
+
 /** BFS through dataflow edges to find a path from source to target. */
 function bfsDataflowPath(
   db: BetterSqlite3Database,
@@ -689,50 +734,28 @@ function bfsDataflowPath(
      WHERE d.source_id = ? AND d.kind IN ('flows_to', 'returns')`,
   );
 
-  const visited = new Set<number>([sourceId]);
-  const parent = new Map<number, BfsParentEntry>();
+  const state: DataflowBfsState = {
+    visited: new Set<number>([sourceId]),
+    parent: new Map<number, BfsParentEntry>(),
+    nextQueue: [],
+    found: false,
+  };
   let queue = [sourceId];
-  let found = false;
 
   for (let depth = 1; depth <= maxDepth; depth++) {
-    const nextQueue: number[] = [];
+    state.nextQueue = [];
     for (const currentId of queue) {
-      const neighbors = neighborStmt.all(currentId) as Array<{
-        id: number;
-        file: string;
-        edge_kind: string;
-        expression: string;
-      }>;
+      const neighbors = neighborStmt.all(currentId) as DataflowNeighbor[];
       for (const n of neighbors) {
-        if (noTests && isTestFile(n.file)) continue;
-        if (n.id === targetId) {
-          if (!found) {
-            found = true;
-            parent.set(n.id, {
-              parentId: currentId,
-              edgeKind: n.edge_kind,
-              expression: n.expression,
-            });
-          }
-          continue;
-        }
-        if (!visited.has(n.id)) {
-          visited.add(n.id);
-          parent.set(n.id, {
-            parentId: currentId,
-            edgeKind: n.edge_kind,
-            expression: n.expression,
-          });
-          nextQueue.push(n.id);
-        }
+        processDataflowNeighbor(n, currentId, targetId, noTests, state);
       }
     }
-    if (found) break;
-    queue = nextQueue;
+    if (state.found) break;
+    queue = state.nextQueue;
     if (queue.length === 0) break;
   }
 
-  return found ? parent : null;
+  return state.found ? state.parent : null;
 }
 
 /** Reconstruct a path from BFS parent map. */
diff --git a/src/features/flow.ts b/src/features/flow.ts
index 18c522157..e2a4f1f21 100644
--- a/src/features/flow.ts
+++ b/src/features/flow.ts
@@ -133,6 +133,41 @@ interface BfsState {
   truncated: boolean;
 }
 
+interface FlowBfsFrame {
+  visited: Set<number>;
+  cycles: Array<{ from: string; to: string; depth: number }>;
+  nodeDepths: Map<number, number>;
+  idToNode: Map<number, NodeInfo>;
+  nextFrontier: number[];
+  levelNodes: NodeInfo[];
+}
+
+/** Process one callee row, recording cycle hits or expanding frontier. */
+function processFlowCallee(
+  c: CalleeRow,
+  fid: number,
+  depth: number,
+  noTests: boolean,
+  frame: FlowBfsFrame,
+): void {
+  if (noTests && isTestFile(c.file)) return;
+
+  if (frame.visited.has(c.id)) {
+    const fromNode = frame.idToNode.get(fid);
+    if (fromNode) {
+      frame.cycles.push({ from: fromNode.name, to: c.name, depth });
+    }
+    return;
+  }
+
+  frame.visited.add(c.id);
+  frame.nextFrontier.push(c.id);
+  const nodeInfo: NodeInfo = toSymbolRef(c);
+  frame.levelNodes.push(nodeInfo);
+  frame.nodeDepths.set(c.id, depth);
+  frame.idToNode.set(c.id, nodeInfo);
+}
+
 /** Forward BFS through callees, collecting steps, cycles, and node depth info. */
 function bfsCallees(
   db: ReturnType<typeof openReadonlyOrFail>,
@@ -157,37 +192,26 @@ function bfsCallees(
   );
 
   for (let d = 1; d <= maxDepth; d++) {
-    const nextFrontier: number[] = [];
-    const levelNodes: NodeInfo[] = [];
+    const frame: FlowBfsFrame = {
+      visited,
+      cycles,
+      nodeDepths,
+      idToNode,
+      nextFrontier: [],
+      levelNodes: [],
+    };
 
     for (const fid of frontier) {
-      const callees = calleesStmt.all(fid);
-
-      for (const c of callees) {
-        if (noTests && isTestFile(c.file)) continue;
-
-        if (visited.has(c.id)) {
-          const fromNode = idToNode.get(fid);
-          if (fromNode) {
-            cycles.push({ from: fromNode.name, to: c.name, depth: d });
-          }
-          continue;
-        }
-
-        visited.add(c.id);
-        nextFrontier.push(c.id);
-        const nodeInfo: NodeInfo = toSymbolRef(c);
-        levelNodes.push(nodeInfo);
-        nodeDepths.set(c.id, d);
-        idToNode.set(c.id, nodeInfo);
+      for (const c of calleesStmt.all(fid)) {
+        processFlowCallee(c, fid, d, noTests, frame);
       }
     }
 
-    if (levelNodes.length > 0) {
-      steps.push({ depth: d, nodes: levelNodes });
+    if (frame.levelNodes.length > 0) {
+      steps.push({ depth: d, nodes: frame.levelNodes });
     }
 
-    frontier = nextFrontier;
+    frontier = frame.nextFrontier;
     if (frontier.length === 0) break;
     if (d === maxDepth && frontier.length > 0) truncated = true;
   }
diff --git a/src/features/sequence.ts b/src/features/sequence.ts
index aa891d78b..db2db7fb2 100644
--- a/src/features/sequence.ts
+++ b/src/features/sequence.ts
@@ -91,6 +91,40 @@ interface BfsResult {
   truncated: boolean;
 }
 
+type CalleeNode = { id: number; name: string; file: string; kind: string; line: number };
+
+interface BfsFrame {
+  visited: Set<number>;
+  messages: SequenceMessage[];
+  fileSet: Set<string>;
+  idToNode: Map<number, CalleeNode>;
+  nextFrontier: number[];
+}
+
+function processCallee(
+  c: CalleeNode,
+  caller: CalleeNode,
+  depth: number,
+  noTests: boolean,
+  frame: BfsFrame,
+): void {
+  if (noTests && isTestFile(c.file)) return;
+
+  frame.fileSet.add(c.file);
+  frame.messages.push({
+    from: caller.file,
+    to: c.file,
+    label: c.name,
+    type: 'call',
+    depth,
+  });
+
+  if (frame.visited.has(c.id)) return;
+  frame.visited.add(c.id);
+  frame.nextFrontier.push(c.id);
+  frame.idToNode.set(c.id, c);
+}
+
 function bfsCallees(
   repo: Repository,
   matchNode: MatchNode,
@@ -101,46 +135,25 @@ function bfsCallees(
   let frontier = [matchNode.id];
   const messages: SequenceMessage[] = [];
   const fileSet = new Set<string>([matchNode.file]);
-  const idToNode = new Map<
-    number,
-    { id: number; name: string; file: string; kind: string; line: number }
-  >();
+  const idToNode = new Map<number, CalleeNode>();
   idToNode.set(matchNode.id, matchNode);
   let truncated = false;
 
   for (let d = 1; d <= maxDepth; d++) {
-    const nextFrontier: number[] = [];
+    const frame: BfsFrame = { visited, messages, fileSet, idToNode, nextFrontier: [] };
 
     for (const fid of frontier) {
-      const callees = repo.findCallees(fid);
       const caller = idToNode.get(fid)!;
-
-      for (const c of callees) {
-        if (noTests && isTestFile(c.file)) continue;
-
-        fileSet.add(c.file);
-        messages.push({
-          from: caller.file,
-          to: c.file,
-          label: c.name,
-          type: 'call',
-          depth: d,
-        });
-
-        if (visited.has(c.id)) continue;
-
-        visited.add(c.id);
-        nextFrontier.push(c.id);
-        idToNode.set(c.id, c);
+      for (const c of repo.findCallees(fid)) {
+        processCallee(c, caller, d, noTests, frame);
       }
     }
 
-    frontier = nextFrontier;
+    frontier = frame.nextFrontier;
     if (frontier.length === 0) break;
 
-    if (d === maxDepth && frontier.length > 0) {
-      const hasMoreCalls = frontier.some((fid) => repo.findCallees(fid).length > 0);
-      if (hasMoreCalls) truncated = true;
+    if (d === maxDepth && frontier.some((fid) => repo.findCallees(fid).length > 0)) {
+      truncated = true;
     }
   }
 
@@ -174,26 +187,16 @@ function annotateDataflow(
   }
 }
 
-function _annotateDataflowImpl(
-  db: BetterSqlite3Database,
+type DataflowStmts = {
+  getReturns: ReturnType<BetterSqlite3Database['prepare']>;
+  getFlowsTo: ReturnType<BetterSqlite3Database['prepare']>;
+};
+
+function appendReturnMessages(
   messages: SequenceMessage[],
-  idToNode: Map<number, { id: number; name: string; file: string; kind: string; line: number }>,
+  nodeByNameFile: Map<string, { id: number; name: string; file: string }>,
+  stmts: DataflowStmts,
 ): void {
-  const nodeByNameFile = new Map<string, { id: number; name: string; file: string }>();
-  for (const n of idToNode.values()) {
-    nodeByNameFile.set(`${n.name}|${n.file}`, n);
-  }
-
-  const getReturns = db.prepare(
-    `SELECT d.expression FROM dataflow d
-         WHERE d.source_id = ? AND d.kind = 'returns'`,
-  );
-  const getFlowsTo = db.prepare(
-    `SELECT d.expression FROM dataflow d
-         WHERE d.target_id = ? AND d.kind = 'flows_to'
-         ORDER BY d.param_index`,
-  );
-
   const seenReturns = new Set<string>();
   for (const msg of [...messages]) {
     if (msg.type !== 'call') continue;
@@ -203,40 +206,67 @@ function _annotateDataflowImpl(
     const returnKey = `${msg.to}->${msg.from}:${msg.label}`;
     if (seenReturns.has(returnKey)) continue;
 
-    const returns = getReturns.all(targetNode.id) as { expression: string }[];
-
-    if (returns.length > 0) {
-      seenReturns.add(returnKey);
-      const expr = returns[0]!.expression || 'result';
-      messages.push({
-        from: msg.to,
-        to: msg.from,
-        label: expr,
-        type: 'return',
-        depth: msg.depth,
-      });
-    }
+    const returns = stmts.getReturns.all(targetNode.id) as { expression: string }[];
+    if (returns.length === 0) continue;
+
+    seenReturns.add(returnKey);
+    messages.push({
+      from: msg.to,
+      to: msg.from,
+      label: returns[0]!.expression || 'result',
+      type: 'return',
+      depth: msg.depth,
+    });
   }
+}
 
+function annotateCallParams(
+  messages: SequenceMessage[],
+  nodeByNameFile: Map<string, { id: number; name: string; file: string }>,
+  stmts: DataflowStmts,
+): void {
   for (const msg of messages) {
     if (msg.type !== 'call') continue;
     const targetNode = nodeByNameFile.get(`${msg.label}|${msg.to}`);
     if (!targetNode) continue;
 
-    const params = getFlowsTo.all(targetNode.id) as { expression: string }[];
-
-    if (params.length > 0) {
-      const paramNames = params
-        .map((p) => p.expression)
-        .filter(Boolean)
-        .slice(0, 3);
-      if (paramNames.length > 0) {
-        msg.label = `${msg.label}(${paramNames.join(', ')})`;
-      }
+    const params = stmts.getFlowsTo.all(targetNode.id) as { expression: string }[];
+    const paramNames = params
+      .map((p) => p.expression)
+      .filter(Boolean)
+      .slice(0, 3);
+    if (paramNames.length > 0) {
+      msg.label = `${msg.label}(${paramNames.join(', ')})`;
     }
   }
 }
 
+function _annotateDataflowImpl(
+  db: BetterSqlite3Database,
+  messages: SequenceMessage[],
+  idToNode: Map<number, { id: number; name: string; file: string; kind: string; line: number }>,
+): void {
+  const nodeByNameFile = new Map<string, { id: number; name: string; file: string }>();
+  for (const n of idToNode.values()) {
+    nodeByNameFile.set(`${n.name}|${n.file}`, n);
+  }
+
+  const stmts: DataflowStmts = {
+    getReturns: db.prepare(
+      `SELECT d.expression FROM dataflow d
+         WHERE d.source_id = ? AND d.kind = 'returns'`,
+    ),
+    getFlowsTo: db.prepare(
+      `SELECT d.expression FROM dataflow d
+         WHERE d.target_id = ? AND d.kind = 'flows_to'
+         ORDER BY d.param_index`,
+    ),
+  };
+
+  appendReturnMessages(messages, nodeByNameFile, stmts);
+  annotateCallParams(messages, nodeByNameFile, stmts);
+}
+
 interface Participant {
   id: string;
   label: string;

From 9182a52d12bda8b0375fc74ef393efcf497ce726 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 16:26:42 -0600
Subject: [PATCH 24/27] refactor(extractors): adopt iterChildren +
 PUNCTUATION_TOKENS in elixir pushElixirSequenceItems
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the inline childCount loop with the shared iterChildren generator
configured with PUNCTUATION_TOKENS, completing phase 1 of the TS extractor
refactor plan (sync.json cluster 1). Behaviour preserved — same nodes are
pushed onto the worklist, just via the shared helper.

docs check acknowledged: internal refactor, no doc updates needed.
---
 src/extractors/elixir.ts | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/src/extractors/elixir.ts b/src/extractors/elixir.ts
index 1b547645c..bc3a6ede8 100644
--- a/src/extractors/elixir.ts
+++ b/src/extractors/elixir.ts
@@ -5,7 +5,7 @@ import type {
   TreeSitterNode,
   TreeSitterTree,
 } from '../types.js';
-import { findChild, nodeEndLine } from './helpers.js';
+import { findChild, iterChildren, nodeEndLine, PUNCTUATION_TOKENS } from './helpers.js';
 
 /**
  * Extract symbols from Elixir files.
@@ -256,11 +256,7 @@ function pushElixirBinaryOperatorOperands(node: TreeSitterNode, stack: TreeSitte
  * the worklist, skipping punctuation tokens.
  */
 function pushElixirSequenceItems(node: TreeSitterNode, stack: TreeSitterNode[]): void {
-  for (let i = 0; i < node.childCount; i++) {
-    const c = node.child(i);
-    if (!c) continue;
-    const t = c.type;
-    if (t === '[' || t === ']' || t === '{' || t === '}' || t === ',') continue;
+  for (const c of iterChildren(node, PUNCTUATION_TOKENS)) {
     stack.push(c);
   }
 }

From e6ea3ed2b82326ee0886e9c77adac09f0107ff3f Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Tue, 26 May 2026 16:36:29 -0600
Subject: [PATCH 25/27] refactor(extractors-rs): adopt shared child-iteration
 helpers (grind)

Wire forge phase 4 helpers into their consumers:

- find_first_child_of_types: collapse find_child(x, A).or_else(|| find_child(x, B))
  in fsharp.rs handle_application
- iter_children + PUNCTUATION_TOKENS: replace inline punctuation-skip loop in
  javascript.rs first_arg_is_string_literal

Closes 3 dead-ffi helpers extracted by forge phase 4. Both rewrites are
semantically identical to the code they replace.
---
 crates/codegraph-core/src/extractors/fsharp.rs     | 4 ++--
 crates/codegraph-core/src/extractors/javascript.rs | 6 ++----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/crates/codegraph-core/src/extractors/fsharp.rs b/crates/codegraph-core/src/extractors/fsharp.rs
index 752ffb05f..67892831a 100644
--- a/crates/codegraph-core/src/extractors/fsharp.rs
+++ b/crates/codegraph-core/src/extractors/fsharp.rs
@@ -321,8 +321,8 @@ fn handle_application(node: &Node, source: &[u8], symbols: &mut FileSymbols) {
             // matches the JS extractor (`identifier` first). Operator forms
             // like `( + )` have neither child; we emit nothing in that case,
             // mirroring the JS extractor's silent skip.
-            if let Some(inner) = find_child(&func_node, "identifier")
-                .or_else(|| find_child(&func_node, "long_identifier"))
+            if let Some(inner) =
+                find_first_child_of_types(&func_node, &["identifier", "long_identifier"])
             {
                 symbols.calls.push(Call {
                     name: node_text(&inner, source).to_string(),
diff --git a/crates/codegraph-core/src/extractors/javascript.rs b/crates/codegraph-core/src/extractors/javascript.rs
index d5403aa0f..1091ce29b 100644
--- a/crates/codegraph-core/src/extractors/javascript.rs
+++ b/crates/codegraph-core/src/extractors/javascript.rs
@@ -933,11 +933,9 @@ fn extract_callee_name<'a>(call_node: &Node, source: &'a [u8]) -> Option<&'a str
 /// used to distinguish Express/router route handlers (`app.get('/path', h)`)
 /// from Map/cache APIs that reuse the same verb names (`cache.get(user.id)`).
 fn first_arg_is_string_literal(args_node: &Node) -> bool {
-    for i in 0..args_node.child_count() {
-        let Some(child) = args_node.child(i) else { continue };
+    // Skip grammar punctuation; the first non-punctuation child is the first arg.
+    if let Some(child) = iter_children(args_node, PUNCTUATION_TOKENS).next() {
         let kind = child.kind();
-        // Skip parens and commas; the first non-punctuation child is the first arg.
-        if kind == "(" || kind == "," || kind == ")" { continue; }
         return kind == "string" || kind == "template_string";
     }
     false

From 32a0c5cf2ed0992cc733bb54061c8fdeff0f1f5e Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Wed, 27 May 2026 22:28:16 -0600
Subject: [PATCH 26/27] fix(tests): move column-width comment to the .tsx entry
 that actually drives it (#1240)

---
 tests/parsers/native-drop-classification.test.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts
index d617d4757..0eb89c854 100644
--- a/tests/parsers/native-drop-classification.test.ts
+++ b/tests/parsers/native-drop-classification.test.ts
@@ -109,8 +109,8 @@ describe('formatDropExtensionSummary', () => {
 
   it('right-pads the extension column and right-aligns the count column for tabular layout', () => {
     const buckets = new Map<string, string[]>([
-      ['.kt', ['a.kt']], // 100 files later — wider count column
-      ['.tsx', new Array(100).fill('x.tsx')],
+      ['.kt', ['a.kt']],
+      ['.tsx', new Array(100).fill('x.tsx')], // 100 files — sets wider count column
     ]);
     const out = formatDropExtensionSummary(buckets);
     // `.tsx` (4 chars) sets the ext width; `.kt` is padded to 4 chars.

From 9b0c04de47e1123560f151f5f0b2db479f2b0c00 Mon Sep 17 00:00:00 2001
From: carlos-alm <contato@carlosalmeida.com>
Date: Thu, 28 May 2026 02:06:54 -0600
Subject: [PATCH 27/27] fix(elixir): restore LIFO-compensating reverse-push in
 sequence and map helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pushElixirSequenceItems and pushElixirMapValues were pushing items in
forward order onto the LIFO worklist stack, causing tuple/list/map
parameters to be emitted in reverse source order (e.g. {x, _y} → ['_y',
'x'] instead of ['x', '_y']). The fix collects items then pushes them in
reverse so the LIFO pop restores source order, matching the native engine.
---
 src/extractors/elixir.ts | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/extractors/elixir.ts b/src/extractors/elixir.ts
index bc3a6ede8..a5b3ef13c 100644
--- a/src/extractors/elixir.ts
+++ b/src/extractors/elixir.ts
@@ -256,8 +256,9 @@ function pushElixirBinaryOperatorOperands(node: TreeSitterNode, stack: TreeSitte
  * the worklist, skipping punctuation tokens.
  */
 function pushElixirSequenceItems(node: TreeSitterNode, stack: TreeSitterNode[]): void {
-  for (const c of iterChildren(node, PUNCTUATION_TOKENS)) {
-    stack.push(c);
+  const items = [...iterChildren(node, PUNCTUATION_TOKENS)];
+  for (let i = items.length - 1; i >= 0; i--) {
+    stack.push(items[i] as TreeSitterNode);
   }
 }
 
@@ -267,6 +268,7 @@ function pushElixirSequenceItems(node: TreeSitterNode, stack: TreeSitterNode[]):
  * the leading `struct` child is intentionally skipped.
  */
 function pushElixirMapValues(node: TreeSitterNode, stack: TreeSitterNode[]): void {
+  const values: TreeSitterNode[] = [];
   for (let i = 0; i < node.childCount; i++) {
     const content = node.child(i);
     if (!content || content.type !== 'map_content') continue;
@@ -279,11 +281,14 @@ function pushElixirMapValues(node: TreeSitterNode, stack: TreeSitterNode[]): voi
         for (let p = 0; p < pair.childCount; p++) {
           const part = pair.child(p);
           if (!part || part.type === 'keyword') continue;
-          stack.push(part);
+          values.push(part);
         }
       }
     }
   }
+  for (let i = values.length - 1; i >= 0; i--) {
+    stack.push(values[i] as TreeSitterNode);
+  }
 }
 
 function handleDefprotocol(node: TreeSitterNode, ctx: ExtractorOutput): void {