diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/build_pipeline.rs index 7690d903c..38c71518a 100644 --- a/crates/codegraph-core/src/build_pipeline.rs +++ b/crates/codegraph-core/src/build_pipeline.rs @@ -658,8 +658,10 @@ fn reparse_barrel_candidates( if !barrel_paths_to_parse.is_empty() { barrel_paths_to_parse.sort(); barrel_paths_to_parse.dedup(); - // Barrel files are re-export-only — no function bodies or dataflow, - // so skip dataflow/AST analysis to avoid unnecessary overhead. + // Re-parse barrel candidates — these may be hybrid barrels (reexports + // AND local definitions / call sites, see #979). Dataflow/AST analysis + // is skipped because the barrel is not itself a "changed" file; Stage 7 + // will reconstruct all outgoing edge kinds from the fresh parse. let barrel_parsed = parallel::parse_files_parallel( &barrel_paths_to_parse, root_dir, @@ -669,11 +671,23 @@ fn reparse_barrel_candidates( for mut sym in barrel_parsed { let rel = relative_path(root_dir, &sym.file); sym.file = rel.clone(); - // Delete outgoing import/reexport edges for barrel files being re-parsed - // (scoped to import-related kinds to avoid dropping calls edges) + // Delete every outgoing edge kind that Stage 7 re-emits for re-parsed + // barrel candidates. Previously only 'imports' and 'reexports' were + // purged, so 'calls', 'receiver', 'extends', 'implements', + // 'imports-type', and 'dynamic-imports' accumulated duplicates on + // every incremental rebuild (#979). + // + // Use a negative filter (`NOT IN`) rather than an allowlist so any + // future edge kind added to Stage 7 is automatically covered. Only + // 'contains' and 'parameter_of' must be preserved: those are emitted + // by Stage 5 (insert_nodes) which only runs on the original + // file_symbols (changed + reverse-deps). Barrel candidates are + // merged into file_symbols here in Stage 6b *after* Stage 5 has + // already run, so wiping contains/parameter_of would permanently + // drop them. let _ = conn.execute( "DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = ?1) \ - AND kind IN ('imports', 'reexports')", + AND kind NOT IN ('contains', 'parameter_of')", rusqlite::params![&rel], ); // Re-resolve imports for the barrel file diff --git a/tests/fixtures/issue-979-hybrid-barrel/app.js b/tests/fixtures/issue-979-hybrid-barrel/app.js new file mode 100644 index 000000000..40fc2ef42 --- /dev/null +++ b/tests/fixtures/issue-979-hybrid-barrel/app.js @@ -0,0 +1,5 @@ +import { run } from './consumers/driver.js'; + +export function main(inputs) { + return run(inputs); +} diff --git a/tests/fixtures/issue-979-hybrid-barrel/consumers/driver.js b/tests/fixtures/issue-979-hybrid-barrel/consumers/driver.js new file mode 100644 index 000000000..02674c01f --- /dev/null +++ b/tests/fixtures/issue-979-hybrid-barrel/consumers/driver.js @@ -0,0 +1,5 @@ +import { processAll } from '../core/index.js'; + +export function run(inputs) { + return processAll(inputs); +} diff --git a/tests/fixtures/issue-979-hybrid-barrel/core/helpers.js b/tests/fixtures/issue-979-hybrid-barrel/core/helpers.js new file mode 100644 index 000000000..73f86e3f4 --- /dev/null +++ b/tests/fixtures/issue-979-hybrid-barrel/core/helpers.js @@ -0,0 +1,9 @@ +export function clampValue(v, lo, hi) { + if (v < lo) return lo; + if (v > hi) return hi; + return v; +} + +export function doubleValue(v) { + return v * 2; +} diff --git a/tests/fixtures/issue-979-hybrid-barrel/core/index.js b/tests/fixtures/issue-979-hybrid-barrel/core/index.js new file mode 100644 index 000000000..c2485c3c3 --- /dev/null +++ b/tests/fixtures/issue-979-hybrid-barrel/core/index.js @@ -0,0 +1,12 @@ +// Hybrid barrel: re-exports from helpers.js AND has local definitions that call helpers. +export { doubleValue } from './helpers.js'; + +import { clampValue, doubleValue } from './helpers.js'; + +export function processValue(v) { + return doubleValue(clampValue(v, 0, 100)); +} + +export function processAll(values) { + return values.map((v) => processValue(v)); +} diff --git a/tests/integration/incremental-edge-duplication.test.ts b/tests/integration/incremental-edge-duplication.test.ts new file mode 100644 index 000000000..4c0aff5bc --- /dev/null +++ b/tests/integration/incremental-edge-duplication.test.ts @@ -0,0 +1,100 @@ +/** + * Regression test for #979: incremental rebuilds leak duplicate edges. + * + * Root cause: when `reparse_barrel_candidates` (Stage 6b, native engine) picks + * up a file imported by a reverse-dep, it used to purge only the 'imports' and + * 'reexports' edge kinds before Stage 7 re-emitted every edge kind, so every + * rebuild appended new copies of 'calls', 'receiver', 'extends', 'implements', + * 'imports-type', and 'dynamic-imports' edges. + * + * This test modifies a source file multiple times in a row and asserts: + * 1. The total edge count does not grow across incremental rebuilds. + * 2. The count of `(source_id, target_id, kind)` rows never exceeds the + * pre-existing duplicates from a fresh full build (i.e. incremental + * does not introduce new duplicates). + */ +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import Database from 'better-sqlite3'; +import { describe, expect, it } from 'vitest'; +import { buildGraph } from '../../src/domain/graph/builder.js'; + +const FIXTURE_DIR = path.join(import.meta.dirname, '..', 'fixtures', 'issue-979-hybrid-barrel'); + +function copyDirSync(src: string, dest: string) { + fs.mkdirSync(dest, { recursive: true }); + for (const entry of fs.readdirSync(src, { withFileTypes: true })) { + const s = path.join(src, entry.name); + const d = path.join(dest, entry.name); + if (entry.isDirectory()) copyDirSync(s, d); + else fs.copyFileSync(s, d); + } +} + +function edgeStats(dbPath: string) { + const db = new Database(dbPath, { readonly: true }); + try { + const total = (db.prepare('SELECT COUNT(*) AS c FROM edges').get() as { c: number }).c; + const duplicates = ( + db + .prepare( + `SELECT source_id, target_id, kind, COUNT(*) AS c FROM edges + GROUP BY source_id, target_id, kind HAVING c > 1`, + ) + .all() as Array<{ c: number }> + ).reduce((sum, row) => sum + row.c - 1, 0); + return { total, duplicates }; + } finally { + db.close(); + } +} + +describe('Issue #979: incremental edges do not duplicate', () => { + it('3 incremental rebuilds produce stable edge counts with no new duplicates', async () => { + const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'cg-979-')); + const fullDir = path.join(tmpBase, 'full'); + const incrDir = path.join(tmpBase, 'incr'); + + try { + copyDirSync(FIXTURE_DIR, fullDir); + copyDirSync(FIXTURE_DIR, incrDir); + + // Baseline full build on the incr copy so subsequent rebuilds are truly incremental. + await buildGraph(incrDir, { incremental: false, skipRegistry: true }); + + // Apply 3 rounds of "change one file" + incremental rebuild, recording + // edge totals and duplicate counts after each rebuild. + const history: Array<{ total: number; duplicates: number }> = []; + for (let i = 0; i < 3; i++) { + fs.appendFileSync(path.join(incrDir, 'consumers', 'driver.js'), `\n// bump ${i}\n`); + await buildGraph(incrDir, { incremental: true, skipRegistry: true }); + history.push(edgeStats(path.join(incrDir, '.codegraph', 'graph.db'))); + } + + // Mirror all 3 mutations on the full copy, then do a single clean full build. + for (let i = 0; i < 3; i++) { + fs.appendFileSync(path.join(fullDir, 'consumers', 'driver.js'), `\n// bump ${i}\n`); + } + await buildGraph(fullDir, { incremental: false, skipRegistry: true }); + const freshFull = edgeStats(path.join(fullDir, '.codegraph', 'graph.db')); + + // Invariant 1: incremental edge count must not grow across rebuilds. + expect(history[1].total).toBe(history[0].total); + expect(history[2].total).toBe(history[0].total); + + // Invariant 2: incremental must not introduce new duplicates beyond the + // pre-existing duplicates present in a clean full build. + expect(history[2].duplicates).toBeLessThanOrEqual(freshFull.duplicates); + + // Invariant 3: after applying all 3 bumps, both dirs describe the same + // code, so the incremental edge total must match a clean full build. + // This catches stale edges that survive the scoped DELETE (e.g. edges + // pointing at orphaned node ids) which would not be flagged as + // (source, target, kind) duplicates. + expect(history[2].total).toBe(freshFull.total); + } finally { + fs.rmSync(tmpBase, { recursive: true, force: true }); + } + }, 60_000); +});