Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
08d254f
refactor(parity): render orchestrator-drop summary as a per-extension…
carlos-alm May 26, 2026
bde22ec
Merge remote-tracking branch 'origin/main' into refactor/orchestrator…
carlos-alm May 26, 2026
9c8be55
refactor(extractors): extend shared helpers for identifier and symbol…
carlos-alm May 26, 2026
9c3d016
refactor(extractors): adopt shared helpers across language extractors
carlos-alm May 26, 2026
5abe6ad
refactor(extractors): break elixir param/map binding cycle
carlos-alm May 26, 2026
0d687c4
refactor(extractors-rs): extend shared helpers for identifier and sym…
carlos-alm May 26, 2026
f10fcab
refactor(extractors-rs): adopt shared helpers across language extractors
carlos-alm May 26, 2026
d9bbc8f
refactor(extractors-rs): break elixir param/map binding cycle
carlos-alm May 26, 2026
24c8cf5
refactor(ast-analysis): break visitor-utils destructuring cycle
carlos-alm May 26, 2026
4f34034
refactor(ast-analysis): decompose engine and visitors
carlos-alm May 26, 2026
dab4dcf
refactor(builder): break pipeline cycle by extracting orchestrator-se…
carlos-alm May 26, 2026
6637066
refactor(builder): decompose builder stages and adopt shared helpers
carlos-alm May 26, 2026
40d418d
refactor(graph): extract helpers in cycles and journal
carlos-alm May 26, 2026
b3c36f4
refactor(core-rs): collapse walker mutual recursion into single-entry…
carlos-alm May 26, 2026
b49cab5
refactor(core-rs): decompose pipeline, read queries, and edge builders
carlos-alm May 26, 2026
d2eab30
refactor(parser): extract LANGUAGE_REGISTRY iteration and worker boun…
carlos-alm May 26, 2026
6819cd6
refactor(analysis): decompose module-map and reduce complexity in fn-…
carlos-alm May 26, 2026
4f34404
refactor(search): decompose generator and reduce complexity in semant…
carlos-alm May 26, 2026
0a12e8c
refactor(features): decompose complexity, structure, graph-enrichment…
carlos-alm May 26, 2026
76bd476
refactor(features): reduce complexity in cfg and cochange
carlos-alm May 26, 2026
4240dfe
refactor(graph): decompose Leiden optimiser and roles classifier
carlos-alm May 26, 2026
900af10
refactor(presentation): extract shared rendering helpers in cfg and flow
carlos-alm May 26, 2026
88bb711
refactor(scripts): separate config from execution in benchmarking scr…
carlos-alm May 26, 2026
02efaeb
refactor(features): reduce warning-level complexity in feature warnin…
carlos-alm May 26, 2026
9182a52
refactor(extractors): adopt iterChildren + PUNCTUATION_TOKENS in elix…
carlos-alm May 26, 2026
e6ea3ed
refactor(extractors-rs): adopt shared child-iteration helpers (grind)
carlos-alm May 26, 2026
fad2140
fix: resolve merge conflicts with main
carlos-alm May 28, 2026
32a0c5c
fix(tests): move column-width comment to the .tsx entry that actually…
carlos-alm May 28, 2026
9b0c04d
fix(elixir): restore LIFO-compensating reverse-push in sequence and m…
carlos-alm May 28, 2026
7cc64d7
fix: resolve merge conflicts with main
carlos-alm May 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 85 additions & 76 deletions src/domain/graph/builder/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,108 +76,117 @@ export function passesIncludeExclude(
return true;
}

/**
 * Immutable bundle of settings shared by every directory visited during a
 * single file-collection walk. Built once by the entry point and handed down
 * unchanged so nothing is recomputed per directory.
 */
interface CollectContext {
  /** Directory that include/exclude globs are resolved relative to. */
  readonly rootDir: string;
  /** True when at least one include or exclude pattern is configured. */
  readonly hasGlobFilters: boolean;
  /** Compiled include patterns, tested against root-relative paths. */
  readonly includeRegexes: readonly RegExp[];
  /** Compiled exclude patterns, tested against root-relative paths. */
  readonly excludeRegexes: readonly RegExp[];
  /** Additional directory names to skip, or null when none are configured. */
  readonly extraIgnore: Set<string> | null;
  /** Real paths of directories already entered; mutated as the walk proceeds. */
  readonly visited: Set<string>;
}

/**
 * Decide whether the walk must skip `dir`, recording it as visited otherwise.
 *
 * Returns true when the real path of `dir` is already in `visited` (a warning
 * is logged) or when the real path cannot be resolved at all (no warning).
 * Returns false after adding the real path to `visited`.
 */
function isSymlinkLoop(dir: string, visited: Set<string>): boolean {
  let resolved: string | null = null;
  try {
    resolved = fs.realpathSync(dir);
  } catch {
    // Unresolvable path: leave `resolved` null so it is reported as "skip".
  }
  if (resolved === null) return true;

  if (!visited.has(resolved)) {
    visited.add(resolved);
    return false;
  }

  warn(`Symlink loop detected, skipping: ${dir}`);
  return true;
}

/**
 * List the entries of `dir` as `fs.Dirent` objects.
 *
 * @param dir - Directory to read.
 * @returns The directory entries, or `null` when the directory cannot be
 *   read. In the failure case a warning naming `dir` and the reason has
 *   already been logged, so callers only need to skip the directory.
 */
function readDirSafe(dir: string): fs.Dirent[] | null {
  try {
    return fs.readdirSync(dir, { withFileTypes: true });
  } catch (err: unknown) {
    // Narrow rather than assert: a thrown value is not guaranteed to be an
    // Error, and an unchecked cast would log "undefined" as the reason.
    const reason = err instanceof Error ? err.message : String(err);
    warn(`Cannot read directory ${dir}: ${reason}`);
    return null;
  }
}

/**
 * Decide whether the entry at path `full` should be added to the collected
 * files: its extension must be in `EXTENSIONS`, and when glob filters are
 * active its path relative to `ctx.rootDir` must pass include/exclude.
 */
function isCollectableSourceFile(full: string, entry: fs.Dirent, ctx: CollectContext): boolean {
  const hasKnownExtension = EXTENSIONS.has(path.extname(entry.name));
  if (!hasKnownExtension) return false;

  if (ctx.hasGlobFilters) {
    // Globs are matched against the normalized, root-relative path.
    const relativePath = normalizePath(path.relative(ctx.rootDir, full));
    return passesIncludeExclude(relativePath, ctx.includeRegexes, ctx.excludeRegexes);
  }
  return true;
}

/**
 * Depth-first walk of `dir` that appends every collectable source file to
 * `files`. When `directories` is non-null, each directory that directly
 * contains at least one collected file is added to it.
 *
 * Directories that are symlink loops, unresolvable, or unreadable are
 * skipped without aborting the rest of the walk.
 */
function walkCollect(
  dir: string,
  files: string[],
  directories: Set<string> | null,
  ctx: CollectContext,
): void {
  if (isSymlinkLoop(dir, ctx.visited)) return;

  const entries = readDirSafe(dir);
  if (entries === null) return;

  // Counts only files found directly in `dir`; files pushed by recursive
  // calls belong to the subdirectory, not to this one.
  let collectedHere = 0;
  for (const entry of entries) {
    if (shouldSkipEntry(entry, ctx.extraIgnore)) continue;

    const entryPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      walkCollect(entryPath, files, directories, ctx);
      continue;
    }
    if (!isCollectableSourceFile(entryPath, entry, ctx)) continue;

    files.push(entryPath);
    collectedHere += 1;
  }

  if (directories !== null && collectedHere > 0) {
    directories.add(dir);
  }
}

/**
 * Recursively collect all source files under `dir`.
 *
 * When `directories` is a Set, every directory that directly contains at
 * least one collected file is added to it and the result is
 * `{ files, directories }`; otherwise the result is the `files` array.
 *
 * `dir` establishes the project root against which `config.include` /
 * `config.exclude` globs are matched. Patterns are compiled once here and
 * the traversal itself is delegated to `walkCollect`.
 *
 * The trailing underscore-prefixed parameters are legacy recursion state.
 * They are still honored when supplied so existing callers keep working,
 * but new callers should omit them.
 *
 * @param dir - Directory to start from.
 * @param files - Array that collected file paths are appended to.
 * @param config - Supplies optional `include`, `exclude` and `ignoreDirs`.
 * @param directories - Set to fill with file-bearing directories, or null.
 * @param _visited - Legacy: real paths already visited (fresh Set by default).
 * @param _rootDir - Legacy: overrides `dir` as the root for glob matching.
 * @param _includeRegexes - Legacy: precompiled include patterns.
 * @param _excludeRegexes - Legacy: precompiled exclude patterns.
 */
export function collectFiles(
  dir: string,
  files: string[],
  config: Partial<CodegraphConfig>,
  directories: Set<string>,
  _visited?: Set<string>,
  _rootDir?: string,
  _includeRegexes?: readonly RegExp[],
  _excludeRegexes?: readonly RegExp[],
): { files: string[]; directories: Set<string> };
export function collectFiles(
  dir: string,
  files?: string[],
  config?: Partial<CodegraphConfig>,
  directories?: null,
  _visited?: Set<string>,
  _rootDir?: string,
  _includeRegexes?: readonly RegExp[],
  _excludeRegexes?: readonly RegExp[],
): string[];
export function collectFiles(
  dir: string,
  files: string[] = [],
  config: Partial<CodegraphConfig> = {},
  directories: Set<string> | null = null,
  _visited: Set<string> = new Set(),
  _rootDir?: string,
  _includeRegexes?: readonly RegExp[],
  _excludeRegexes?: readonly RegExp[],
): string[] | { files: string[]; directories: Set<string> } {
  // Compile include/exclude patterns once per walk, not once per directory.
  const includeRegexes = _includeRegexes ?? compileGlobs(config.include);
  const excludeRegexes = _excludeRegexes ?? compileGlobs(config.exclude);

  const ctx: CollectContext = {
    rootDir: _rootDir ?? dir,
    includeRegexes,
    excludeRegexes,
    hasGlobFilters: includeRegexes.length > 0 || excludeRegexes.length > 0,
    extraIgnore: config.ignoreDirs ? new Set(config.ignoreDirs) : null,
    visited: _visited,
  };

  // `instanceof` narrows `directories`, so no type assertions are needed.
  if (directories instanceof Set) {
    walkCollect(dir, files, directories, ctx);
    return { files, directories };
  }

  walkCollect(dir, files, null, ctx);
  return files;
}

Expand Down
Loading
Loading