leadforge-dev · shaypal5 · Apr 29, 2026 · Apr 29, 2026 · Apr 29, 2026 · Apr 29, 2026
diff --git a/.agent-plan.md b/.agent-plan.md
@@ -6,38 +6,47 @@
 
 ## Current System State
 
-**v0.4.0 in progress — Milestones 7–9 complete (PR open).** Full simulation engine + render/bundle
-layer + exposure filtering implemented. 545 tests passing.
+**v0.4.0 complete — Milestones 7–10 done.** Full simulation engine + render/bundle layer + exposure filtering + CLI commands implemented. 562 tests passing.
 
 ---
 
-## Next Up — Milestone 10: CLI `generate` command + `inspect` / `validate` stubs (v0.4.0)
+## Next Up — Milestone 11: Validation harness (v0.5.0)
 
-Goal: Wire `leadforge generate` CLI command end-to-end; implement `inspect` and `validate` output.
+Goal: Implement comprehensive bundle validation — invariant checks, realism heuristics, difficulty drift detection.
 
-- [ ] `cli/commands/generate.py` — parse flags, call `Generator.from_recipe().generate()`, call `.save()`
-- [ ] `cli/commands/inspect.py` — print manifest summary for a written bundle
-- [ ] `cli/commands/validate.py` — basic schema / FK / leakage checks on a written bundle
-- [ ] Tests for each command
+- [ ] `validation/invariants.py` — DAG acyclicity, FK integrity, determinism, exposure monotonicity
+- [ ] `validation/artifact_checks.py` — file presence, hash verification, schema conformance
+- [ ] `validation/realism.py` — distributional sanity checks (conversion rates, feature ranges)
+- [ ] `validation/difficulty.py` — difficulty profile adherence checks
+- [ ] `validation/drift.py` — cross-seed stability / drift detection
+- [ ] Wire into `cli/commands/validate.py` with richer output
+- [ ] Tests for each validation module
 
 ---
 
 ## Context Pointers
 
-- Milestone 8 scope: `docs/leadforge_implementation_plan.md` §10 "Milestone 8"
-- Render layer: `leadforge/render/` (snapshots, relational, tasks, manifests)
-- Bundle writer: `leadforge/api/bundle.py`
+- Milestone 11 scope: `docs/leadforge_implementation_plan.md` §10 "Milestone 11"
+- Current validate CLI: `leadforge/cli/commands/validate.py` (basic checks implemented in M10)
+- FK constraints: `leadforge/schema/relationships.py`
+- Feature spec: `leadforge/schema/features.py`
 
 ---
 
 ## Completed Phases
 
-### Milestone 9 — Exposure Filtering ✓ (v0.4.0 in PR)
-- `exposure/filters.py`: `BundleFilter` frozen dataclass; `FILTERS` dict keyed by `ExposureMode`; `get_filter()`
-- `exposure/redaction.py`: `write_metadata_dir()` — writes `metadata/` with `graph.json`, `graph.graphml`, `world_spec.json`, `latent_registry.json`, `mechanism_summary.json`
-- `exposure/modes.py`: `apply_exposure(bundle, root, mode)` — dispatch; skips `metadata/` for `student_public`
+### Milestone 10 — CLI Commands ✓ (v0.4.0)
+- `cli/commands/generate.py`: fully wired — parses all flags, calls `Generator.from_recipe().generate()`, writes bundle via `.save()`
+- `cli/commands/inspect.py`: reads `manifest.json` and prints summary (recipe, seed, mode, tables with row counts, task splits, metadata presence)
+- `cli/commands/validate.py`: checks manifest presence, required files, table row counts, SHA-256 hashes, task split integrity, FK constraints, leakage (unexpected columns)
+- 22 CLI tests (smoke, generate integration, inspect output, validate pass/fail/corrupt/missing); total 562 passing
+
+### Milestone 9 — Exposure Filtering ✓ (v0.4.0)
+- `exposure/filters.py`: `BundleFilter` frozen dataclass; `FILTERS` dict keyed by `ExposureMode`; `get_filter()` accepts `str | ExposureMode`
+- `exposure/metadata.py`: `write_metadata_dir()` — writes `metadata/` with `graph.json`, `graph.graphml`, `world_spec.json`, `latent_registry.json`, `mechanism_summary.json`
+- `exposure/modes.py`: `apply_exposure(bundle, root, mode)` — dispatch; removes stale `metadata/` for `student_public`
 - Wired into `api/bundle.py` between dataset card and manifest steps
-- 24 new tests; total 545 passing
+- 22 exposure tests; total 547 passing
 
 ### Milestone 8 — Render / Bundle Layer ✓ (v0.4.0 in PR)
 - `render/relational.py`: `to_dataframes()` — 9-table dict of typed DataFrames from SimulationResult + PopulationResult

diff --git a/leadforge/cli/commands/generate.py b/leadforge/cli/commands/generate.py
@@ -1,7 +1,13 @@
 """leadforge generate command."""
 
+from __future__ import annotations
+
+from pathlib import Path
+
 import typer
 
+from leadforge.core.exceptions import LeadforgeError
+
 
 def generate(
     recipe: str = typer.Option(..., "--recipe", "-r", help="Recipe ID to use."),
@@ -28,8 +34,53 @@ def generate(
     ),
 ) -> None:
     """Generate a synthetic CRM dataset bundle from a recipe."""
-    typer.echo(
-        "The 'generate' command is not yet implemented. Coming in v0.2.0.",
-        err=True,
-    )
-    raise typer.Exit(1)
+    from leadforge.api.generator import Generator
+    from leadforge.core.serialization import load_yaml
+
+    override_dict: dict | None = None
+    if override is not None:
+        override_path = Path(override)
+        if not override_path.exists():
+            typer.echo(f"Error: override file not found: {override_path}", err=True)
+            raise typer.Exit(1)
+        try:
+            loaded = load_yaml(override_path)
+        except LeadforgeError as exc:
+            typer.echo(f"Error: {exc}", err=True)
+            raise typer.Exit(1) from None
+        if loaded is not None and not isinstance(loaded, dict):
+            typer.echo(
+                "Error: override file must contain a YAML mapping at the top level.",
+                err=True,
+            )
+            raise typer.Exit(1)
+        override_dict = loaded
+
+    try:
+        gen = Generator.from_recipe(
+            recipe,
+            seed=seed,
+            exposure_mode=mode,
+            difficulty=difficulty,
+            n_accounts=n_accounts,
+            n_contacts=n_contacts,
+            n_leads=n_leads,
+            horizon_days=horizon_days,
+            override=override_dict,
+        )
+    except (LeadforgeError, ValueError) as exc:
+        typer.echo(f"Error: {exc}", err=True)
+        raise typer.Exit(1) from None
+
+    typer.echo(f"Generating bundle with recipe '{recipe}', seed={seed}, mode={mode} ...")
+
+    try:
+        bundle = gen.generate()
+    except (LeadforgeError, RuntimeError) as exc:
+        typer.echo(f"Error during generation: {exc}", err=True)
+        raise typer.Exit(1) from None
+
+    typer.echo(f"Writing bundle to {out} ...")
+    bundle.save(out)
+
+    typer.echo(f"Done. Bundle written to {out}")
diff --git a/leadforge/cli/commands/inspect.py b/leadforge/cli/commands/inspect.py
@@ -1,14 +1,81 @@
 """leadforge inspect command."""
 
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
 import typer
 
+from leadforge.core.exceptions import LeadforgeError
+from leadforge.core.serialization import load_json
+
 
 def inspect(
     bundle_path: str = typer.Argument(..., help="Path to a generated bundle directory."),
 ) -> None:
     """Inspect a generated dataset bundle and print a summary."""
-    typer.echo(
-        "The 'inspect' command is not yet implemented. Coming in v0.4.0.",
-        err=True,
-    )
-    raise typer.Exit(1)
+    root = Path(bundle_path)
+
+    if not root.exists():
+        typer.echo(f"Error: path does not exist: {root}", err=True)
+        raise typer.Exit(1)
+    if not root.is_dir():
+        typer.echo(f"Error: not a directory (expected a bundle dir): {root}", err=True)
+        raise typer.Exit(1)
+
+    manifest_path = root / "manifest.json"
+    if not manifest_path.exists():
+        typer.echo(f"Error: no manifest.json found in {root}", err=True)
+        raise typer.Exit(1)
+
+    try:
+        manifest = load_json(manifest_path)
+    except LeadforgeError as exc:
+        typer.echo(f"Error: {exc}", err=True)
+        raise typer.Exit(1) from None
+
+    if not isinstance(manifest, dict):
+        typer.echo("Error: manifest.json is not a JSON object", err=True)
+        raise typer.Exit(1)
+
+    typer.echo(f"Bundle: {root}")
+    typer.echo(f"  Recipe:        {manifest.get('recipe_id', '?')}")
+    typer.echo(f"  Seed:          {manifest.get('seed', '?')}")
+    typer.echo(f"  Mode:          {manifest.get('exposure_mode', '?')}")
+    typer.echo(f"  Difficulty:    {manifest.get('difficulty', '?')}")
+    typer.echo(f"  Horizon days:  {manifest.get('horizon_days', '?')}")
+    typer.echo(f"  Generated at:  {manifest.get('generation_timestamp', '?')}")
+    typer.echo(f"  Package:       leadforge {manifest.get('package_version', '?')}")
+    typer.echo(f"  Schema ver:    {manifest.get('bundle_schema_version', '?')}")
+    typer.echo(f"  Motif family:  {manifest.get('motif_family', '?')}")
+
+    typer.echo("")
+    typer.echo("Tables:")
+    tables = manifest.get("tables", {})
+    if isinstance(tables, dict):
+        for name, info in tables.items():
+            row_count = _safe_get(info, "row_count", "?")
+            typer.echo(f"  {name:25s}  {str(row_count):>8} rows")
+
+    tasks = manifest.get("tasks", {})
+    if isinstance(tasks, dict) and tasks:
+        typer.echo("")
+        typer.echo("Tasks:")
+        for task_id, info in tasks.items():
+            train = _safe_get(info, "train_rows", "?")
+            valid = _safe_get(info, "valid_rows", "?")
+            test = _safe_get(info, "test_rows", "?")
+            typer.echo(f"  {task_id}")
+            typer.echo(f"    train={train}  valid={valid}  test={test}")
+
+    has_metadata = (root / "metadata").is_dir()
+    typer.echo("")
+    typer.echo(f"Metadata dir:    {'present' if has_metadata else 'absent'}")
+
+
+def _safe_get(obj: Any, key: str, default: str = "?") -> Any:
+    """Get a key from *obj* if it's a dict, else return *default*."""
+    if isinstance(obj, dict):
+        return obj.get(key, default)
+    return default
diff --git a/leadforge/cli/commands/validate.py b/leadforge/cli/commands/validate.py
@@ -1,14 +1,42 @@
 """leadforge validate command."""
 
+from __future__ import annotations
+
+from pathlib import Path
+
 import typer
 
+from leadforge.core.exceptions import LeadforgeError
+
 
 def validate(
     bundle_path: str = typer.Argument(..., help="Path to a generated bundle directory."),
 ) -> None:
     """Run schema and artifact validation on a generated bundle."""
-    typer.echo(
-        "The 'validate' command is not yet implemented. Coming in v0.5.0.",
-        err=True,
-    )
-    raise typer.Exit(1)
+    from leadforge.validation.bundle_checks import validate_bundle
+
+    root = Path(bundle_path)
+
+    if not root.exists():
+        typer.echo(f"FAIL: path does not exist: {root}", err=True)
+        raise typer.Exit(1)
+    if not root.is_dir():
+        typer.echo(f"FAIL: not a directory: {root}", err=True)
+        raise typer.Exit(1)
+    if not (root / "manifest.json").exists():
+        typer.echo(f"FAIL: no manifest.json in {root}", err=True)
+        raise typer.Exit(1)
+
+    try:
+        errors = validate_bundle(root)
+    except LeadforgeError as exc:
+        typer.echo(f"FAIL: {exc}", err=True)
+        raise typer.Exit(1) from None
+
+    if errors:
+        typer.echo(f"FAIL: {len(errors)} validation error(s):", err=True)
+        for e in errors:
+            typer.echo(f"  - {e}", err=True)
+        raise typer.Exit(1)
+
+    typer.echo(f"OK: bundle at {root} passed all checks.")
diff --git a/leadforge/core/hashing.py b/leadforge/core/hashing.py
@@ -1,13 +1,17 @@
-"""Deterministic config hashing for manifest identity.
+"""Deterministic config hashing and file digest helpers.
 
 A config hash uniquely identifies a (recipe, config, seed, version) tuple and
 is embedded in every generated manifest so that bundles can be traced back to
 the exact parameters that produced them.
+
+:func:`file_sha256` provides a reusable SHA-256 file digest used by the
+manifest builder and the bundle validator.
 """
 
 import hashlib
 import json
 from dataclasses import asdict
+from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
@@ -26,6 +30,15 @@ def _canonical(obj: Any) -> Any:
     return obj
 
 
+def file_sha256(path: Path) -> str:
+    """Return the hex-encoded SHA-256 digest of the file at *path*."""
+    h = hashlib.sha256()
+    with path.open("rb") as fh:
+        for chunk in iter(lambda: fh.read(65536), b""):
+            h.update(chunk)
+    return h.hexdigest()
+
+
 def hash_config(config: "GenerationConfig") -> str:
     """Return a stable hex-encoded SHA-256 digest of *config*.
 

diff --git a/leadforge/render/manifests.py b/leadforge/render/manifests.py
@@ -8,12 +8,13 @@
 
 from __future__ import annotations
 
-import hashlib
 import json
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 
+from leadforge.core.hashing import file_sha256
+
 if TYPE_CHECKING:
     from leadforge.core.models import GenerationConfig
     from leadforge.structure.graph import WorldGraph
@@ -55,7 +56,7 @@ def build_manifest(
     for table_name, row_count in table_row_counts.items():
         rel_path = f"tables/{table_name}.parquet"
         abs_path = bundle_root / rel_path
-        sha = _sha256(abs_path)
+        sha = file_sha256(abs_path)
         tables[table_name] = {"row_count": row_count, "file": rel_path, "sha256": sha}
 
     # Build task entries.
@@ -65,7 +66,7 @@ def build_manifest(
         for split_name, row_count in split_counts.items():
             rel_path = f"tasks/{task_id}/{split_name}.parquet"
             abs_path = bundle_root / rel_path
-            sha = _sha256(abs_path)
+            sha = file_sha256(abs_path)
             entry[f"{split_name}_rows"] = row_count
             entry[f"{split_name}_sha256"] = sha
         tasks[task_id] = entry
@@ -93,12 +94,3 @@ def write_manifest(manifest: dict[str, Any], bundle_root: Path) -> Path:
     path = bundle_root / "manifest.json"
     path.write_text(json.dumps(manifest, indent=2))
     return path
-
-
-def _sha256(path: Path) -> str:
-    """Return the hex-encoded SHA-256 digest of *path*."""
-    h = hashlib.sha256()
-    with path.open("rb") as fh:
-        for chunk in iter(lambda: fh.read(65536), b""):
-            h.update(chunk)
-    return h.hexdigest()