leadforge-dev · shaypal5 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/.agent-plan.md b/.agent-plan.md
@@ -39,12 +39,12 @@ bundle schema version 5 → 6; **peer generation-scheme abstraction extracted
 early against the known-good lead-scoring path + physical reorg into
 `schemes/`**. (Framing follows Google `lifetime_value`/ZILN and Voyantis pLTV.)
 
-Status: `LTV-M0` planning landed (#102) + reframed to pLTV regression (#103) +
-reframed to peer schemes. `LTV-M1`: `LTV-Pb` (lifecycle entity rows +
-registries) opened as **#104** (awaiting review). Next is `LTV-M2`
-(`GenerationScheme` abstraction + physical reorg, `LTV-Pd…Pf`) — can start in
-parallel since it only touches the existing lead-scoring path; `LTV-Pc`
-(pLTV feature/task specs) also outstanding in `LTV-M1`.
+Status: `LTV-M0` landed (#102, #103, #106). `LTV-M1`: `LTV-Pb` merged (#104);
+`LTV-Pc` (pLTV feature/task specs) still outstanding. `LTV-M2`: `LTV-Pd`
+(`GenerationScheme` protocol + registry) opened as **#107** (awaiting review,
+output verified byte-identical). Next in M2: `LTV-Pe` (physically move lead-scoring
+pipeline into `schemes/lead_scoring/`), then `LTV-Pf` (scaffold
+`schemes/lifecycle/`).
 
 ---
 

diff --git a/docs/ltv/roadmap.md b/docs/ltv/roadmap.md
@@ -88,12 +88,13 @@ Total: ~18 PRs across 9 milestones.
 > keeps lead-scoring output byte-identical (hash-determinism) and the public
 > API stable.
 
-- [ ] **`LTV-Pd`** — `refactor(api): GenerationScheme protocol + registry`.
-  Add `schemes/base.py` (`GenerationScheme` protocol + `SCHEME_REGISTRY`). Wrap
-  the **existing** lead-scoring pipeline as `LeadScoringScheme` *in place* (no
-  file moves yet); route `Generator.generate()` through the registry; recipes
-  gain a `scheme:` field (defaulting to `lead_scoring`). Output byte-identical.
-  - Tests: registry lookup, dispatch, hash-determinism, full suite green.
+- [x] **`LTV-Pd`** — `refactor(api): GenerationScheme protocol + registry`
+  (**PR #107**). Added `schemes/base.py` (`GenerationScheme` protocol +
+  `SCHEME_REGISTRY`) and `schemes/lead_scoring/` wrapping the existing pipeline
+  *in place*; `Generator.generate()` routes through the registry; `Recipe` and
+  `WorldSpec` gain a `scheme` field (default `lead_scoring`). Verified
+  byte-identical (all 14 files of a pinned-timestamp bundle hash identically,
+  main vs branch).
   - Labels: `type: refactor`, `layer: api`, `layer: core`
 - [ ] **`LTV-Pe`** — `refactor: move lead-scoring pipeline to schemes/lead_scoring/`.
   Physically relocate the lead-scoring population/engine/state/mechanisms/

diff --git a/leadforge/api/generator.py b/leadforge/api/generator.py
@@ -5,7 +5,7 @@
 from typing import Any
 
 from leadforge.core.enums import DifficultyProfile, ExposureMode
-from leadforge.core.models import DifficultyParams, GenerationConfig, WorldBundle, WorldSpec
+from leadforge.core.models import GenerationConfig, WorldBundle, WorldSpec
 from leadforge.core.rng import RNGRoot
 from leadforge.core.sentinels import _MISSING
 
@@ -117,7 +117,7 @@ def from_recipe(
 
         narrative_data = recipe.load_narrative()
         narrative = NarrativeSpec.from_dict(narrative_data) if narrative_data else None
-        world_spec = WorldSpec(config=config, narrative=narrative)
+        world_spec = WorldSpec(config=config, narrative=narrative, scheme=recipe.scheme)
 
         return cls(world_spec)
 
@@ -151,9 +151,7 @@ def generate(
         """
         import dataclasses
 
-        from leadforge.simulation.engine import simulate_world
-        from leadforge.simulation.population import build_population
-        from leadforge.structure.sampler import sample_hidden_graph
+        from leadforge.schemes import get_scheme
 
         config = self._world_spec.config
 
@@ -180,69 +178,7 @@ def generate(
                 "Generator.from_recipe() to resolve the narrative."
             )
 
-        rng_root = RNGRoot(config.seed)
-        world_graph = sample_hidden_graph(rng_root)
-
-        # Load category-latent correlations from difficulty profile if available.
-        from leadforge.api.recipes import Recipe
-        from leadforge.recipes.registry import load_recipe
-
-        category_latent_correlations = None
-        try:
-            raw = load_recipe(config.recipe_id)
-            recipe = Recipe.from_dict(raw)
-            profiles = recipe.load_difficulty_profiles()
-            profile = profiles.get(config.difficulty.value, {})
-            category_latent_correlations = profile.get("category_latent_correlations")
-
-            # Construct DifficultyParams from profile and attach to config.
-            # All keys are required — a missing key indicates a malformed profile
-            # YAML and should fail loudly rather than silently defaulting.
-            required_keys = (
-                "signal_strength",
-                "noise_scale",
-                "missing_rate",
-                "outlier_rate",
-                "conversion_rate_range",
-                "committee_friction",
-            )
-            missing = [k for k in required_keys if k not in profile]
-            if missing:
-                from leadforge.core.exceptions import InvalidRecipeError
-
-                raise InvalidRecipeError(
-                    f"Difficulty profile '{config.difficulty.value}' is missing "
-                    f"required keys: {missing}"
-                )
-            cr_range = profile["conversion_rate_range"]
-            difficulty_params = DifficultyParams(
-                signal_strength=profile["signal_strength"],
-                noise_scale=profile["noise_scale"],
-                missing_rate=profile["missing_rate"],
-                outlier_rate=profile["outlier_rate"],
-                conversion_rate_lo=cr_range[0],
-                conversion_rate_hi=cr_range[1],
-                committee_friction=profile["committee_friction"],
-            )
-            config = dataclasses.replace(config, difficulty_params=difficulty_params)
-        except (FileNotFoundError, KeyError):
-            category_latent_correlations = None
-
-        population = build_population(
-            config,
-            narrative,
-            world_graph,
-            category_latent_correlations=category_latent_correlations,
-        )
-        latent_touch_intensity = kwargs.pop("latent_touch_intensity", False)
-        result = simulate_world(
-            config, population, world_graph, latent_touch_intensity=latent_touch_intensity
-        )
-
-        spec = WorldSpec(config=config, narrative=narrative)
-        return WorldBundle(
-            spec=spec,
-            population=population,
-            simulation_result=result,
-            world_graph=world_graph,
-        )
+        # Dispatch to the scheme: it owns structure sampling, difficulty
+        # interpretation, population, simulation, and bundle assembly.
+        scheme = get_scheme(self._world_spec.scheme)
+        return scheme.build_world(config, narrative, **kwargs)
diff --git a/leadforge/api/recipes.py b/leadforge/api/recipes.py
@@ -21,6 +21,7 @@
 
 from leadforge.core.enums import DifficultyProfile, ExposureMode
 from leadforge.core.exceptions import InvalidRecipeError
+from leadforge.core.models import DEFAULT_SCHEME
 from leadforge.core.sentinels import _MISSING
 from leadforge.core.serialization import load_yaml
 
@@ -42,6 +43,9 @@ class Recipe:
     horizon_days: int
     label_window_days: int | None = None
     snapshot_day: int | None = None
+    # Which generation scheme this recipe runs (see leadforge.schemes).
+    # Defaults to the lead-scoring scheme so existing recipes need no change.
+    scheme: str = DEFAULT_SCHEME
 
     # ------------------------------------------------------------------ #
     # Construction
@@ -119,6 +123,10 @@ def from_dict(cls, data: dict[str, Any]) -> Recipe:
                 )
             snapshot_day = raw_sd
 
+        scheme = data.get("scheme", DEFAULT_SCHEME)
+        if not isinstance(scheme, str) or not scheme:
+            raise InvalidRecipeError(f"'scheme' must be a non-empty string, got {scheme!r}")
+
         return cls(
             id=data["id"],
             title=data["title"],
@@ -131,6 +139,7 @@ def from_dict(cls, data: dict[str, Any]) -> Recipe:
             horizon_days=horizon_days,
             label_window_days=label_window_days,
             snapshot_day=snapshot_day,
+            scheme=scheme,
         )
 
     # ------------------------------------------------------------------ #

diff --git a/leadforge/core/models.py b/leadforge/core/models.py
@@ -16,6 +16,13 @@
     from leadforge.structure.graph import WorldGraph
 
 
+# Default generation scheme when a recipe/world does not declare one.  Kept here
+# (the shared core layer) because ``leadforge.core`` must not import
+# ``leadforge.schemes`` (the scheme package depends on core, not the reverse).
+# ``LeadScoringScheme.name`` must equal this value; a test guards the match.
+DEFAULT_SCHEME = "lead_scoring"
+
+
 @dataclass(frozen=True)
 class DifficultyParams:
     """Numeric parameters from a difficulty profile.
@@ -144,6 +151,9 @@ class WorldSpec:
 
     config: GenerationConfig = field(default_factory=GenerationConfig)
     narrative: NarrativeSpec | None = None
+    # Generation scheme this world runs (see leadforge.schemes).  Defaults to
+    # the lead-scoring pipeline so direct WorldSpec construction is unchanged.
+    scheme: str = DEFAULT_SCHEME
 
 
 @dataclass

diff --git a/leadforge/recipes/b2b_saas_procurement_v1/recipe.yaml b/leadforge/recipes/b2b_saas_procurement_v1/recipe.yaml
@@ -1,6 +1,10 @@
 id: b2b_saas_procurement_v1
 title: "Mid-market B2B SaaS — Procurement & AP Automation"
 vertical: mid_market_b2b_saas
+# Generation scheme this recipe runs (see leadforge.schemes).  Lead scoring is
+# the default; declared explicitly here for clarity now that leadforge hosts
+# multiple peer schemes.
+scheme: lead_scoring
 description: >
   A mid-market B2B SaaS company selling procurement and AP workflow
   automation software to 200–2,000 employee firms in the US and UK,

diff --git a/leadforge/schemes/__init__.py b/leadforge/schemes/__init__.py
@@ -0,0 +1,28 @@
+"""Generation-scheme registry.
+
+Importing this package registers the built-in schemes as a side effect, so
+``from leadforge.schemes import get_scheme`` is always sufficient to resolve any
+shipped scheme.  See ``leadforge.schemes.base`` and ``docs/ltv/design.md`` §2.5.
+"""
+
+from __future__ import annotations
+
+# Import built-in scheme modules for their registration side effects.
+from leadforge.schemes import lead_scoring as _lead_scoring  # noqa: F401
+from leadforge.schemes.base import (
+    SCHEME_REGISTRY,
+    GenerationScheme,
+    UnknownSchemeError,
+    available_schemes,
+    get_scheme,
+    register_scheme,
+)
+
+__all__ = [
+    "SCHEME_REGISTRY",
+    "GenerationScheme",
+    "UnknownSchemeError",
+    "available_schemes",
+    "get_scheme",
+    "register_scheme",
+]
diff --git a/leadforge/schemes/base.py b/leadforge/schemes/base.py
@@ -0,0 +1,112 @@
+"""Generation-scheme abstraction — the registry of peer dataset pipelines.
+
+leadforge hosts multiple *generation schemes* as peers (e.g. ``lead_scoring``
+and, from the LTV workstream, ``lifecycle``).  Each scheme owns one end-to-end
+pipeline shape — population → simulation → render → tasks — while the outer
+envelope (RNG, config resolution, bundle layout, manifest, exposure dispatch,
+CLI) is shared.  See ``docs/ltv/design.md`` §2.5.
+
+A scheme is a small object registered by ``name`` in :data:`SCHEME_REGISTRY`
+and resolved via :func:`get_scheme`.  The recipe declares which scheme it runs
+via its ``scheme:`` field; :class:`~leadforge.api.generator.Generator` looks the
+scheme up and runs its pipeline rather than branching on a recipe type.
+
+Where the seam sits
+-------------------
+A scheme owns the **whole** generation pipeline from ``(config, narrative)`` to
+in-memory world artifacts: structure/graph sampling, difficulty interpretation,
+population, simulation, and :class:`~leadforge.core.models.WorldBundle`
+assembly.  These steps differ between schemes (the lead-scoring hidden DAG,
+``DifficultyParams``, and touch emission are all lead-scoring-specific), so the
+boundary is the single :meth:`GenerationScheme.build_world` method rather than a
+set of lead-scoring-shaped sub-steps.  This keeps
+:meth:`~leadforge.api.generator.Generator.generate` genuinely scheme-agnostic.
+
+Scheme-specific options are passed through ``Generator.generate(**kwargs)`` to
+``build_world`` and consumed by the scheme that understands them (e.g.
+``latent_touch_intensity`` for lead scoring).
+
+Scope note
+----------
+Render dispatch (``to_dataframes`` / snapshots / task splits, currently in
+``WorldBundle.save`` → the bundle writer) is not yet scheme-owned; it will be
+folded in as the lifecycle scheme is built out (see ``docs/ltv/roadmap.md`` — LTV-M6).
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable
+
+from leadforge.core.exceptions import LeadforgeError
+
+if TYPE_CHECKING:
+    from leadforge.core.models import GenerationConfig, WorldBundle
+    from leadforge.narrative.spec import NarrativeSpec
+
+
+class UnknownSchemeError(LeadforgeError):
+    """Raised when a generation-scheme name is not present in the registry."""
+
+
+@runtime_checkable
+class GenerationScheme(Protocol):
+    """One end-to-end dataset generation pipeline shape.
+
+    Implementations are registered by :attr:`name` and resolved at generation
+    time.  :meth:`build_world` must be deterministic given ``(config,
+    narrative, options)`` per the package's RNG contract.
+    """
+
+    name: str
+
+    def build_world(
+        self,
+        config: GenerationConfig,
+        narrative: NarrativeSpec,
+        **options: Any,
+    ) -> WorldBundle:
+        """Run the scheme's full pipeline and return an in-memory bundle.
+
+        Implementations own structure sampling, difficulty interpretation,
+        population, simulation, and bundle assembly.  ``options`` carries
+        scheme-specific flags forwarded from ``Generator.generate(**kwargs)``;
+        a scheme ignores options it does not recognise.
+        """
+        ...
+
+
+# Name → scheme instance.  Populated by importing ``leadforge.schemes`` (its
+# package ``__init__`` imports each built-in scheme module, which self-registers).
+SCHEME_REGISTRY: dict[str, GenerationScheme] = {}
+
+
+def register_scheme(scheme: GenerationScheme) -> None:
+    """Register *scheme* under its ``name``.
+
+    Idempotent for the same instance; raises ``ValueError`` if a *different* scheme
+    is already registered under the same name (guards against accidental shadowing).
+    """
+    name = scheme.name
+    existing = SCHEME_REGISTRY.get(name)
+    if existing is not None and existing is not scheme:
+        raise ValueError(f"A different generation scheme named {name!r} is already registered")
+    SCHEME_REGISTRY[name] = scheme
+
+
+def get_scheme(name: str) -> GenerationScheme:
+    """Return the registered scheme named *name*.
+
+    Raises:
+        UnknownSchemeError: if no scheme is registered under *name*.
+    """
+    try:
+        return SCHEME_REGISTRY[name]
+    except KeyError:
+        raise UnknownSchemeError(
+            f"Unknown generation scheme {name!r}. Registered schemes: {sorted(SCHEME_REGISTRY)}"
+        ) from None
+
+
+def available_schemes() -> tuple[str, ...]:
+    """Return the names of all registered schemes, sorted."""
+    return tuple(sorted(SCHEME_REGISTRY))