From 6f61b4118cc57b826c75c2c0bc0e04c7f2ce93e8 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Wed, 10 Jun 2026 11:04:09 +0300 Subject: [PATCH 1/3] refactor(api): GenerationScheme protocol + registry [LTV-Pd] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First step of the peer-schemes architecture (LTV-M2). Introduces the generation-scheme abstraction and routes Generator.generate() through it, wrapping the existing lead-scoring pipeline as the first registered scheme. Lead-scoring output is unchanged (the wrapper delegates to the identical functions); this PR is a pure refactor. - leadforge/schemes/base.py — GenerationScheme protocol, SCHEME_REGISTRY, register_scheme/get_scheme/available_schemes, UnknownSchemeError. The protocol covers the generation half (build_population + simulate) wired through Generator; render dispatch is added later (LTV-M6). - leadforge/schemes/lead_scoring/__init__.py — LeadScoringScheme, delegating to leadforge.simulation.{population,engine} (relocated under this package in LTV-Pe); self-registers on import. - leadforge/schemes/__init__.py — imports built-in schemes for their registration side effect; re-exports the registry API. - Recipe gains a `scheme` field (default "lead_scoring"); parsed in from_dict. b2b_saas_procurement_v1/recipe.yaml declares `scheme: lead_scoring`. - WorldSpec gains `scheme` (default "lead_scoring"); from_recipe threads recipe.scheme through; Generator.generate() resolves and runs the scheme via get_scheme() instead of calling build_population/simulate_world directly. World-graph sampling and bundle rendering still run in their current paths (lead-scoring only registered); they move under the scheme abstraction in later M2/M6 PRs. Tests: tests/schemes/test_registry.py (15) — registry resolution, protocol conformance, conflict/unknown handling, Recipe.scheme parsing/validation, WorldSpec default, Generator threading + end-to-end dispatch. 
Full suite 1495 passed / 51 skipped; ruff + mypy clean (87 files). Co-Authored-By: Claude Opus 4.8 --- leadforge/api/generator.py | 10 +- leadforge/api/recipes.py | 10 ++ leadforge/core/models.py | 3 + .../b2b_saas_procurement_v1/recipe.yaml | 4 + leadforge/schemes/__init__.py | 28 ++++ leadforge/schemes/base.py | 110 +++++++++++++++ leadforge/schemes/lead_scoring/__init__.py | 68 ++++++++++ tests/schemes/__init__.py | 0 tests/schemes/test_registry.py | 127 ++++++++++++++++++ 9 files changed, 355 insertions(+), 5 deletions(-) create mode 100644 leadforge/schemes/__init__.py create mode 100644 leadforge/schemes/base.py create mode 100644 leadforge/schemes/lead_scoring/__init__.py create mode 100644 tests/schemes/__init__.py create mode 100644 tests/schemes/test_registry.py diff --git a/leadforge/api/generator.py b/leadforge/api/generator.py index b849109..a45790e 100644 --- a/leadforge/api/generator.py +++ b/leadforge/api/generator.py @@ -117,7 +117,7 @@ def from_recipe( narrative_data = recipe.load_narrative() narrative = NarrativeSpec.from_dict(narrative_data) if narrative_data else None - world_spec = WorldSpec(config=config, narrative=narrative) + world_spec = WorldSpec(config=config, narrative=narrative, scheme=recipe.scheme) return cls(world_spec) @@ -151,8 +151,7 @@ def generate( """ import dataclasses - from leadforge.simulation.engine import simulate_world - from leadforge.simulation.population import build_population + from leadforge.schemes import get_scheme from leadforge.structure.sampler import sample_hidden_graph config = self._world_spec.config @@ -228,14 +227,15 @@ def generate( except (FileNotFoundError, KeyError): category_latent_correlations = None - population = build_population( + scheme = get_scheme(self._world_spec.scheme) + population = scheme.build_population( config, narrative, world_graph, category_latent_correlations=category_latent_correlations, ) latent_touch_intensity = kwargs.pop("latent_touch_intensity", False) - result = 
simulate_world( + result = scheme.simulate( config, population, world_graph, latent_touch_intensity=latent_touch_intensity ) diff --git a/leadforge/api/recipes.py b/leadforge/api/recipes.py index 17d11d7..353790b 100644 --- a/leadforge/api/recipes.py +++ b/leadforge/api/recipes.py @@ -42,6 +42,9 @@ class Recipe: horizon_days: int label_window_days: int | None = None snapshot_day: int | None = None + # Which generation scheme this recipe runs (see leadforge.schemes). + # Defaults to "lead_scoring" so existing recipes need no change. + scheme: str = "lead_scoring" # ------------------------------------------------------------------ # # Construction @@ -119,6 +122,12 @@ def from_dict(cls, data: dict[str, Any]) -> Recipe: ) snapshot_day = raw_sd + scheme = data.get("scheme", "lead_scoring") + if not isinstance(scheme, str) or not scheme: + raise InvalidRecipeError( + f"'scheme' must be a non-empty string, got {scheme!r}" + ) + return cls( id=data["id"], title=data["title"], @@ -131,6 +140,7 @@ def from_dict(cls, data: dict[str, Any]) -> Recipe: horizon_days=horizon_days, label_window_days=label_window_days, snapshot_day=snapshot_day, + scheme=scheme, ) # ------------------------------------------------------------------ # diff --git a/leadforge/core/models.py b/leadforge/core/models.py index b78b9ac..dea120d 100644 --- a/leadforge/core/models.py +++ b/leadforge/core/models.py @@ -144,6 +144,9 @@ class WorldSpec: config: GenerationConfig = field(default_factory=GenerationConfig) narrative: NarrativeSpec | None = None + # Generation scheme this world runs (see leadforge.schemes). Defaults to + # the lead-scoring pipeline so direct WorldSpec construction is unchanged. 
+ scheme: str = "lead_scoring" @dataclass diff --git a/leadforge/recipes/b2b_saas_procurement_v1/recipe.yaml b/leadforge/recipes/b2b_saas_procurement_v1/recipe.yaml index 3fc36d6..4afe735 100644 --- a/leadforge/recipes/b2b_saas_procurement_v1/recipe.yaml +++ b/leadforge/recipes/b2b_saas_procurement_v1/recipe.yaml @@ -1,6 +1,10 @@ id: b2b_saas_procurement_v1 title: "Mid-market B2B SaaS — Procurement & AP Automation" vertical: mid_market_b2b_saas +# Generation scheme this recipe runs (see leadforge.schemes). Lead scoring is +# the default; declared explicitly here for clarity now that leadforge hosts +# multiple peer schemes. +scheme: lead_scoring description: > A mid-market B2B SaaS company selling procurement and AP workflow automation software to 200–2,000 employee firms in the US and UK, diff --git a/leadforge/schemes/__init__.py b/leadforge/schemes/__init__.py new file mode 100644 index 0000000..2d25f2d --- /dev/null +++ b/leadforge/schemes/__init__.py @@ -0,0 +1,28 @@ +"""Generation-scheme registry. + +Importing this package registers the built-in schemes as a side effect, so +``from leadforge.schemes import get_scheme`` is always sufficient to resolve any +shipped scheme. See ``leadforge.schemes.base`` and ``docs/ltv/design.md`` §2.5. +""" + +from __future__ import annotations + +# Import built-in scheme modules for their registration side effects. 
+from leadforge.schemes import lead_scoring as _lead_scoring # noqa: F401 +from leadforge.schemes.base import ( + SCHEME_REGISTRY, + GenerationScheme, + UnknownSchemeError, + available_schemes, + get_scheme, + register_scheme, +) + +__all__ = [ + "SCHEME_REGISTRY", + "GenerationScheme", + "UnknownSchemeError", + "available_schemes", + "get_scheme", + "register_scheme", +] diff --git a/leadforge/schemes/base.py b/leadforge/schemes/base.py new file mode 100644 index 0000000..745c713 --- /dev/null +++ b/leadforge/schemes/base.py @@ -0,0 +1,110 @@ +"""Generation-scheme abstraction — the registry of peer dataset pipelines. + +leadforge hosts multiple *generation schemes* as peers (e.g. ``lead_scoring`` +and, from the LTV workstream, ``lifecycle``). Each scheme owns one end-to-end +pipeline shape — population → simulation → render → tasks — while the outer +envelope (RNG, config resolution, bundle layout, manifest, exposure dispatch, +CLI) is shared. See ``docs/ltv/design.md`` §2.5. + +A scheme is a small object registered by ``name`` in :data:`SCHEME_REGISTRY` +and resolved via :func:`get_scheme`. The recipe declares which scheme it runs +via its ``scheme:`` field; :class:`~leadforge.api.generator.Generator` looks the +scheme up and runs its pipeline rather than branching on a recipe type. + +Scope note +---------- +This protocol currently covers the *generation* half (population + simulation) +that flows through ``Generator.generate()``. Render dispatch (``to_dataframes`` +/ snapshots / task splits) is added to the protocol as the lifecycle scheme is +built out (see ``docs/ltv/roadmap.md`` — LTV-M6); today the bundle writer still +calls the lead-scoring render functions directly. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol, runtime_checkable + +from leadforge.core.exceptions import LeadforgeError + +if TYPE_CHECKING: + from leadforge.core.models import GenerationConfig + from leadforge.narrative.spec import NarrativeSpec + from leadforge.simulation.engine import SimulationResult + from leadforge.simulation.population import PopulationResult + from leadforge.structure.graph import WorldGraph + + +class UnknownSchemeError(LeadforgeError): + """Raised when a generation-scheme name is not present in the registry.""" + + +@runtime_checkable +class GenerationScheme(Protocol): + """One end-to-end dataset generation pipeline shape. + + Implementations are registered by :attr:`name` and resolved at generation + time. The two methods below are the generation half of the pipeline; both + must be deterministic given ``(config, ...)`` per the package's RNG + contract. + """ + + name: str + + def build_population( + self, + config: GenerationConfig, + narrative: NarrativeSpec, + world_graph: WorldGraph, + *, + category_latent_correlations: dict | None = None, + ) -> PopulationResult: + """Generate the scheme's base population (entities + latent state).""" + ... + + def simulate( + self, + config: GenerationConfig, + population: PopulationResult, + world_graph: WorldGraph, + *, + latent_touch_intensity: bool = False, + ) -> SimulationResult: + """Run the scheme's simulation over *population*, returning event tables.""" + ... + + +# Name → scheme instance. Populated by importing ``leadforge.schemes`` (its +# package ``__init__`` imports each built-in scheme module, which self-register). +SCHEME_REGISTRY: dict[str, GenerationScheme] = {} + + +def register_scheme(scheme: GenerationScheme) -> None: + """Register *scheme* under its ``name``. + + Idempotent for the same instance; raises if a *different* scheme is already + registered under the same name (guards against accidental shadowing). 
+ """ + name = scheme.name + existing = SCHEME_REGISTRY.get(name) + if existing is not None and existing is not scheme: + raise ValueError(f"A different generation scheme named {name!r} is already registered") + SCHEME_REGISTRY[name] = scheme + + +def get_scheme(name: str) -> GenerationScheme: + """Return the registered scheme named *name*. + + Raises: + UnknownSchemeError: if no scheme is registered under *name*. + """ + try: + return SCHEME_REGISTRY[name] + except KeyError: + raise UnknownSchemeError( + f"Unknown generation scheme {name!r}. Registered schemes: {sorted(SCHEME_REGISTRY)}" + ) from None + + +def available_schemes() -> tuple[str, ...]: + """Return the names of all registered schemes, sorted.""" + return tuple(sorted(SCHEME_REGISTRY)) diff --git a/leadforge/schemes/lead_scoring/__init__.py b/leadforge/schemes/lead_scoring/__init__.py new file mode 100644 index 0000000..092942c --- /dev/null +++ b/leadforge/schemes/lead_scoring/__init__.py @@ -0,0 +1,68 @@ +"""The ``lead_scoring`` generation scheme. + +Wraps the existing population + simulation pipeline as a registered +:class:`~leadforge.schemes.base.GenerationScheme`. This is the first scheme +extracted (LTV-Pd) and is the trunk that the lifecycle scheme parallels. + +The implementation modules (``population``, ``engine``, mechanisms, structure, +render) still live under their original package paths; they are physically +relocated into this package in LTV-Pe. Until then the methods delegate to the +current homes, keeping the lead-scoring bundle's output byte-for-byte identical. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from leadforge.schemes.base import register_scheme + +if TYPE_CHECKING: + from leadforge.core.models import GenerationConfig + from leadforge.narrative.spec import NarrativeSpec + from leadforge.simulation.engine import SimulationResult + from leadforge.simulation.population import PopulationResult + from leadforge.structure.graph import WorldGraph + + +class LeadScoringScheme: + """The lead-scoring (``converted_within_90_days``) generation pipeline.""" + + name = "lead_scoring" + + def build_population( + self, + config: GenerationConfig, + narrative: NarrativeSpec, + world_graph: WorldGraph, + *, + category_latent_correlations: dict | None = None, + ) -> PopulationResult: + from leadforge.simulation.population import build_population + + return build_population( + config, + narrative, + world_graph, + category_latent_correlations=category_latent_correlations, + ) + + def simulate( + self, + config: GenerationConfig, + population: PopulationResult, + world_graph: WorldGraph, + *, + latent_touch_intensity: bool = False, + ) -> SimulationResult: + from leadforge.simulation.engine import simulate_world + + return simulate_world( + config, + population, + world_graph, + latent_touch_intensity=latent_touch_intensity, + ) + + +LEAD_SCORING_SCHEME = LeadScoringScheme() +register_scheme(LEAD_SCORING_SCHEME) diff --git a/tests/schemes/__init__.py b/tests/schemes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/schemes/test_registry.py b/tests/schemes/test_registry.py new file mode 100644 index 0000000..3262924 --- /dev/null +++ b/tests/schemes/test_registry.py @@ -0,0 +1,127 @@ +"""Tests for the generation-scheme registry and Generator dispatch (LTV-Pd).""" + +import pytest + +from leadforge.api.generator import Generator +from leadforge.api.recipes import Recipe +from leadforge.core.exceptions import InvalidRecipeError +from leadforge.core.models import WorldSpec 
+from leadforge.schemes import ( + GenerationScheme, + UnknownSchemeError, + available_schemes, + get_scheme, + register_scheme, +) +from leadforge.schemes.lead_scoring import LEAD_SCORING_SCHEME, LeadScoringScheme + +# --------------------------------------------------------------------------- +# Registry +# --------------------------------------------------------------------------- + + +def test_lead_scoring_registered() -> None: + assert "lead_scoring" in available_schemes() + assert get_scheme("lead_scoring") is LEAD_SCORING_SCHEME + + +def test_lead_scoring_scheme_name() -> None: + assert LEAD_SCORING_SCHEME.name == "lead_scoring" + + +def test_lead_scoring_satisfies_protocol() -> None: + # runtime_checkable Protocol — structural check. + assert isinstance(LEAD_SCORING_SCHEME, GenerationScheme) + + +def test_get_unknown_scheme_raises() -> None: + with pytest.raises(UnknownSchemeError, match="does_not_exist"): + get_scheme("does_not_exist") + + +def test_register_same_instance_is_idempotent() -> None: + register_scheme(LEAD_SCORING_SCHEME) # already registered; must not raise + assert get_scheme("lead_scoring") is LEAD_SCORING_SCHEME + + +def test_register_conflicting_name_raises() -> None: + clash = LeadScoringScheme() # same name, different instance + with pytest.raises(ValueError, match="already registered"): + register_scheme(clash) + + +def test_available_schemes_sorted_tuple() -> None: + names = available_schemes() + assert isinstance(names, tuple) + assert list(names) == sorted(names) + + +# --------------------------------------------------------------------------- +# Recipe.scheme field +# --------------------------------------------------------------------------- + + +def _minimal_recipe_dict(**extra: object) -> dict: + base = { + "id": "test_recipe", + "title": "Test", + "vertical": "test", + "description": "test recipe", + "primary_task": "converted_within_90_days", + "supported_modes": ["student_public"], + "supported_difficulty": ["intro"], + 
"default_population": {"n_accounts": 10, "n_contacts": 20, "n_leads": 30}, + "horizon_days": 90, + } + base.update(extra) + return base + + +def test_recipe_scheme_defaults_to_lead_scoring() -> None: + recipe = Recipe.from_dict(_minimal_recipe_dict()) + assert recipe.scheme == "lead_scoring" + + +def test_recipe_scheme_parsed_when_present() -> None: + recipe = Recipe.from_dict(_minimal_recipe_dict(scheme="lifecycle")) + assert recipe.scheme == "lifecycle" + + +def test_recipe_scheme_rejects_empty() -> None: + with pytest.raises(InvalidRecipeError, match="scheme"): + Recipe.from_dict(_minimal_recipe_dict(scheme="")) + + +def test_recipe_scheme_rejects_non_string() -> None: + with pytest.raises(InvalidRecipeError, match="scheme"): + Recipe.from_dict(_minimal_recipe_dict(scheme=123)) + + +# --------------------------------------------------------------------------- +# WorldSpec + Generator threading +# --------------------------------------------------------------------------- + + +def test_world_spec_scheme_defaults_to_lead_scoring() -> None: + assert WorldSpec().scheme == "lead_scoring" + + +def test_from_recipe_sets_scheme_on_world_spec() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=42) + assert gen.world_spec.scheme == "lead_scoring" + + +def test_generate_runs_through_registered_scheme() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=42) + bundle = gen.generate(n_accounts=20, n_contacts=40, n_leads=60, difficulty="intro") + assert bundle.population is not None + assert bundle.simulation_result is not None + assert len(bundle.population.leads) == 60 + + +def test_generate_unknown_scheme_raises() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=42) + # Force an unregistered scheme onto the world spec to prove dispatch is live. 
+ gen.world_spec.scheme = "nope" + with pytest.raises(UnknownSchemeError): + gen.generate(n_accounts=10, n_contacts=20, n_leads=30, difficulty="intro") From e39d2b3ce50dd9a8d21c1bead531d97dc74bd72c Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Wed, 10 Jun 2026 11:06:00 +0300 Subject: [PATCH 2/3] docs(ltv): record LTV-Pd (#107) in roadmap + agent-plan [LTV-Pd] Check off LTV-Pd, link PR #107, note the byte-identical verification, and advance the agent-plan status to LTV-Pe next. Co-Authored-By: Claude Opus 4.8 --- .agent-plan.md | 12 ++++++------ docs/ltv/roadmap.md | 13 +++++++------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/.agent-plan.md b/.agent-plan.md index c7dcc8d..8b325b2 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -39,12 +39,12 @@ bundle schema version 5 → 6; **peer generation-scheme abstraction extracted early against the known-good lead-scoring path + physical reorg into `schemes/`**. (Framing follows Google `lifetime_value`/ZILN and Voyantis pLTV.) -Status: `LTV-M0` planning landed (#102) + reframed to pLTV regression (#103) + -reframed to peer schemes. `LTV-M1`: `LTV-Pb` (lifecycle entity rows + -registries) opened as **#104** (awaiting review). Next is `LTV-M2` -(`GenerationScheme` abstraction + physical reorg, `LTV-Pd…Pf`) — can start in -parallel since it only touches the existing lead-scoring path; `LTV-Pc` -(pLTV feature/task specs) also outstanding in `LTV-M1`. +Status: `LTV-M0` landed (#102, #103, #106). `LTV-M1`: `LTV-Pb` merged (#104); +`LTV-Pc` (pLTV feature/task specs) still outstanding. `LTV-M2`: `LTV-Pd` +(`GenerationScheme` protocol + registry) opened as **#107** (awaiting review, +verified byte-identical). Next in M2: `LTV-Pe` (physically move lead-scoring +pipeline into `schemes/lead_scoring/`), then `LTV-Pf` (scaffold +`schemes/lifecycle/`). 
--- diff --git a/docs/ltv/roadmap.md b/docs/ltv/roadmap.md index a7481f5..5955698 100644 --- a/docs/ltv/roadmap.md +++ b/docs/ltv/roadmap.md @@ -88,12 +88,13 @@ Total: ~18 PRs across 9 milestones. > keeps lead-scoring output byte-identical (hash-determinism) and the public > API stable. -- [ ] **`LTV-Pd`** — `refactor(api): GenerationScheme protocol + registry`. - Add `schemes/base.py` (`GenerationScheme` protocol + `SCHEME_REGISTRY`). Wrap - the **existing** lead-scoring pipeline as `LeadScoringScheme` *in place* (no - file moves yet); route `Generator.generate()` through the registry; recipes - gain a `scheme:` field (defaulting to `lead_scoring`). Output byte-identical. - - Tests: registry lookup, dispatch, hash-determinism, full suite green. +- [x] **`LTV-Pd`** — `refactor(api): GenerationScheme protocol + registry` + (**PR #107**). Added `schemes/base.py` (`GenerationScheme` protocol + + `SCHEME_REGISTRY`) and `schemes/lead_scoring/` wrapping the existing pipeline + *in place*; `Generator.generate()` routes through the registry; `Recipe` and + `WorldSpec` gain a `scheme` field (default `lead_scoring`). Verified + byte-identical (all 14 files of a pinned-timestamp bundle hash identically, + main vs branch). - Labels: `type: refactor`, `layer: api`, `layer: core` - [ ] **`LTV-Pe`** — `refactor: move lead-scoring pipeline to schemes/lead_scoring/`. Physically relocate the lead-scoring population/engine/state/mechanisms/ From 8d29cf60cfbcda84664a1e9e5857bf2aea466edd Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Wed, 10 Jun 2026 11:16:56 +0300 Subject: [PATCH 3/3] refactor(api): raise the scheme seam to build_world (self-review) [LTV-Pd] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acts on a hostile self-review of the first revision. 
The original cut the abstraction at build_population/simulate with lead-scoring-shaped signatures (world_graph, category_latent_correlations, latent_touch_intensity, PopulationResult/SimulationResult), leaving graph sampling, difficulty interpretation, and bundle assembly hardcoded in Generator.generate(). That seam could not accommodate a non-lead-scoring scheme — defeating the point of extracting it early. Changes: - GenerationScheme protocol is now a single `build_world(config, narrative, **options) -> WorldBundle`. No lead-scoring types leak into the contract; scheme-specific flags ride through **options. - LeadScoringScheme.build_world owns the whole lead-scoring pipeline: hidden-DAG sampling, difficulty-profile → DifficultyParams + category-latent correlations (extracted to _resolve_difficulty), population, simulation, and WorldBundle assembly. Generator.generate() is now scheme-agnostic (override resolution + dispatch only). - Fix dropped-scheme bug: build_world sets spec.scheme=self.name, so bundle.spec.scheme reflects the actual scheme (was always the default). - DEFAULT_SCHEME constant in core.models removes the duplicated "lead_scoring" literal across Recipe/WorldSpec; a test guards LeadScoringScheme.name == DEFAULT_SCHEME. - Tests: regression for bundle.spec.scheme, determinism through the scheme path, name/default drift guard; protocol-conformance test annotated as the weak structural check it is. Verified still byte-identical to main (14/14 files of a pinned-timestamp bundle). Full suite 1498 passed / 51 skipped; ruff + mypy clean. 
Co-Authored-By: Claude Opus 4.8 --- leadforge/api/generator.py | 72 +----------- leadforge/api/recipes.py | 11 +- leadforge/core/models.py | 9 +- leadforge/schemes/base.py | 62 ++++++----- leadforge/schemes/lead_scoring/__init__.py | 122 ++++++++++++++++----- tests/schemes/test_registry.py | 43 +++++++- 6 files changed, 184 insertions(+), 135 deletions(-) diff --git a/leadforge/api/generator.py b/leadforge/api/generator.py index a45790e..ec6294e 100644 --- a/leadforge/api/generator.py +++ b/leadforge/api/generator.py @@ -5,7 +5,7 @@ from typing import Any from leadforge.core.enums import DifficultyProfile, ExposureMode -from leadforge.core.models import DifficultyParams, GenerationConfig, WorldBundle, WorldSpec +from leadforge.core.models import GenerationConfig, WorldBundle, WorldSpec from leadforge.core.rng import RNGRoot from leadforge.core.sentinels import _MISSING @@ -152,7 +152,6 @@ def generate( import dataclasses from leadforge.schemes import get_scheme - from leadforge.structure.sampler import sample_hidden_graph config = self._world_spec.config @@ -179,70 +178,7 @@ def generate( "Generator.from_recipe() to resolve the narrative." ) - rng_root = RNGRoot(config.seed) - world_graph = sample_hidden_graph(rng_root) - - # Load category-latent correlations from difficulty profile if available. - from leadforge.api.recipes import Recipe - from leadforge.recipes.registry import load_recipe - - category_latent_correlations = None - try: - raw = load_recipe(config.recipe_id) - recipe = Recipe.from_dict(raw) - profiles = recipe.load_difficulty_profiles() - profile = profiles.get(config.difficulty.value, {}) - category_latent_correlations = profile.get("category_latent_correlations") - - # Construct DifficultyParams from profile and attach to config. - # All keys are required — a missing key indicates a malformed profile - # YAML and should fail loudly rather than silently defaulting. 
- required_keys = ( - "signal_strength", - "noise_scale", - "missing_rate", - "outlier_rate", - "conversion_rate_range", - "committee_friction", - ) - missing = [k for k in required_keys if k not in profile] - if missing: - from leadforge.core.exceptions import InvalidRecipeError - - raise InvalidRecipeError( - f"Difficulty profile '{config.difficulty.value}' is missing " - f"required keys: {missing}" - ) - cr_range = profile["conversion_rate_range"] - difficulty_params = DifficultyParams( - signal_strength=profile["signal_strength"], - noise_scale=profile["noise_scale"], - missing_rate=profile["missing_rate"], - outlier_rate=profile["outlier_rate"], - conversion_rate_lo=cr_range[0], - conversion_rate_hi=cr_range[1], - committee_friction=profile["committee_friction"], - ) - config = dataclasses.replace(config, difficulty_params=difficulty_params) - except (FileNotFoundError, KeyError): - category_latent_correlations = None - + # Dispatch to the scheme: it owns structure sampling, difficulty + # interpretation, population, simulation, and bundle assembly. 
scheme = get_scheme(self._world_spec.scheme) - population = scheme.build_population( - config, - narrative, - world_graph, - category_latent_correlations=category_latent_correlations, - ) - latent_touch_intensity = kwargs.pop("latent_touch_intensity", False) - result = scheme.simulate( - config, population, world_graph, latent_touch_intensity=latent_touch_intensity - ) - - spec = WorldSpec(config=config, narrative=narrative) - return WorldBundle( - spec=spec, - population=population, - simulation_result=result, - world_graph=world_graph, - ) + return scheme.build_world(config, narrative, **kwargs) diff --git a/leadforge/api/recipes.py b/leadforge/api/recipes.py index 353790b..7ecad19 100644 --- a/leadforge/api/recipes.py +++ b/leadforge/api/recipes.py @@ -21,6 +21,7 @@ from leadforge.core.enums import DifficultyProfile, ExposureMode from leadforge.core.exceptions import InvalidRecipeError +from leadforge.core.models import DEFAULT_SCHEME from leadforge.core.sentinels import _MISSING from leadforge.core.serialization import load_yaml @@ -43,8 +44,8 @@ class Recipe: label_window_days: int | None = None snapshot_day: int | None = None # Which generation scheme this recipe runs (see leadforge.schemes). - # Defaults to "lead_scoring" so existing recipes need no change. - scheme: str = "lead_scoring" + # Defaults to the lead-scoring scheme so existing recipes need no change. 
+ scheme: str = DEFAULT_SCHEME # ------------------------------------------------------------------ # # Construction @@ -122,11 +123,9 @@ def from_dict(cls, data: dict[str, Any]) -> Recipe: ) snapshot_day = raw_sd - scheme = data.get("scheme", "lead_scoring") + scheme = data.get("scheme", DEFAULT_SCHEME) if not isinstance(scheme, str) or not scheme: - raise InvalidRecipeError( - f"'scheme' must be a non-empty string, got {scheme!r}" - ) + raise InvalidRecipeError(f"'scheme' must be a non-empty string, got {scheme!r}") return cls( id=data["id"], diff --git a/leadforge/core/models.py b/leadforge/core/models.py index dea120d..e7acbe3 100644 --- a/leadforge/core/models.py +++ b/leadforge/core/models.py @@ -16,6 +16,13 @@ from leadforge.structure.graph import WorldGraph +# Default generation scheme when a recipe/world does not declare one. Kept here +# (the shared core layer) because ``leadforge.core`` must not import +# ``leadforge.schemes`` (the scheme package depends on core, not the reverse). +# ``LeadScoringScheme.name`` must equal this value; a test guards the match. +DEFAULT_SCHEME = "lead_scoring" + + @dataclass(frozen=True) class DifficultyParams: """Numeric parameters from a difficulty profile. @@ -146,7 +153,7 @@ class WorldSpec: narrative: NarrativeSpec | None = None # Generation scheme this world runs (see leadforge.schemes). Defaults to # the lead-scoring pipeline so direct WorldSpec construction is unchanged. - scheme: str = "lead_scoring" + scheme: str = DEFAULT_SCHEME @dataclass diff --git a/leadforge/schemes/base.py b/leadforge/schemes/base.py index 745c713..b0dd9da 100644 --- a/leadforge/schemes/base.py +++ b/leadforge/schemes/base.py @@ -11,27 +11,37 @@ via its ``scheme:`` field; :class:`~leadforge.api.generator.Generator` looks the scheme up and runs its pipeline rather than branching on a recipe type. 
+Where the seam sits +------------------- +A scheme owns the **whole** generation pipeline from ``(config, narrative)`` to +in-memory world artifacts: structure/graph sampling, difficulty interpretation, +population, simulation, and :class:`~leadforge.core.models.WorldBundle` +assembly. These steps differ between schemes (the lead-scoring hidden DAG, +``DifficultyParams``, and touch emission are all lead-scoring-specific), so the +boundary is the single :meth:`GenerationScheme.build_world` method rather than a +set of lead-scoring-shaped sub-steps. This keeps +:meth:`~leadforge.api.generator.Generator.generate` genuinely scheme-agnostic. + +Scheme-specific options are passed through ``Generator.generate(**kwargs)`` to +``build_world`` and consumed by the scheme that understands them (e.g. +``latent_touch_intensity`` for lead scoring). + Scope note ---------- -This protocol currently covers the *generation* half (population + simulation) -that flows through ``Generator.generate()``. Render dispatch (``to_dataframes`` -/ snapshots / task splits) is added to the protocol as the lifecycle scheme is -built out (see ``docs/ltv/roadmap.md`` — LTV-M6); today the bundle writer still -calls the lead-scoring render functions directly. +Render dispatch (``to_dataframes`` / snapshots / task splits, currently in +``WorldBundle.save`` → the bundle writer) is folded into the scheme as the +lifecycle scheme is built out (see ``docs/ltv/roadmap.md`` — LTV-M6). 
""" from __future__ import annotations -from typing import TYPE_CHECKING, Protocol, runtime_checkable +from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable from leadforge.core.exceptions import LeadforgeError if TYPE_CHECKING: - from leadforge.core.models import GenerationConfig + from leadforge.core.models import GenerationConfig, WorldBundle from leadforge.narrative.spec import NarrativeSpec - from leadforge.simulation.engine import SimulationResult - from leadforge.simulation.population import PopulationResult - from leadforge.structure.graph import WorldGraph class UnknownSchemeError(LeadforgeError): @@ -43,33 +53,25 @@ class GenerationScheme(Protocol): """One end-to-end dataset generation pipeline shape. Implementations are registered by :attr:`name` and resolved at generation - time. The two methods below are the generation half of the pipeline; both - must be deterministic given ``(config, ...)`` per the package's RNG - contract. + time. :meth:`build_world` must be deterministic given ``(config, + narrative, options)`` per the package's RNG contract. """ name: str - def build_population( + def build_world( self, config: GenerationConfig, narrative: NarrativeSpec, - world_graph: WorldGraph, - *, - category_latent_correlations: dict | None = None, - ) -> PopulationResult: - """Generate the scheme's base population (entities + latent state).""" - ... - - def simulate( - self, - config: GenerationConfig, - population: PopulationResult, - world_graph: WorldGraph, - *, - latent_touch_intensity: bool = False, - ) -> SimulationResult: - """Run the scheme's simulation over *population*, returning event tables.""" + **options: Any, + ) -> WorldBundle: + """Run the scheme's full pipeline and return an in-memory bundle. + + Implementations own structure sampling, difficulty interpretation, + population, simulation, and bundle assembly. 
``options`` carries + scheme-specific flags forwarded from ``Generator.generate(**kwargs)``; + a scheme ignores options it does not recognise. + """ ... diff --git a/leadforge/schemes/lead_scoring/__init__.py b/leadforge/schemes/lead_scoring/__init__.py index 092942c..7f3cc65 100644 --- a/leadforge/schemes/lead_scoring/__init__.py +++ b/leadforge/schemes/lead_scoring/__init__.py @@ -1,27 +1,27 @@ """The ``lead_scoring`` generation scheme. -Wraps the existing population + simulation pipeline as a registered -:class:`~leadforge.schemes.base.GenerationScheme`. This is the first scheme -extracted (LTV-Pd) and is the trunk that the lifecycle scheme parallels. +Owns the lead-scoring pipeline — hidden-DAG sampling, difficulty interpretation, +population, simulation, and bundle assembly — behind the single +:meth:`~leadforge.schemes.base.GenerationScheme.build_world` entry point. This +is the first scheme extracted (LTV-Pd) and the trunk the lifecycle scheme +parallels. The implementation modules (``population``, ``engine``, mechanisms, structure, render) still live under their original package paths; they are physically -relocated into this package in LTV-Pe. Until then the methods delegate to the +relocated into this package in LTV-Pe. Until then ``build_world`` calls into their current homes, keeping the lead-scoring bundle's output byte-for-byte identical. 
""" from __future__ import annotations -from typing import TYPE_CHECKING +import dataclasses +from typing import TYPE_CHECKING, Any from leadforge.schemes.base import register_scheme if TYPE_CHECKING: - from leadforge.core.models import GenerationConfig + from leadforge.core.models import GenerationConfig, WorldBundle from leadforge.narrative.spec import NarrativeSpec - from leadforge.simulation.engine import SimulationResult - from leadforge.simulation.population import PopulationResult - from leadforge.structure.graph import WorldGraph class LeadScoringScheme: @@ -29,40 +29,108 @@ class LeadScoringScheme: name = "lead_scoring" - def build_population( + def build_world( self, config: GenerationConfig, narrative: NarrativeSpec, - world_graph: WorldGraph, - *, - category_latent_correlations: dict | None = None, - ) -> PopulationResult: + **options: Any, + ) -> WorldBundle: + """Sample the hidden world, build the population, simulate, and assemble. + + Recognised ``options``: + latent_touch_intensity (bool): use the latent-driven touch + intensity mechanism instead of recency decay. Default ``False``. 
+ """ + from leadforge.core.models import WorldBundle, WorldSpec + from leadforge.core.rng import RNGRoot + from leadforge.simulation.engine import simulate_world from leadforge.simulation.population import build_population + from leadforge.structure.sampler import sample_hidden_graph + + latent_touch_intensity = bool(options.get("latent_touch_intensity", False)) - return build_population( + rng_root = RNGRoot(config.seed) + world_graph = sample_hidden_graph(rng_root) + + config, category_latent_correlations = self._resolve_difficulty(config) + + population = build_population( config, narrative, world_graph, category_latent_correlations=category_latent_correlations, ) - - def simulate( - self, - config: GenerationConfig, - population: PopulationResult, - world_graph: WorldGraph, - *, - latent_touch_intensity: bool = False, - ) -> SimulationResult: - from leadforge.simulation.engine import simulate_world - - return simulate_world( + result = simulate_world( config, population, world_graph, latent_touch_intensity=latent_touch_intensity, ) + spec = WorldSpec(config=config, narrative=narrative, scheme=self.name) + return WorldBundle( + spec=spec, + population=population, + simulation_result=result, + world_graph=world_graph, + ) + + @staticmethod + def _resolve_difficulty( + config: GenerationConfig, + ) -> tuple[GenerationConfig, dict | None]: + """Attach :class:`DifficultyParams` to *config* and return category-latent + correlations from the active difficulty profile. + + Returns ``(config, None)`` unchanged when loading the recipe or its profiles + raises ``FileNotFoundError``/``KeyError`` (e.g. ad-hoc configs in tests). 
+ """ + from leadforge.api.recipes import Recipe + from leadforge.core.models import DifficultyParams + from leadforge.recipes.registry import load_recipe + + try: + raw = load_recipe(config.recipe_id) + recipe = Recipe.from_dict(raw) + profiles = recipe.load_difficulty_profiles() + except (FileNotFoundError, KeyError): + return config, None + + profile = profiles.get(config.difficulty.value, {}) + category_latent_correlations = profile.get("category_latent_correlations") + + # All keys are required — a missing key indicates a malformed profile + # YAML and should fail loudly rather than silently defaulting. + required_keys = ( + "signal_strength", + "noise_scale", + "missing_rate", + "outlier_rate", + "conversion_rate_range", + "committee_friction", + ) + missing = [k for k in required_keys if k not in profile] + if missing: + from leadforge.core.exceptions import InvalidRecipeError + + raise InvalidRecipeError( + f"Difficulty profile '{config.difficulty.value}' is missing " + f"required keys: {missing}" + ) + cr_range = profile["conversion_rate_range"] + difficulty_params = DifficultyParams( + signal_strength=profile["signal_strength"], + noise_scale=profile["noise_scale"], + missing_rate=profile["missing_rate"], + outlier_rate=profile["outlier_rate"], + conversion_rate_lo=cr_range[0], + conversion_rate_hi=cr_range[1], + committee_friction=profile["committee_friction"], + ) + return dataclasses.replace(config, difficulty_params=difficulty_params), ( + category_latent_correlations + ) + LEAD_SCORING_SCHEME = LeadScoringScheme() register_scheme(LEAD_SCORING_SCHEME) diff --git a/tests/schemes/test_registry.py b/tests/schemes/test_registry.py index 3262924..ad129e7 100644 --- a/tests/schemes/test_registry.py +++ b/tests/schemes/test_registry.py @@ -5,7 +5,7 @@ from leadforge.api.generator import Generator from leadforge.api.recipes import Recipe from leadforge.core.exceptions import InvalidRecipeError -from leadforge.core.models import WorldSpec +from 
leadforge.core.models import DEFAULT_SCHEME, WorldSpec from leadforge.schemes import ( GenerationScheme, UnknownSchemeError, @@ -15,6 +15,8 @@ ) from leadforge.schemes.lead_scoring import LEAD_SCORING_SCHEME, LeadScoringScheme +_SMALL = {"n_accounts": 20, "n_contacts": 40, "n_leads": 60, "difficulty": "intro"} + # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- @@ -29,9 +31,18 @@ def test_lead_scoring_scheme_name() -> None: assert LEAD_SCORING_SCHEME.name == "lead_scoring" +def test_lead_scoring_name_matches_default_scheme() -> None: + # DEFAULT_SCHEME (core) and LeadScoringScheme.name (schemes) are declared in + # separate layers; guard against drift. + assert LeadScoringScheme.name == DEFAULT_SCHEME + + def test_lead_scoring_satisfies_protocol() -> None: - # runtime_checkable Protocol — structural check. + # runtime_checkable Protocol checks attribute *names* only (name, + # build_world), not signatures — a weak structural check, not full + # conformance. End-to-end behaviour is covered by the generate() tests. 
assert isinstance(LEAD_SCORING_SCHEME, GenerationScheme) + assert callable(LEAD_SCORING_SCHEME.build_world) def test_get_unknown_scheme_raises() -> None: @@ -113,12 +124,38 @@ def test_from_recipe_sets_scheme_on_world_spec() -> None: def test_generate_runs_through_registered_scheme() -> None: gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=42) - bundle = gen.generate(n_accounts=20, n_contacts=40, n_leads=60, difficulty="intro") + bundle = gen.generate(**_SMALL) assert bundle.population is not None assert bundle.simulation_result is not None assert len(bundle.population.leads) == 60 +def test_generate_records_scheme_on_bundle_spec() -> None: + # Regression: generate() must thread the scheme onto the returned bundle's spec + # (an earlier revision rebuilt WorldSpec without it, silently using the default). + # NOTE: the default is also "lead_scoring", so this cannot yet catch that fallback. + gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=42) + bundle = gen.generate(**_SMALL) + assert bundle.spec.scheme == "lead_scoring" + + +def test_generate_is_deterministic_through_scheme() -> None: + # Determinism smoke check for LTV-Pd: same (recipe, config, seed) must yield + # identical lead outcomes and touch counts (not a full byte-identity check). + a = Generator.from_recipe("b2b_saas_procurement_v1", seed=42).generate(**_SMALL) + b = Generator.from_recipe("b2b_saas_procurement_v1", seed=42).generate(**_SMALL) + assert a.simulation_result is not None + assert b.simulation_result is not None + lead_outcomes_a = { + lead.lead_id: lead.converted_within_90_days for lead in a.simulation_result.leads + } + lead_outcomes_b = { + lead.lead_id: lead.converted_within_90_days for lead in b.simulation_result.leads + } + assert lead_outcomes_a == lead_outcomes_b + assert len(a.simulation_result.touches) == len(b.simulation_result.touches) + + def test_generate_unknown_scheme_raises() -> None: gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=42) # Force an unregistered scheme onto the world spec to prove dispatch is live.