Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .agent-plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ bundle schema version 5 → 6; **peer generation-scheme abstraction extracted
early against the known-good lead-scoring path + physical reorg into
`schemes/`**. (Framing follows Google `lifetime_value`/ZILN and Voyantis pLTV.)

Status: `LTV-M0` planning landed (#102) + reframed to pLTV regression (#103) +
reframed to peer schemes. `LTV-M1`: `LTV-Pb` (lifecycle entity rows +
registries) opened as **#104** (awaiting review). Next is `LTV-M2`
(`GenerationScheme` abstraction + physical reorg, `LTV-Pd…Pf`) — can start in
parallel since it only touches the existing lead-scoring path; `LTV-Pc`
(pLTV feature/task specs) also outstanding in `LTV-M1`.
Status: `LTV-M0` landed (#102, #103, #106). `LTV-M1`: `LTV-Pb` merged (#104);
`LTV-Pc` (pLTV feature/task specs) still outstanding. `LTV-M2`: `LTV-Pd`
(`GenerationScheme` protocol + registry) opened as **#107** (awaiting review,
verified byte-identical). Next in M2: `LTV-Pe` (physically move lead-scoring
pipeline into `schemes/lead_scoring/`), then `LTV-Pf` (scaffold
`schemes/lifecycle/`).

---

Expand Down
13 changes: 7 additions & 6 deletions docs/ltv/roadmap.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,13 @@ Total: ~18 PRs across 9 milestones.
> keeps lead-scoring output byte-identical (hash-determinism) and the public
> API stable.

- [ ] **`LTV-Pd`** — `refactor(api): GenerationScheme protocol + registry`.
Add `schemes/base.py` (`GenerationScheme` protocol + `SCHEME_REGISTRY`). Wrap
the **existing** lead-scoring pipeline as `LeadScoringScheme` *in place* (no
file moves yet); route `Generator.generate()` through the registry; recipes
gain a `scheme:` field (defaulting to `lead_scoring`). Output byte-identical.
- Tests: registry lookup, dispatch, hash-determinism, full suite green.
- [x] **`LTV-Pd`** — `refactor(api): GenerationScheme protocol + registry`
(**PR #107**). Added `schemes/base.py` (`GenerationScheme` protocol +
`SCHEME_REGISTRY`) and `schemes/lead_scoring/` wrapping the existing pipeline
*in place*; `Generator.generate()` routes through the registry; `Recipe` and
`WorldSpec` gain a `scheme` field (default `lead_scoring`). Verified
byte-identical (all 14 files of a pinned-timestamp bundle hash identically,
main vs branch).
- Labels: `type: refactor`, `layer: api`, `layer: core`
- [ ] **`LTV-Pe`** — `refactor: move lead-scoring pipeline to schemes/lead_scoring/`.
Physically relocate the lead-scoring population/engine/state/mechanisms/
Expand Down
78 changes: 7 additions & 71 deletions leadforge/api/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Any

from leadforge.core.enums import DifficultyProfile, ExposureMode
from leadforge.core.models import DifficultyParams, GenerationConfig, WorldBundle, WorldSpec
from leadforge.core.models import GenerationConfig, WorldBundle, WorldSpec
from leadforge.core.rng import RNGRoot
from leadforge.core.sentinels import _MISSING

Expand Down Expand Up @@ -117,7 +117,7 @@ def from_recipe(

narrative_data = recipe.load_narrative()
narrative = NarrativeSpec.from_dict(narrative_data) if narrative_data else None
world_spec = WorldSpec(config=config, narrative=narrative)
world_spec = WorldSpec(config=config, narrative=narrative, scheme=recipe.scheme)

return cls(world_spec)

Expand Down Expand Up @@ -151,9 +151,7 @@ def generate(
"""
import dataclasses

from leadforge.simulation.engine import simulate_world
from leadforge.simulation.population import build_population
from leadforge.structure.sampler import sample_hidden_graph
from leadforge.schemes import get_scheme

config = self._world_spec.config

Expand All @@ -180,69 +178,7 @@ def generate(
"Generator.from_recipe() to resolve the narrative."
)

rng_root = RNGRoot(config.seed)
world_graph = sample_hidden_graph(rng_root)

# Load category-latent correlations from difficulty profile if available.
from leadforge.api.recipes import Recipe
from leadforge.recipes.registry import load_recipe

category_latent_correlations = None
try:
raw = load_recipe(config.recipe_id)
recipe = Recipe.from_dict(raw)
profiles = recipe.load_difficulty_profiles()
profile = profiles.get(config.difficulty.value, {})
category_latent_correlations = profile.get("category_latent_correlations")

# Construct DifficultyParams from profile and attach to config.
# All keys are required — a missing key indicates a malformed profile
# YAML and should fail loudly rather than silently defaulting.
required_keys = (
"signal_strength",
"noise_scale",
"missing_rate",
"outlier_rate",
"conversion_rate_range",
"committee_friction",
)
missing = [k for k in required_keys if k not in profile]
if missing:
from leadforge.core.exceptions import InvalidRecipeError

raise InvalidRecipeError(
f"Difficulty profile '{config.difficulty.value}' is missing "
f"required keys: {missing}"
)
cr_range = profile["conversion_rate_range"]
difficulty_params = DifficultyParams(
signal_strength=profile["signal_strength"],
noise_scale=profile["noise_scale"],
missing_rate=profile["missing_rate"],
outlier_rate=profile["outlier_rate"],
conversion_rate_lo=cr_range[0],
conversion_rate_hi=cr_range[1],
committee_friction=profile["committee_friction"],
)
config = dataclasses.replace(config, difficulty_params=difficulty_params)
except (FileNotFoundError, KeyError):
category_latent_correlations = None

population = build_population(
config,
narrative,
world_graph,
category_latent_correlations=category_latent_correlations,
)
latent_touch_intensity = kwargs.pop("latent_touch_intensity", False)
result = simulate_world(
config, population, world_graph, latent_touch_intensity=latent_touch_intensity
)

spec = WorldSpec(config=config, narrative=narrative)
return WorldBundle(
spec=spec,
population=population,
simulation_result=result,
world_graph=world_graph,
)
# Dispatch to the scheme: it owns structure sampling, difficulty
# interpretation, population, simulation, and bundle assembly.
scheme = get_scheme(self._world_spec.scheme)
return scheme.build_world(config, narrative, **kwargs)
9 changes: 9 additions & 0 deletions leadforge/api/recipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from leadforge.core.enums import DifficultyProfile, ExposureMode
from leadforge.core.exceptions import InvalidRecipeError
from leadforge.core.models import DEFAULT_SCHEME
from leadforge.core.sentinels import _MISSING
from leadforge.core.serialization import load_yaml

Expand All @@ -42,6 +43,9 @@ class Recipe:
horizon_days: int
label_window_days: int | None = None
snapshot_day: int | None = None
# Which generation scheme this recipe runs (see leadforge.schemes).
# Defaults to the lead-scoring scheme so existing recipes need no change.
scheme: str = DEFAULT_SCHEME

# ------------------------------------------------------------------ #
# Construction
Expand Down Expand Up @@ -119,6 +123,10 @@ def from_dict(cls, data: dict[str, Any]) -> Recipe:
)
snapshot_day = raw_sd

scheme = data.get("scheme", DEFAULT_SCHEME)
if not isinstance(scheme, str) or not scheme:
raise InvalidRecipeError(f"'scheme' must be a non-empty string, got {scheme!r}")

return cls(
id=data["id"],
title=data["title"],
Expand All @@ -131,6 +139,7 @@ def from_dict(cls, data: dict[str, Any]) -> Recipe:
horizon_days=horizon_days,
label_window_days=label_window_days,
snapshot_day=snapshot_day,
scheme=scheme,
)

# ------------------------------------------------------------------ #
Expand Down
10 changes: 10 additions & 0 deletions leadforge/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@
from leadforge.structure.graph import WorldGraph


# Default generation scheme when a recipe/world does not declare one. Kept here
# (the shared core layer) because ``leadforge.core`` must not import
# ``leadforge.schemes`` (the scheme package depends on core, not the reverse).
# ``LeadScoringScheme.name`` must equal this value; a test guards the match.
DEFAULT_SCHEME = "lead_scoring"


@dataclass(frozen=True)
class DifficultyParams:
"""Numeric parameters from a difficulty profile.
Expand Down Expand Up @@ -144,6 +151,9 @@ class WorldSpec:

config: GenerationConfig = field(default_factory=GenerationConfig)
narrative: NarrativeSpec | None = None
# Generation scheme this world runs (see leadforge.schemes). Defaults to
# the lead-scoring pipeline so direct WorldSpec construction is unchanged.
scheme: str = DEFAULT_SCHEME


@dataclass
Expand Down
4 changes: 4 additions & 0 deletions leadforge/recipes/b2b_saas_procurement_v1/recipe.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
id: b2b_saas_procurement_v1
title: "Mid-market B2B SaaS — Procurement & AP Automation"
vertical: mid_market_b2b_saas
# Generation scheme this recipe runs (see leadforge.schemes). Lead scoring is
# the default; declared explicitly here for clarity now that leadforge hosts
# multiple peer schemes.
scheme: lead_scoring
description: >
A mid-market B2B SaaS company selling procurement and AP workflow
automation software to 200–2,000 employee firms in the US and UK,
Expand Down
28 changes: 28 additions & 0 deletions leadforge/schemes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Generation-scheme registry.

Importing this package registers the built-in schemes as a side effect, so
``from leadforge.schemes import get_scheme`` is always sufficient to resolve any
shipped scheme. See ``leadforge.schemes.base`` and ``docs/ltv/design.md`` §2.5.
"""

from __future__ import annotations

# Import built-in scheme modules for their registration side effects.
from leadforge.schemes import lead_scoring as _lead_scoring # noqa: F401
from leadforge.schemes.base import (
SCHEME_REGISTRY,
GenerationScheme,
UnknownSchemeError,
available_schemes,
get_scheme,
register_scheme,
)

# Public API of ``leadforge.schemes``: the registry, protocol, error type and
# helper functions imported above from ``leadforge.schemes.base``.
__all__ = [
"SCHEME_REGISTRY",
"GenerationScheme",
"UnknownSchemeError",
"available_schemes",
"get_scheme",
"register_scheme",
]
112 changes: 112 additions & 0 deletions leadforge/schemes/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""Generation-scheme abstraction — the registry of peer dataset pipelines.

leadforge hosts multiple *generation schemes* as peers (e.g. ``lead_scoring``
and, from the LTV workstream, ``lifecycle``). Each scheme owns one end-to-end
pipeline shape — population → simulation → render → tasks — while the outer
envelope (RNG, config resolution, bundle layout, manifest, exposure dispatch,
CLI) is shared. See ``docs/ltv/design.md`` §2.5.

A scheme is a small object registered by ``name`` in :data:`SCHEME_REGISTRY`
and resolved via :func:`get_scheme`. The recipe declares which scheme it runs
via its ``scheme:`` field; :class:`~leadforge.api.generator.Generator` looks the
scheme up and runs its pipeline rather than branching on a recipe type.

Where the seam sits
-------------------
A scheme owns the **whole** generation pipeline from ``(config, narrative)`` to
in-memory world artifacts: structure/graph sampling, difficulty interpretation,
population, simulation, and :class:`~leadforge.core.models.WorldBundle`
assembly. These steps differ between schemes (the lead-scoring hidden DAG,
``DifficultyParams``, and touch emission are all lead-scoring-specific), so the
boundary is the single :meth:`GenerationScheme.build_world` method rather than a
set of lead-scoring-shaped sub-steps. This keeps
:meth:`~leadforge.api.generator.Generator.generate` genuinely scheme-agnostic.

Scheme-specific options are passed through ``Generator.generate(**kwargs)`` to
``build_world`` and consumed by the scheme that understands them (e.g.
``latent_touch_intensity`` for lead scoring).

Scope note
----------
Render dispatch (``to_dataframes`` / snapshots / task splits, currently in
``WorldBundle.save`` → the bundle writer) is not yet owned by the scheme; it
will be folded in as the lifecycle scheme is built out (see
``docs/ltv/roadmap.md`` — LTV-M6).
"""

from __future__ import annotations

from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable

from leadforge.core.exceptions import LeadforgeError

if TYPE_CHECKING:
from leadforge.core.models import GenerationConfig, WorldBundle
from leadforge.narrative.spec import NarrativeSpec


class UnknownSchemeError(LeadforgeError):
    """Signal a lookup for a generation-scheme name the registry does not hold."""


@runtime_checkable
class GenerationScheme(Protocol):
    """Structural interface for a single end-to-end dataset generation pipeline.

    A conforming object exposes a ``name`` (the key it is registered and
    resolved under at generation time) and a :meth:`build_world` method. Per
    the package's RNG contract, :meth:`build_world` has to be deterministic
    for a given ``(config, narrative, options)``.
    """

    # Registry key for this scheme.
    name: str

    def build_world(
        self,
        config: GenerationConfig,
        narrative: NarrativeSpec,
        **options: Any,
    ) -> WorldBundle:
        """Execute the scheme's complete pipeline and return an in-memory bundle.

        Structure sampling, difficulty interpretation, population, simulation
        and bundle assembly are all the implementation's responsibility.

        Args:
            config: Generation configuration for the world being built.
            narrative: Narrative specification for the world being built.
            **options: Scheme-specific flags forwarded from
                ``Generator.generate(**kwargs)``. A scheme ignores any option
                it does not recognise.

        Returns:
            The assembled in-memory world bundle.
        """
        ...


# Name → scheme instance. Populated as a side effect of importing
# ``leadforge.schemes``: its package ``__init__`` imports each built-in scheme
# module, and each of those modules registers its own scheme on import.
SCHEME_REGISTRY: dict[str, GenerationScheme] = {}


def register_scheme(scheme: GenerationScheme) -> None:
    """Add *scheme* to the registry, keyed by its ``name`` attribute.

    Registering the very same instance again is a no-op in effect, so the call
    is idempotent per instance.

    Raises:
        ValueError: if the name is already taken by a *different* scheme
            object (protects against accidental shadowing).
    """
    name = scheme.name
    current = SCHEME_REGISTRY.get(name)
    # Free slot, or the same object being re-registered: store and finish.
    if current is None or current is scheme:
        SCHEME_REGISTRY[name] = scheme
        return
    raise ValueError(f"A different generation scheme named {name!r} is already registered")


def get_scheme(name: str) -> GenerationScheme:
    """Look up and return the scheme registered under *name*.

    Raises:
        UnknownSchemeError: if the registry has no entry for *name*; the
            message lists the registered scheme names in sorted order.
    """
    if name in SCHEME_REGISTRY:
        return SCHEME_REGISTRY[name]
    raise UnknownSchemeError(
        f"Unknown generation scheme {name!r}. Registered schemes: {sorted(SCHEME_REGISTRY)}"
    )


def available_schemes() -> tuple[str, ...]:
    """List every registered scheme name as a sorted tuple."""
    names = list(SCHEME_REGISTRY)
    names.sort()
    return tuple(names)
Loading
Loading