diff --git a/.agent-plan.md b/.agent-plan.md index 3bd62fe..a049bd0 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -6,42 +6,58 @@ ## Current System State -**v0.1.0 shipped.** Installable package, full CLI skeleton (`list-recipes` implemented, others stubbed), core enums/exceptions/models, recipe registry, CI workflow, pre-commit config. All 20 tests pass. +**v0.2.0 in progress.** Typed `Recipe` model, `GenerationConfig` with full validation, config +precedence system, `RNGRoot` with deterministic substreams, `Generator.from_recipe()` fully +implemented, `core/hashing.py`, `core/serialization.py`, and recipe narrative/difficulty-profile +assets for `b2b_saas_procurement_v1`. 59 tests passing. --- -## Active Task Breakdown — Milestone 1: Canonical Config, Recipe & Model Objects (v0.2.0) +## Active Task Breakdown — Milestone 2: Narrative Layer (v0.2.0 cont.) -Goal: Establish the typed configuration and recipe system that all simulation work will depend on. +Goal: Build the concrete company/product/market story objects that anchor all later simulation. -- [ ] **1. Core models + RNG utilities** - - Implement `leadforge/core/rng.py`: seeded RNG root + deterministic named substreams - - Flesh out `GenerationConfig` with full validation (config precedence rules) - - Implement `leadforge/core/hashing.py`: deterministic config hashing for manifest identity +- [ ] **1. Narrative models** + - Implement typed dataclasses in `narrative/`: `CompanySpec`, `ProductSpec`, `MarketSpec`, + `PersonaSpec`, `FunnelSpec` + - Loader: parse `narrative.yaml` into these models with validation -- [ ] **2. Recipe registry and loading** - - Implement full `Recipe` typed model (dataclass) in `leadforge/api/recipes.py` - - Implement config precedence: CLI flags > override file > recipe defaults > package defaults - - Implement `leadforge/core/serialization.py`: JSON/YAML read-write helpers +- [ ] **2. 
WorldSpec population** + - Flesh out `WorldSpec` to hold a resolved `NarrativeSpec` + - Wire into `Generator.from_recipe()` so `gen.world_spec` is populated after construction -- [ ] **3. Recipe assets + validation tests** - - Add `narrative.yaml`, `difficulty_profiles.yaml` to `b2b_saas_procurement_v1/` - - Implement `Generator.from_recipe(...)` skeleton (no simulation yet) - - Tests: recipe validation, config precedence, RNG determinism, `from_recipe` round-trip +- [ ] **3. Dataset card generation** + - Implement `narrative/dataset_card.py`: render a Markdown dataset card from `WorldSpec` + - Tests: round-trip model → YAML → model, dataset-card text contains expected fields --- ## Context Pointers -- Milestone 1 scope: `docs/leadforge_implementation_plan.md` §5 "Milestone 1" +- Milestone 2 scope: `docs/leadforge_implementation_plan.md` §5 "Milestone 2" - Full milestone dependency graph: `docs/leadforge_implementation_plan.md` §6 -- Public API contract: `docs/leadforge_architecture_spec.md` §6 -- Config precedence rules: `docs/leadforge_architecture_spec.md` §24 +- Narrative spec: `docs/leadforge_architecture_spec.md` §7 +- Recipe assets: `leadforge/recipes/b2b_saas_procurement_v1/narrative.yaml` --- ## Completed Phases +### Milestone 1 — Canonical Config, Recipe & Model Objects ✓ (v0.2.0 in PR) +- `leadforge/core/rng.py`: `RNGRoot` with SHA-256-derived named substreams +- `leadforge/core/hashing.py`: `hash_config()` — stable SHA-256 digest of `GenerationConfig` +- `leadforge/core/serialization.py`: `load_yaml`, `load_json`, `dump_json` +- `leadforge/core/models.py`: `GenerationConfig` with `__post_init__` validation + `package_version` +- `leadforge/api/recipes.py`: typed `Recipe` dataclass, `from_dict`, `resolve_config` (full + precedence: explicit kwargs > override dict > recipe defaults > package defaults) +- `leadforge/api/generator.py`: `Generator.from_recipe()` fully implemented (skeleton — no + simulation); `generate()` stubs to v0.3.0 +- 
`leadforge/recipes/b2b_saas_procurement_v1/narrative.yaml`: company, product, market, GTM, + personas, funnel stages +- `leadforge/recipes/b2b_saas_procurement_v1/difficulty_profiles.yaml`: intro / intermediate / + advanced signal-noise profiles +- 39 new tests (rng, hashing, recipes, generator); total 59 passing + ### Milestone 0 — Project Foundation ✓ (v0.1.0) - `pyproject.toml`, `README.md`, `LICENSE`, `.pre-commit-config.yaml` - Full package skeleton with `__init__.py` stubs for all submodules diff --git a/leadforge/api/__init__.py b/leadforge/api/__init__.py index 47e54c9..37d0877 100644 --- a/leadforge/api/__init__.py +++ b/leadforge/api/__init__.py @@ -1,5 +1,7 @@ """leadforge public Python API.""" from leadforge.api.generator import Generator +from leadforge.api.recipes import Recipe +from leadforge.recipes.registry import list_recipes -__all__ = ["Generator"] +__all__ = ["Generator", "Recipe", "list_recipes"] diff --git a/leadforge/api/generator.py b/leadforge/api/generator.py index 77f7af5..6f76f36 100644 --- a/leadforge/api/generator.py +++ b/leadforge/api/generator.py @@ -1,9 +1,4 @@ -"""Public Generator API — stub for Milestone 1. - -The Generator class is the primary entry point for programmatic dataset -generation. It is fully specified in the architecture doc (§6) and will -be implemented across Milestones 1–9. -""" +"""Public Generator API.""" from __future__ import annotations @@ -11,12 +6,14 @@ from leadforge.core.enums import DifficultyProfile, ExposureMode from leadforge.core.models import GenerationConfig, WorldBundle +from leadforge.core.rng import RNGRoot +from leadforge.core.sentinels import _MISSING class Generator: """High-level entry point for generating a synthetic CRM dataset bundle. 
@classmethod
def from_recipe(
    cls,
    recipe_id: str,
    *,
    seed: int = _MISSING,  # type: ignore[assignment]
    exposure_mode: str | ExposureMode = _MISSING,  # type: ignore[assignment]
    difficulty: str | DifficultyProfile = _MISSING,  # type: ignore[assignment]
    n_accounts: int | None = None,
    n_contacts: int | None = None,
    n_leads: int | None = None,
    horizon_days: int | None = None,
    output_path: str = _MISSING,  # type: ignore[assignment]
    override: dict[str, Any] | None = None,
) -> Generator:
    """Create a :class:`Generator` from a recipe ID, applying config precedence.

    The recipe payload is loaded from the registry, parsed into a
    ``Recipe`` and resolved into a ``GenerationConfig``; every keyword
    argument is forwarded unchanged to ``Recipe.resolve_config``.

    ``seed``, ``exposure_mode``, ``difficulty`` and ``output_path`` default
    to the ``_MISSING`` sentinel rather than to a concrete value so that
    "not passed" can be told apart from "passed the default" — only
    arguments the caller actually supplied are allowed to beat *override*.
    For the four ``int | None`` parameters, ``None`` plays the same role.

    Args:
        recipe_id: Identifier of a registered recipe (e.g.
            ``"b2b_saas_procurement_v1"``).
        seed: Master RNG seed. Defaults to the package default (42).
        exposure_mode: ``"student_public"`` or ``"research_instructor"``.
            Defaults to the package default (``student_public``).
        difficulty: ``"intro"``, ``"intermediate"``, or ``"advanced"``.
            Defaults to the package default (``intermediate``).
        n_accounts: Override recipe default account count.
        n_contacts: Override recipe default contact count.
        n_leads: Override recipe default lead count.
        horizon_days: Override recipe default simulation horizon.
        output_path: Directory where the bundle will be saved.
        override: Optional dict of overrides (mirrors a ``--override`` file).
            Applied after recipe defaults but before explicit kwargs.

    Returns:
        A configured :class:`Generator` instance ready to call
        :meth:`generate` on.

    Raises:
        :class:`~leadforge.core.exceptions.InvalidRecipeError`: if the
            recipe does not exist, is malformed, or the requested
            exposure mode / difficulty is not supported.
        :class:`~leadforge.core.exceptions.InvalidConfigError`: if a
            resolved numeric value fails ``GenerationConfig`` validation
            (e.g. a non-positive count or a negative seed).
    """
    # Imported lazily, at call time, rather than at module import time.
    from leadforge.api.recipes import Recipe
    from leadforge.recipes.registry import load_recipe

    raw = load_recipe(recipe_id)
    recipe = Recipe.from_dict(raw)
    config = recipe.resolve_config(
        seed=seed,
        exposure_mode=exposure_mode,
        difficulty=difficulty,
        n_accounts=n_accounts,
        n_contacts=n_contacts,
        n_leads=n_leads,
        horizon_days=horizon_days,
        output_path=output_path,
        override=override,
    )
    return cls(config)
@dataclass(frozen=True)
class Recipe:
    """Fully parsed and validated recipe object.

    Build instances with :meth:`from_dict` from a raw recipe payload, then
    call :meth:`resolve_config` to obtain a ``GenerationConfig`` with the
    config-precedence rules applied.
    """

    id: str
    title: str
    vertical: str
    description: str
    primary_task: str
    supported_modes: tuple[ExposureMode, ...]
    supported_difficulty: tuple[DifficultyProfile, ...]
    default_population: dict[str, int]
    horizon_days: int

    # ------------------------------------------------------------------ #
    # Construction
    # ------------------------------------------------------------------ #

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Recipe:
        """Build a ``Recipe`` from a raw YAML/dict payload.

        Args:
            data: Mapping holding every required recipe key.

        Returns:
            The validated, immutable ``Recipe``.

        Raises:
            InvalidRecipeError: if required keys are missing or values are invalid.
        """
        required = {
            "id",
            "title",
            "vertical",
            "description",
            "primary_task",
            "supported_modes",
            "supported_difficulty",
            "default_population",
            "horizon_days",
        }
        missing = required - data.keys()
        if missing:
            raise InvalidRecipeError(f"Recipe dict is missing required keys: {sorted(missing)}")

        # ValueError: unknown enum member.  TypeError: the value is not
        # iterable at all (e.g. ``supported_modes: null`` in the YAML); it
        # must surface as a recipe error, not as a raw TypeError.
        try:
            supported_modes = tuple(ExposureMode(m) for m in data["supported_modes"])
        except (TypeError, ValueError) as exc:
            raise InvalidRecipeError(f"Invalid exposure mode in recipe: {exc}") from exc

        try:
            supported_difficulty = tuple(DifficultyProfile(d) for d in data["supported_difficulty"])
        except (TypeError, ValueError) as exc:
            raise InvalidRecipeError(f"Invalid difficulty profile in recipe: {exc}") from exc

        # bool is an int subclass, so it has to be rejected explicitly.
        pop = data["default_population"]
        if not isinstance(pop, dict) or not all(
            isinstance(v, int) and not isinstance(v, bool) for v in pop.values()
        ):
            raise InvalidRecipeError(
                f"'default_population' must be a mapping of str→int, got: {pop!r}"
            )

        horizon_days = data["horizon_days"]
        if isinstance(horizon_days, bool) or not isinstance(horizon_days, int):
            raise InvalidRecipeError(
                f"'horizon_days' must be a positive int, got {type(horizon_days).__name__!r}"
            )
        if horizon_days <= 0:
            raise InvalidRecipeError(f"'horizon_days' must be positive, got {horizon_days}")

        return cls(
            id=data["id"],
            title=data["title"],
            vertical=data["vertical"],
            description=data["description"],
            primary_task=data["primary_task"],
            supported_modes=supported_modes,
            supported_difficulty=supported_difficulty,
            default_population=dict(pop),  # copy: do not alias the caller's dict
            horizon_days=horizon_days,
        )

    # ------------------------------------------------------------------ #
    # Config resolution
    # ------------------------------------------------------------------ #

    def resolve_config(
        self,
        *,
        seed: int = _MISSING,  # type: ignore[assignment]
        exposure_mode: str | ExposureMode = _MISSING,  # type: ignore[assignment]
        difficulty: str | DifficultyProfile = _MISSING,  # type: ignore[assignment]
        n_accounts: int | None = None,
        n_contacts: int | None = None,
        n_leads: int | None = None,
        horizon_days: int | None = None,
        output_path: str = _MISSING,  # type: ignore[assignment]
        override: dict[str, Any] | None = None,
    ) -> GenerationConfig:
        """Resolve a :class:`GenerationConfig` applying config precedence rules.

        Precedence (highest → lowest):
          1. Explicit kwargs — only values *actually passed* by the caller win.
          2. *override* dict — beats recipe and package defaults.
          3. Recipe defaults — ``default_population`` keys and ``horizon_days``.
          4. Package defaults — ``GenerationConfig`` field defaults (authoritative
             source; never duplicated in this file).

        Keys in *override* other than the eight resolvable settings are ignored.

        Raises:
            InvalidRecipeError: if the resolved exposure mode or difficulty is
                not a valid enum value or is not supported by this recipe.
        """
        from leadforge.core.models import GenerationConfig  # avoid circular import

        # The settings this method resolves; used for both the package-default
        # seed layer and the override layer so the two can never drift apart.
        config_keys = (
            "seed",
            "exposure_mode",
            "difficulty",
            "output_path",
            "n_accounts",
            "n_contacts",
            "n_leads",
            "horizon_days",
        )

        # Layer 4 — package defaults: read directly from GenerationConfig fields.
        pkg: dict[str, Any] = {
            f.name: f.default
            for f in dataclasses.fields(GenerationConfig)
            if f.default is not dataclasses.MISSING
        }
        resolved: dict[str, Any] = {key: pkg[key] for key in config_keys}

        # Layer 3 — recipe defaults
        pop = self.default_population
        for key in ("n_accounts", "n_contacts", "n_leads"):
            if key in pop:
                resolved[key] = pop[key]
        resolved["horizon_days"] = self.horizon_days

        # Layer 2 — override dict (beats recipe/package defaults)
        if override:
            for key in config_keys:
                if key in override:
                    resolved[key] = override[key]

        # Layer 1 — explicit kwargs: only apply when the caller actually passed
        # the argument (sentinel guards all params that have package defaults).
        if seed is not _MISSING:
            resolved["seed"] = seed
        if exposure_mode is not _MISSING:
            resolved["exposure_mode"] = exposure_mode
        if difficulty is not _MISSING:
            resolved["difficulty"] = difficulty
        if output_path is not _MISSING:
            resolved["output_path"] = output_path
        if n_accounts is not None:
            resolved["n_accounts"] = n_accounts
        if n_contacts is not None:
            resolved["n_contacts"] = n_contacts
        if n_leads is not None:
            resolved["n_leads"] = n_leads
        if horizon_days is not None:
            resolved["horizon_days"] = horizon_days

        # Coerce to enums first so the "supported by this recipe" checks below
        # compare enum members, whatever form the caller supplied.
        try:
            mode = ExposureMode(resolved["exposure_mode"])
        except ValueError as exc:
            raise InvalidRecipeError(
                f"Invalid exposure_mode {resolved['exposure_mode']!r} for recipe '{self.id}'. "
                f"Supported values: {[m.value for m in ExposureMode]}"
            ) from exc
        try:
            diff = DifficultyProfile(resolved["difficulty"])
        except ValueError as exc:
            raise InvalidRecipeError(
                f"Invalid difficulty {resolved['difficulty']!r} for recipe '{self.id}'. "
                f"Supported values: {[d.value for d in DifficultyProfile]}"
            ) from exc

        if mode not in self.supported_modes:
            raise InvalidRecipeError(
                f"Exposure mode '{mode}' is not supported by recipe '{self.id}'. "
                f"Supported: {[m.value for m in self.supported_modes]}"
            )
        if diff not in self.supported_difficulty:
            raise InvalidRecipeError(
                f"Difficulty '{diff}' is not supported by recipe '{self.id}'. "
                f"Supported: {[d.value for d in self.supported_difficulty]}"
            )

        return GenerationConfig(
            recipe_id=self.id,
            seed=resolved["seed"],
            exposure_mode=mode,
            difficulty=diff,
            n_accounts=resolved["n_accounts"],
            n_contacts=resolved["n_contacts"],
            n_leads=resolved["n_leads"],
            horizon_days=resolved["horizon_days"],
            output_path=resolved["output_path"],
        )

    # ------------------------------------------------------------------ #
    # Helpers
    # ------------------------------------------------------------------ #

    def _load_mapping_asset(self, filename: str) -> dict[str, Any]:
        """Load ``<recipes dir>/<id>/<filename>`` and require a YAML mapping.

        Returns an empty dict when the file is absent or parses to ``None``.

        Raises:
            InvalidRecipeError: if the document's top level is not a mapping.
        """
        path = _RECIPES_DIR / self.id / filename
        if not path.exists():
            return {}
        data = load_yaml(path)
        if data is None:
            return {}
        if not isinstance(data, dict):
            raise InvalidRecipeError(
                f"{filename} for recipe '{self.id}' must be a YAML mapping, "
                f"got {type(data).__name__!r}"
            )
        return data

    def load_narrative(self) -> dict[str, Any]:
        """Load the ``narrative.yaml`` for this recipe, if present."""
        return self._load_mapping_asset("narrative.yaml")

    def load_difficulty_profiles(self) -> dict[str, Any]:
        """Load the ``difficulty_profiles.yaml`` for this recipe, if present."""
        return self._load_mapping_asset("difficulty_profiles.yaml")
def _canonical(obj: Any) -> Any:
    """Recursively convert *obj* to a JSON-stable form.

    Dicts are rebuilt with their items in sorted key order, lists and tuples
    become lists, and ``Enum`` members are replaced by their ``value``.
    Anything else is returned unchanged, so a value that JSON cannot encode
    fails loudly in ``json.dumps`` instead of being hashed as something else.
    """
    # Function-scope import keeps this block self-contained.
    from enum import Enum

    if isinstance(obj, dict):
        return {k: _canonical(v) for k, v in sorted(obj.items())}
    if isinstance(obj, (list, tuple)):
        return [_canonical(v) for v in obj]
    # Only genuine enum members are unwrapped.  A duck-typed
    # ``hasattr(obj, "value")`` check would also unwrap any unrelated object
    # that happens to expose ``.value``.
    if isinstance(obj, Enum):
        return obj.value
    return obj


def hash_config(config: "GenerationConfig") -> str:
    """Return a stable hex-encoded SHA-256 digest of *config*.

    The digest is derived from a canonicalised, compact JSON representation
    of the dataclass fields (keys sorted, enums replaced by their values),
    so equal configs hash equally across Python processes.

    Args:
        config: A dataclass instance; it is converted with
            ``dataclasses.asdict``.

    Returns:
        A 64-character lowercase hexadecimal string.

    Raises:
        TypeError: if a field holds a value that JSON cannot encode.
    """
    canonical = _canonical(asdict(config))
    payload = json.dumps(canonical, separators=(",", ":"), ensure_ascii=True)
    return hashlib.sha256(payload.encode()).hexdigest()
def _require_positive_int(value: Any, name: str) -> None:
    """Validate that *value* is a plain ``int`` strictly greater than zero.

    ``bool`` is refused even though it subclasses ``int``: ``True`` would
    otherwise slip through the ``> 0`` comparison unnoticed.

    Args:
        value: The candidate value.
        name: Field name used in the error message.

    Raises:
        InvalidConfigError: if *value* is not an ``int``, is a ``bool``, or
            is zero or negative.
    """
    is_plain_int = isinstance(value, int) and not isinstance(value, bool)
    if not is_plain_int:
        type_name = type(value).__name__
        raise InvalidConfigError(f"{name} must be a positive int, got {type_name!r}")
    if value <= 0:
        raise InvalidConfigError(f"{name} must be positive, got {value}")
def __post_init__(self) -> None:
    """Validate numeric fields and coerce string enum values in place.

    Raises:
        InvalidConfigError: if ``seed`` is not a non-negative plain int, if
            any count / horizon is not a positive plain int, or if
            ``exposure_mode`` / ``difficulty`` is not a valid enum value.
    """
    seed = self.seed
    # bool subclasses int, so it is excluded explicitly.
    if isinstance(seed, bool) or not isinstance(seed, int):
        raise InvalidConfigError(f"seed must be an int, got {type(seed).__name__!r}")
    if seed < 0:
        raise InvalidConfigError(f"seed must be non-negative, got {seed}")

    for attr in ("n_accounts", "n_contacts", "n_leads", "horizon_days"):
        _require_positive_int(getattr(self, attr), attr)

    # Coerce string enums supplied as plain strings
    enum_fields = (("exposure_mode", ExposureMode), ("difficulty", DifficultyProfile))
    for attr, enum_cls in enum_fields:
        raw = getattr(self, attr)
        if isinstance(raw, enum_cls):
            continue
        try:
            setattr(self, attr, enum_cls(raw))
        except ValueError as exc:
            raise InvalidConfigError(
                f"{attr} has invalid value {raw!r}. "
                f"Valid values: {[member.value for member in enum_cls]}"
            ) from exc
class RNGRoot:
    """Root of all randomness for one generation run.

    Holds a validated, non-negative integer seed.  Every stochastic
    substream is obtained through :meth:`child`, which derives its seed
    from the root seed and the stream name only.
    """

    def __init__(self, seed: int) -> None:
        """Store *seed* after validating it.

        Raises:
            TypeError: if *seed* is a ``bool`` or not an ``int``.
            ValueError: if *seed* is negative.
        """
        seed_is_int = isinstance(seed, int) and not isinstance(seed, bool)
        if not seed_is_int:
            raise TypeError(f"seed must be an int, got {type(seed).__name__!r}")
        if seed < 0:
            raise ValueError(f"seed must be non-negative, got {seed}")
        self._seed = seed

    @property
    def seed(self) -> int:
        """The root seed this instance was created with."""
        return self._seed

    def child(self, name: str) -> random.Random:
        """Return a ``random.Random`` for the substream called *name*.

        The derived seed is the first 8 bytes of
        SHA-256(``"<seed>:<name>"``), read as a little-endian integer, so
        the same (seed, name) pair always yields the same sequence.
        """
        material = f"{self._seed}:{name}".encode()
        head = hashlib.sha256(material).digest()[:8]
        stream_seed = int.from_bytes(head, "little")
        return random.Random(stream_seed)  # noqa: S311

    def __repr__(self) -> str:
        return f"RNGRoot(seed={self._seed})"
class _MissingType:
    """Type of the ``_MISSING`` sentinel.

    A named class gives the sentinel a stable, readable representation in
    ``help()`` output and generated documentation (``<MISSING>`` rather than
    the opaque ``<object object at 0x...>`` you get from a bare ``object()``).

    The class is a singleton: every call to ``_MissingType()`` returns the
    same instance, so identity checks (``value is _MISSING``) are reliable.
    The instance is falsy.
    """

    _instance: "_MissingType | None" = None

    def __new__(cls) -> "_MissingType":
        # Singleton — there is exactly one _MISSING value.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __repr__(self) -> str:
        # Must be non-empty: an empty repr makes the sentinel invisible in
        # signatures rendered by help() and in debug output.
        return "<MISSING>"

    def __bool__(self) -> bool:
        return False
def load_yaml(path: Path) -> Any:
    """Parse a YAML file and return the raw Python object.

    Args:
        path: File to read; it is decoded as UTF-8.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document.

    Raises:
        LeadforgeError: if the file cannot be read, is not valid UTF-8, or
            is not valid YAML.
    """
    try:
        with path.open(encoding="utf-8") as fh:
            return yaml.safe_load(fh)
    except yaml.YAMLError as exc:
        raise LeadforgeError(f"Failed to parse YAML at '{path}': {exc}") from exc
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
        # named explicitly or a badly encoded file escapes unwrapped.
        raise LeadforgeError(f"Cannot read YAML file '{path}': {exc}") from exc


def load_json(path: Path) -> Any:
    """Parse a JSON file and return the raw Python object.

    Args:
        path: File to read; it is decoded as UTF-8.

    Raises:
        LeadforgeError: if the file cannot be read, is not valid UTF-8, or
            is not valid JSON.
    """
    try:
        with path.open(encoding="utf-8") as fh:
            return json.load(fh)
    except json.JSONDecodeError as exc:
        raise LeadforgeError(f"Failed to parse JSON at '{path}': {exc}") from exc
    except (OSError, UnicodeDecodeError) as exc:
        raise LeadforgeError(f"Cannot read JSON file '{path}': {exc}") from exc


def _json_default(value: Any) -> str:
    """Convert explicitly supported non-JSON types for ``json.dump``.

    Only ``pathlib.Path`` is handled.  Any other non-serialisable type raises
    ``TypeError`` immediately so bugs are caught at serialisation time rather
    than producing silently coerced strings.
    """
    if isinstance(value, Path):
        return str(value)
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def dump_json(data: Any, path: Path, *, indent: int = 2) -> None:
    """Serialise *data* to *path* as pretty-printed JSON.

    Missing parent directories are created.  The output is ASCII-escaped and
    ends with a trailing newline.

    Args:
        data: JSON-serialisable object; ``pathlib.Path`` values are written
            as strings.
        path: Destination file.
        indent: Indentation width passed to ``json.dump``.

    Raises:
        LeadforgeError: if the directory cannot be created or the file
            cannot be written.
        TypeError: if *data* contains a value JSON cannot encode.
    """
    try:
        # Directory creation sits inside the try so that its OSError is
        # wrapped just like a failure to open or write the file.
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("w", encoding="utf-8") as fh:
            json.dump(data, fh, indent=indent, ensure_ascii=True, default=_json_default)
            fh.write("\n")
    except OSError as exc:
        raise LeadforgeError(f"Cannot write JSON file '{path}': {exc}") from exc
+ signal_strength: 0.70 + noise_scale: 0.30 + missing_rate: 0.08 + outlier_rate: 0.04 + conversion_rate_range: [0.18, 0.28] + committee_friction: 0.30 + +advanced: + description: > + High noise, realistic class imbalance, and significant missing data. + Suitable for ML research and realistic benchmark construction. + signal_strength: 0.50 + noise_scale: 0.55 + missing_rate: 0.18 + outlier_rate: 0.08 + conversion_rate_range: [0.08, 0.15] + committee_friction: 0.55 diff --git a/leadforge/recipes/b2b_saas_procurement_v1/narrative.yaml b/leadforge/recipes/b2b_saas_procurement_v1/narrative.yaml new file mode 100644 index 0000000..ea9eee2 --- /dev/null +++ b/leadforge/recipes/b2b_saas_procurement_v1/narrative.yaml @@ -0,0 +1,99 @@ +# Narrative defaults for b2b_saas_procurement_v1 +# --------------------------------------------------------------------------- +# These are the baseline "story facts" for the mid-market B2B SaaS procurement +# vertical. Simulation layers may override individual fields via config. 
+ +company: + name: "Veridian Technologies" + founded_year: 2017 + hq_city: "Austin" + hq_country: "US" + stage: "Series B" + employee_range: [80, 150] + +product: + name: "Veridian Procure" + category: "Procurement & AP Automation" + deployment: "cloud_saas" + pricing_model: "per_seat_annual" + acv_range_usd: [18000, 120000] + contract_terms_months: [12, 24, 36] + free_trial_available: true + demo_available: true + +market: + icp_employee_range: [200, 2000] + icp_industries: + - manufacturing + - logistics + - professional_services + - healthcare_non_clinical + geographies: [US, UK] + avg_deal_size_usd: 42000 + avg_sales_cycle_days: 45 + +gtm_motion: + channels: + - inbound_marketing + - sdr_outbound + - partner_referral + inbound_share: 0.45 + outbound_share: 0.35 + partner_share: 0.20 + +personas: + - role: vp_finance + title_variants: + - "VP Finance" + - "CFO" + - "Head of Finance" + - "VP of Finance" + decision_authority: economic_buyer + typical_involvement: late_stage + + - role: ap_manager + title_variants: + - "AP Manager" + - "Accounts Payable Manager" + - "Finance Operations Manager" + - "AP Lead" + decision_authority: champion + typical_involvement: full_cycle + + - role: it_director + title_variants: + - "IT Director" + - "CTO" + - "Head of IT" + - "VP Engineering" + decision_authority: technical_evaluator + typical_involvement: mid_to_late + + - role: procurement_manager + title_variants: + - "Procurement Manager" + - "Director of Procurement" + - "Strategic Sourcing Manager" + - "Head of Procurement" + decision_authority: end_user + typical_involvement: early_to_mid + +funnel_stages: + - name: mql + label: "Marketing Qualified Lead" + - name: sal + label: "Sales Accepted Lead" + - name: sql + label: "Sales Qualified Lead" + - name: demo_scheduled + label: "Demo Scheduled" + - name: demo_completed + label: "Demo Completed" + - name: proposal_sent + label: "Proposal Sent" + - name: negotiation + label: "Negotiation" + - name: closed_won + label: 
"Closed Won" + - name: closed_lost + label: "Closed Lost" diff --git a/tests/api/__init__.py b/tests/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/api/test_generator.py b/tests/api/test_generator.py new file mode 100644 index 0000000..b7c2b1b --- /dev/null +++ b/tests/api/test_generator.py @@ -0,0 +1,125 @@ +"""Tests for leadforge.api.generator — Generator.from_recipe.""" + +import pytest + +from leadforge.api.generator import Generator +from leadforge.core.enums import DifficultyProfile, ExposureMode +from leadforge.core.exceptions import InvalidRecipeError + + +def test_from_recipe_returns_generator() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=42) + assert isinstance(gen, Generator) + + +def test_from_recipe_config_recipe_id() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=1) + assert gen.config.recipe_id == "b2b_saas_procurement_v1" + + +def test_from_recipe_seed_propagates() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=777) + assert gen.config.seed == 777 + + +def test_from_recipe_exposure_mode_propagates() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", exposure_mode="research_instructor") + assert gen.config.exposure_mode == ExposureMode.research_instructor + + +def test_from_recipe_difficulty_propagates() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", difficulty="advanced") + assert gen.config.difficulty == DifficultyProfile.advanced + + +def test_from_recipe_population_override() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1", n_leads=123, n_accounts=50) + assert gen.config.n_leads == 123 + assert gen.config.n_accounts == 50 + + +def test_from_recipe_deterministic_config() -> None: + """Same args must produce identical configs.""" + from leadforge.core.hashing import hash_config + + gen1 = Generator.from_recipe("b2b_saas_procurement_v1", seed=42) + gen2 = 
Generator.from_recipe("b2b_saas_procurement_v1", seed=42) + assert hash_config(gen1.config) == hash_config(gen2.config) + + +def test_from_recipe_different_seeds_different_configs() -> None: + from leadforge.core.hashing import hash_config + + gen1 = Generator.from_recipe("b2b_saas_procurement_v1", seed=1) + gen2 = Generator.from_recipe("b2b_saas_procurement_v1", seed=2) + assert hash_config(gen1.config) != hash_config(gen2.config) + + +def test_from_recipe_invalid_id_raises() -> None: + with pytest.raises(InvalidRecipeError): + Generator.from_recipe("does_not_exist") + + +def test_generate_not_implemented() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1") + with pytest.raises(NotImplementedError): + gen.generate() + + +def test_from_recipe_config_has_package_version() -> None: + gen = Generator.from_recipe("b2b_saas_procurement_v1") + assert gen.config.package_version # non-empty string + + +def test_from_recipe_override_dict() -> None: + gen = Generator.from_recipe( + "b2b_saas_procurement_v1", + override={"n_leads": 4242}, + # explicit kwarg wins + n_leads=9999, + ) + assert gen.config.n_leads == 9999 + + +def test_from_recipe_override_dict_applies_exposure_and_difficulty() -> None: + """Layer 2: override dict sets exposure_mode / difficulty when not explicit.""" + gen = Generator.from_recipe( + "b2b_saas_procurement_v1", + override={"exposure_mode": "research_instructor", "difficulty": "intro"}, + ) + assert gen.config.exposure_mode == ExposureMode.research_instructor + assert gen.config.difficulty == DifficultyProfile.intro + + +def test_from_recipe_explicit_exposure_beats_override() -> None: + """Layer 1: explicit exposure_mode / difficulty kwargs beat override dict.""" + gen = Generator.from_recipe( + "b2b_saas_procurement_v1", + override={"exposure_mode": "research_instructor", "difficulty": "intro"}, + exposure_mode="student_public", + difficulty="advanced", + ) + assert gen.config.exposure_mode == ExposureMode.student_public + assert 
gen.config.difficulty == DifficultyProfile.advanced + + +def test_from_recipe_override_dict_applies_seed_and_output_path() -> None: + """Layer 2: override dict should set seed / output_path when not explicitly passed.""" + gen = Generator.from_recipe( + "b2b_saas_procurement_v1", + override={"seed": 5678, "output_path": "/tmp/override"}, + ) + assert gen.config.seed == 5678 + assert gen.config.output_path == "/tmp/override" + + +def test_from_recipe_explicit_seed_beats_override_dict() -> None: + """Layer 1: explicit seed / output_path kwargs beat override dict.""" + gen = Generator.from_recipe( + "b2b_saas_procurement_v1", + override={"seed": 5678, "output_path": "/tmp/override"}, + seed=42, + output_path="/tmp/explicit", + ) + assert gen.config.seed == 42 + assert gen.config.output_path == "/tmp/explicit" diff --git a/tests/api/test_recipes.py b/tests/api/test_recipes.py new file mode 100644 index 0000000..d334d4b --- /dev/null +++ b/tests/api/test_recipes.py @@ -0,0 +1,241 @@ +"""Tests for leadforge.api.recipes — Recipe model and config resolution.""" + +import pytest + +from leadforge.api.recipes import Recipe +from leadforge.core.enums import DifficultyProfile, ExposureMode +from leadforge.core.exceptions import InvalidRecipeError +from leadforge.core.models import GenerationConfig + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +VALID_DICT = { + "id": "test_recipe_v1", + "title": "Test Recipe", + "vertical": "test_vertical", + "description": "A recipe for testing.", + "primary_task": "converted_within_90_days", + "supported_modes": ["student_public", "research_instructor"], + "supported_difficulty": ["intro", "intermediate", "advanced"], + "default_population": {"n_accounts": 100, "n_contacts": 300, "n_leads": 500}, + "horizon_days": 90, +} + + +# --------------------------------------------------------------------------- +# 
Recipe.from_dict +# --------------------------------------------------------------------------- + + +def test_from_dict_roundtrip() -> None: + recipe = Recipe.from_dict(VALID_DICT) + assert recipe.id == "test_recipe_v1" + assert recipe.primary_task == "converted_within_90_days" + assert ExposureMode.student_public in recipe.supported_modes + assert DifficultyProfile.intermediate in recipe.supported_difficulty + assert recipe.default_population["n_leads"] == 500 + assert recipe.horizon_days == 90 + + +def test_from_dict_missing_key_raises() -> None: + bad = {k: v for k, v in VALID_DICT.items() if k != "primary_task"} + with pytest.raises(InvalidRecipeError, match="missing required keys"): + Recipe.from_dict(bad) + + +def test_from_dict_invalid_mode_raises() -> None: + bad = {**VALID_DICT, "supported_modes": ["student_public", "not_a_mode"]} + with pytest.raises(InvalidRecipeError, match="Invalid exposure mode"): + Recipe.from_dict(bad) + + +def test_from_dict_invalid_difficulty_raises() -> None: + bad = {**VALID_DICT, "supported_difficulty": ["easy"]} + with pytest.raises(InvalidRecipeError, match="Invalid difficulty profile"): + Recipe.from_dict(bad) + + +def test_from_dict_invalid_population_raises() -> None: + bad = {**VALID_DICT, "default_population": {"n_leads": "five_thousand"}} + with pytest.raises(InvalidRecipeError, match="default_population"): + Recipe.from_dict(bad) + + +def test_from_dict_bool_horizon_days_raises() -> None: + """bool horizon_days must be rejected (True → 1 would pass silently).""" + bad = {**VALID_DICT, "horizon_days": True} + with pytest.raises(InvalidRecipeError, match="horizon_days"): + Recipe.from_dict(bad) + + +def test_from_dict_float_horizon_days_raises() -> None: + """Float horizon_days must be rejected (truncation would be silent).""" + bad = {**VALID_DICT, "horizon_days": 90.5} + with pytest.raises(InvalidRecipeError, match="horizon_days"): + Recipe.from_dict(bad) + + +def test_from_dict_nonpositive_horizon_days_raises() -> 
None: + bad = {**VALID_DICT, "horizon_days": 0} + with pytest.raises(InvalidRecipeError, match="horizon_days"): + Recipe.from_dict(bad) + + +def test_from_dict_bool_population_raises() -> None: + """bool values in default_population must be rejected (bool is int subclass).""" + bad = {**VALID_DICT, "default_population": {"n_leads": True}} + with pytest.raises(InvalidRecipeError, match="default_population"): + Recipe.from_dict(bad) + + +def test_resolve_config_invalid_override_mode_raises() -> None: + """Invalid exposure_mode in override must raise InvalidRecipeError, not ValueError.""" + recipe = Recipe.from_dict(VALID_DICT) + with pytest.raises(InvalidRecipeError, match="exposure_mode"): + recipe.resolve_config(override={"exposure_mode": "not_a_mode"}) + + +def test_resolve_config_invalid_override_difficulty_raises() -> None: + """Invalid difficulty in override must raise InvalidRecipeError, not ValueError.""" + recipe = Recipe.from_dict(VALID_DICT) + with pytest.raises(InvalidRecipeError, match="difficulty"): + recipe.resolve_config(override={"difficulty": "super_hard"}) + + +# --------------------------------------------------------------------------- +# Config resolution / precedence +# --------------------------------------------------------------------------- + + +def test_resolve_config_returns_generation_config() -> None: + recipe = Recipe.from_dict(VALID_DICT) + config = recipe.resolve_config() + assert isinstance(config, GenerationConfig) + + +def test_resolve_config_recipe_defaults_used() -> None: + """Layer 3: recipe default_population should flow into config.""" + recipe = Recipe.from_dict(VALID_DICT) + config = recipe.resolve_config() + assert config.n_accounts == 100 + assert config.n_contacts == 300 + assert config.n_leads == 500 + assert config.horizon_days == 90 + + +def test_resolve_config_explicit_kwargs_override_recipe() -> None: + """Layer 1: explicit kwargs win over recipe defaults.""" + recipe = Recipe.from_dict(VALID_DICT) + config = 
recipe.resolve_config(n_leads=9999, horizon_days=30) + assert config.n_leads == 9999 + assert config.horizon_days == 30 + # Non-overridden values still come from recipe + assert config.n_accounts == 100 + + +def test_resolve_config_override_dict_beats_recipe() -> None: + """Layer 2: override dict beats recipe defaults but loses to explicit kwargs.""" + recipe = Recipe.from_dict(VALID_DICT) + config = recipe.resolve_config( + override={"n_leads": 7777, "n_accounts": 50}, + n_leads=8888, # explicit kwargs win + ) + assert config.n_leads == 8888 + assert config.n_accounts == 50 # from override dict + + +def test_resolve_config_seed_propagates() -> None: + recipe = Recipe.from_dict(VALID_DICT) + config = recipe.resolve_config(seed=999) + assert config.seed == 999 + + +def test_resolve_config_override_dict_applies_seed_and_output_path() -> None: + """Layer 2: override dict should set seed / output_path when not explicitly passed.""" + recipe = Recipe.from_dict(VALID_DICT) + config = recipe.resolve_config(override={"seed": 1234, "output_path": "/tmp/override"}) + assert config.seed == 1234 + assert config.output_path == "/tmp/override" + + +def test_resolve_config_explicit_seed_beats_override_dict() -> None: + """Layer 1: explicit seed / output_path kwargs beat override dict.""" + recipe = Recipe.from_dict(VALID_DICT) + config = recipe.resolve_config( + override={"seed": 1234, "output_path": "/tmp/override"}, + seed=999, + output_path="/tmp/explicit", + ) + assert config.seed == 999 + assert config.output_path == "/tmp/explicit" + + +def test_resolve_config_override_dict_applies_exposure_and_difficulty() -> None: + """Layer 2: override dict sets exposure_mode / difficulty when not explicit.""" + recipe = Recipe.from_dict(VALID_DICT) + config = recipe.resolve_config( + override={"exposure_mode": "research_instructor", "difficulty": "intro"} + ) + assert config.exposure_mode == ExposureMode.research_instructor + assert config.difficulty == DifficultyProfile.intro + + +def 
test_resolve_config_explicit_exposure_and_difficulty_beat_override() -> None: + """Layer 1: explicit exposure_mode / difficulty kwargs beat override dict.""" + recipe = Recipe.from_dict(VALID_DICT) + config = recipe.resolve_config( + override={"exposure_mode": "research_instructor", "difficulty": "intro"}, + exposure_mode="student_public", + difficulty="advanced", + ) + assert config.exposure_mode == ExposureMode.student_public + assert config.difficulty == DifficultyProfile.advanced + + +def test_resolve_config_unsupported_mode_raises() -> None: + limited = {**VALID_DICT, "supported_modes": ["student_public"]} + recipe = Recipe.from_dict(limited) + with pytest.raises(InvalidRecipeError, match="not supported"): + recipe.resolve_config(exposure_mode="research_instructor") + + +def test_resolve_config_unsupported_difficulty_raises() -> None: + limited = {**VALID_DICT, "supported_difficulty": ["intermediate"]} + recipe = Recipe.from_dict(limited) + with pytest.raises(InvalidRecipeError, match="not supported"): + recipe.resolve_config(difficulty="advanced") + + +# --------------------------------------------------------------------------- +# Real recipe loading via registry +# --------------------------------------------------------------------------- + + +def test_real_recipe_loads_and_parses() -> None: + from leadforge.recipes.registry import load_recipe + + raw = load_recipe("b2b_saas_procurement_v1") + recipe = Recipe.from_dict(raw) + assert recipe.id == "b2b_saas_procurement_v1" + assert DifficultyProfile.intermediate in recipe.supported_difficulty + + +def test_real_recipe_narrative_loads() -> None: + from leadforge.recipes.registry import load_recipe + + recipe = Recipe.from_dict(load_recipe("b2b_saas_procurement_v1")) + narrative = recipe.load_narrative() + assert "company" in narrative + assert "personas" in narrative + + +def test_real_recipe_difficulty_profiles_load() -> None: + from leadforge.recipes.registry import load_recipe + + recipe = 
Recipe.from_dict(load_recipe("b2b_saas_procurement_v1")) + profiles = recipe.load_difficulty_profiles() + assert "intro" in profiles + assert "intermediate" in profiles + assert "advanced" in profiles diff --git a/tests/core/test_exceptions.py b/tests/core/test_exceptions.py index af777c4..ac76086 100644 --- a/tests/core/test_exceptions.py +++ b/tests/core/test_exceptions.py @@ -34,3 +34,52 @@ def test_exception_message_preserved() -> None: msg = "recipe 'foo' not found" exc = InvalidRecipeError(msg) assert str(exc) == msg + + +def test_invalid_config_error_on_string_count() -> None: + """__post_init__ must raise InvalidConfigError for non-int count fields.""" + from leadforge.core.exceptions import InvalidConfigError + from leadforge.core.models import GenerationConfig + + with pytest.raises(InvalidConfigError, match="n_leads"): + GenerationConfig(n_leads="five_thousand") # type: ignore[arg-type] + + +def test_invalid_config_error_on_bool_count() -> None: + """bool is an int subclass and must be explicitly rejected.""" + from leadforge.core.exceptions import InvalidConfigError + from leadforge.core.models import GenerationConfig + + with pytest.raises(InvalidConfigError, match="n_accounts"): + GenerationConfig(n_accounts=True) # type: ignore[arg-type] + + +def test_invalid_config_error_on_bad_exposure_mode() -> None: + """Invalid exposure_mode string must raise InvalidConfigError, not ValueError.""" + from leadforge.core.exceptions import InvalidConfigError + from leadforge.core.models import GenerationConfig + + with pytest.raises(InvalidConfigError, match="exposure_mode"): + GenerationConfig(exposure_mode="not_a_mode") # type: ignore[arg-type] + + +def test_invalid_config_error_on_bad_difficulty() -> None: + """Invalid difficulty string must raise InvalidConfigError, not ValueError.""" + from leadforge.core.exceptions import InvalidConfigError + from leadforge.core.models import GenerationConfig + + with pytest.raises(InvalidConfigError, match="difficulty"): + 
GenerationConfig(difficulty="super_hard") # type: ignore[arg-type] + + +def test_missing_sentinel_repr() -> None: + """_MISSING sentinel must have a readable repr for help() / docs.""" + from leadforge.core.sentinels import _MISSING + + assert repr(_MISSING) == "<MISSING>" # TODO: confirm literal + + +def test_missing_sentinel_is_singleton() -> None: + from leadforge.core.sentinels import _MISSING, _MissingType + + assert _MissingType() is _MISSING diff --git a/tests/core/test_hashing.py b/tests/core/test_hashing.py new file mode 100644 index 0000000..4547204 --- /dev/null +++ b/tests/core/test_hashing.py @@ -0,0 +1,38 @@ +"""Tests for leadforge.core.hashing.""" + +from leadforge.core.hashing import hash_config +from leadforge.core.models import GenerationConfig + + +def test_same_config_same_hash() -> None: + c1 = GenerationConfig(seed=42) + c2 = GenerationConfig(seed=42) + assert hash_config(c1) == hash_config(c2) + + +def test_different_seed_different_hash() -> None: + c1 = GenerationConfig(seed=42) + c2 = GenerationConfig(seed=99) + assert hash_config(c1) != hash_config(c2) + + +def test_hash_is_hex_string() -> None: + digest = hash_config(GenerationConfig()) + assert isinstance(digest, str) + assert len(digest) == 64 # SHA-256 → 32 bytes → 64 hex chars + int(digest, 16) # must be valid hex + + +def test_hash_stable_across_calls() -> None: + config = GenerationConfig(seed=7, n_leads=1000) + h1 = hash_config(config) + h2 = hash_config(config) + assert h1 == h2 + + +def test_different_exposure_mode_different_hash() -> None: + from leadforge.core.enums import ExposureMode + + c1 = GenerationConfig(exposure_mode=ExposureMode.student_public) + c2 = GenerationConfig(exposure_mode=ExposureMode.research_instructor) + assert hash_config(c1) != hash_config(c2) diff --git a/tests/core/test_rng.py b/tests/core/test_rng.py new file mode 100644 index 0000000..edfc6bb --- /dev/null +++ b/tests/core/test_rng.py @@ -0,0 +1,62 @@ +"""Tests for leadforge.core.rng.""" + +import pytest + +from leadforge.core.rng
import RNGRoot + + +def test_rng_root_same_seed_same_sequence() -> None: + """The same seed must always produce the same sequence.""" + r1 = RNGRoot(42).child("accounts") + r2 = RNGRoot(42).child("accounts") + assert [r1.random() for _ in range(20)] == [r2.random() for _ in range(20)] + + +def test_rng_root_different_seeds_different_sequences() -> None: + r1 = RNGRoot(42).child("accounts") + r2 = RNGRoot(99).child("accounts") + seq1 = [r1.random() for _ in range(10)] + seq2 = [r2.random() for _ in range(10)] + assert seq1 != seq2 + + +def test_named_children_are_independent() -> None: + """Different child names must yield different sequences from the same root.""" + root = RNGRoot(42) + r1 = root.child("accounts") + r2 = root.child("contacts") + seq1 = [r1.random() for _ in range(10)] + seq2 = [r2.random() for _ in range(10)] + assert seq1 != seq2 + + +def test_child_reproducible_across_root_instances() -> None: + """child() must be deterministic — same root seed → same child sequence.""" + name = "leads" + seq1 = [RNGRoot(7).child(name).random() for _ in range(15)] + seq2 = [RNGRoot(7).child(name).random() for _ in range(15)] + assert seq1 == seq2 + + +def test_rng_root_seed_property() -> None: + root = RNGRoot(123) + assert root.seed == 123 + + +def test_rng_root_rejects_non_int_seed() -> None: + with pytest.raises(TypeError, match="seed must be an int"): + RNGRoot(3.14) # type: ignore[arg-type] + + +def test_rng_root_rejects_bool_seed() -> None: + with pytest.raises(TypeError, match="seed must be an int"): + RNGRoot(True) # type: ignore[arg-type] + + +def test_rng_root_rejects_negative_seed() -> None: + with pytest.raises(ValueError, match="non-negative"): + RNGRoot(-1) + + +def test_rng_root_repr() -> None: + assert repr(RNGRoot(42)) == "RNGRoot(seed=42)"