From 8b1b4623cd65d3498e1dc98b34a443071377fc8d Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Fri, 1 May 2026 09:52:05 +0300 Subject: [PATCH 1/3] feat: carry primary_task and label_window_days into WorldSpec for dataset card Add `primary_task` and `label_window_days` fields to `GenerationConfig` (with defaults preserving current behavior). Propagate through `Recipe.from_dict()`, `resolve_config()`, and `Generator.from_recipe()` so recipe YAML can override them. Update `render_dataset_card()` to read from `world_spec.config` instead of hard-coded string literals. Closes #6 Co-Authored-By: Claude Opus 4.6 --- leadforge/api/generator.py | 6 ++++ leadforge/api/recipes.py | 28 +++++++++++++++ leadforge/core/models.py | 7 ++++ leadforge/narrative/dataset_card.py | 6 ++-- tests/api/test_recipes.py | 51 ++++++++++++++++++++++++++++ tests/narrative/test_dataset_card.py | 21 ++++++++++++ 6 files changed, 116 insertions(+), 3 deletions(-) diff --git a/leadforge/api/generator.py b/leadforge/api/generator.py index 9ef87a4..59e259b 100644 --- a/leadforge/api/generator.py +++ b/leadforge/api/generator.py @@ -52,6 +52,8 @@ def from_recipe( n_contacts: int | None = None, n_leads: int | None = None, horizon_days: int | None = None, + primary_task: str | None = None, + label_window_days: int | None = None, output_path: str = _MISSING, # type: ignore[assignment] override: dict[str, Any] | None = None, ) -> Generator: @@ -69,6 +71,8 @@ def from_recipe( n_contacts: Override recipe default contact count. n_leads: Override recipe default lead count. horizon_days: Override recipe default simulation horizon. + primary_task: Override recipe default primary task name. + label_window_days: Override recipe default label window (days). output_path: Directory where the bundle will be saved. override: Optional dict of overrides (mirrors a ``--override`` file). Applied after recipe defaults but before explicit kwargs. @@ -96,6 +100,8 @@ def from_recipe( n_contacts=n_contacts, n_leads=n_leads, horizon_days=horizon_days, + primary_task=primary_task, + label_window_days=label_window_days, output_path=output_path, override=override, ) diff --git a/leadforge/api/recipes.py b/leadforge/api/recipes.py index 57e8358..8a39fb5 100644 --- a/leadforge/api/recipes.py +++ b/leadforge/api/recipes.py @@ -40,6 +40,7 @@ class Recipe: supported_difficulty: tuple[DifficultyProfile, ...] default_population: dict[str, int] horizon_days: int + label_window_days: int | None = None # ------------------------------------------------------------------ # # Construction @@ -93,6 +94,17 @@ def from_dict(cls, data: dict[str, Any]) -> Recipe: if horizon_days <= 0: raise InvalidRecipeError(f"'horizon_days' must be positive, got {horizon_days}") + label_window_days: int | None = None + raw_lwd = data.get("label_window_days") + if raw_lwd is not None: + if isinstance(raw_lwd, bool) or not isinstance(raw_lwd, int): + raise InvalidRecipeError( + f"'label_window_days' must be a positive int, got {type(raw_lwd).__name__!r}" + ) + if raw_lwd <= 0: + raise InvalidRecipeError(f"'label_window_days' must be positive, got {raw_lwd}") + label_window_days = raw_lwd + return cls( id=data["id"], title=data["title"], @@ -103,6 +115,7 @@ def from_dict(cls, data: dict[str, Any]) -> Recipe: supported_difficulty=supported_difficulty, default_population=dict(pop), horizon_days=horizon_days, + label_window_days=label_window_days, ) # ------------------------------------------------------------------ # @@ -119,6 +132,8 @@ def resolve_config( n_contacts: int | None = None, n_leads: int | None = None, horizon_days: int | None = None, + primary_task: str | None = None, + label_window_days: int | None = None, output_path: str = _MISSING, # type: ignore[assignment] override: dict[str, Any] | None = None, ) -> GenerationConfig: @@ -148,6 +163,8 @@ def resolve_config( "n_contacts": pkg["n_contacts"], "n_leads": pkg["n_leads"], "horizon_days": pkg["horizon_days"], + "primary_task": pkg["primary_task"], + "label_window_days": pkg["label_window_days"], } # Layer 3 — recipe defaults @@ -156,6 +173,9 @@ def resolve_config( if key in pop: resolved[key] = pop[key] resolved["horizon_days"] = self.horizon_days + resolved["primary_task"] = self.primary_task + if self.label_window_days is not None: + resolved["label_window_days"] = self.label_window_days # Layer 2 — override dict (beats recipe/package defaults) if override: @@ -164,6 +184,8 @@ def resolve_config( "n_contacts", "n_leads", "horizon_days", + "primary_task", + "label_window_days", "seed", "output_path", "exposure_mode", @@ -190,6 +212,10 @@ def resolve_config( resolved["n_leads"] = n_leads if horizon_days is not None: resolved["horizon_days"] = horizon_days + if primary_task is not None: + resolved["primary_task"] = primary_task + if label_window_days is not None: + resolved["label_window_days"] = label_window_days try: mode = ExposureMode(resolved["exposure_mode"]) @@ -226,6 +252,8 @@ def resolve_config( n_contacts=resolved["n_contacts"], n_leads=resolved["n_leads"], horizon_days=resolved["horizon_days"], + primary_task=resolved["primary_task"], + label_window_days=resolved["label_window_days"], output_path=resolved["output_path"], ) diff --git a/leadforge/core/models.py b/leadforge/core/models.py index cd0585b..955e96b 100644 --- a/leadforge/core/models.py +++ b/leadforge/core/models.py @@ -45,6 +45,8 @@ class GenerationConfig: n_contacts: int = 4200 n_leads: int = 5000 horizon_days: int = 90 + primary_task: str = "converted_within_90_days" + label_window_days: int = 90 output_path: str = "./out" package_version: str = field(default_factory=lambda: __version__) @@ -57,6 +59,11 @@ def __post_init__(self) -> None: _require_positive_int(self.n_contacts, "n_contacts") _require_positive_int(self.n_leads, "n_leads") _require_positive_int(self.horizon_days, "horizon_days") + _require_positive_int(self.label_window_days, "label_window_days") + if not isinstance(self.primary_task, str) or not self.primary_task: + raise InvalidConfigError( + f"primary_task must be a non-empty string, got {self.primary_task!r}" + ) # Coerce string enums supplied as plain strings if not isinstance(self.exposure_mode, ExposureMode): try: diff --git a/leadforge/narrative/dataset_card.py b/leadforge/narrative/dataset_card.py index aa61ffe..6e100d8 100644 --- a/leadforge/narrative/dataset_card.py +++ b/leadforge/narrative/dataset_card.py @@ -97,10 +97,10 @@ def render_dataset_card(world_spec: WorldSpec) -> str: lines += [ "## Primary task", "", - "**Task:** `converted_within_90_days`", + f"**Task:** `{cfg.primary_task}`", "", - "**Label definition:** A lead is considered converted if a `closed_won` event " - "is recorded within 90 days of the lead's snapshot anchor date. " + f"**Label definition:** A lead is considered converted if a `closed_won` event " + f"is recorded within {cfg.label_window_days} days of the lead's snapshot anchor date. " "The label is derived from simulated events — it is never sampled directly.", "", ] diff --git a/tests/api/test_recipes.py b/tests/api/test_recipes.py index d334d4b..1606650 100644 --- a/tests/api/test_recipes.py +++ b/tests/api/test_recipes.py @@ -208,6 +208,57 @@ def test_resolve_config_unsupported_difficulty_raises() -> None: recipe.resolve_config(difficulty="advanced") +def test_resolve_config_primary_task_defaults_from_recipe() -> None: + recipe = Recipe.from_dict(VALID_DICT) + cfg = recipe.resolve_config() + assert cfg.primary_task == "converted_within_90_days" + + +def test_resolve_config_primary_task_explicit_kwarg() -> None: + recipe = Recipe.from_dict(VALID_DICT) + cfg = recipe.resolve_config(primary_task="churned_within_60_days") + assert cfg.primary_task == "churned_within_60_days" + + +def test_resolve_config_label_window_days_defaults() -> None: + recipe = Recipe.from_dict(VALID_DICT) + cfg = recipe.resolve_config() + assert cfg.label_window_days == 90 + + +def test_resolve_config_label_window_days_from_recipe() -> None: + data = {**VALID_DICT, "label_window_days": 60} + recipe = Recipe.from_dict(data) + cfg = recipe.resolve_config() + assert cfg.label_window_days == 60 + + +def test_resolve_config_label_window_days_explicit_kwarg() -> None: + recipe = Recipe.from_dict(VALID_DICT) + cfg = recipe.resolve_config(label_window_days=30) + assert cfg.label_window_days == 30 + + +def test_resolve_config_override_dict_applies_task_fields() -> None: + recipe = Recipe.from_dict(VALID_DICT) + cfg = recipe.resolve_config( + override={"primary_task": "upsold_within_180_days", "label_window_days": 180} + ) + assert cfg.primary_task == "upsold_within_180_days" + assert cfg.label_window_days == 180 + + +def test_resolve_config_explicit_task_fields_beat_override() -> None: + recipe = Recipe.from_dict(VALID_DICT) + cfg = recipe.resolve_config( + primary_task="my_task", + label_window_days=45, + override={"primary_task": "other_task", "label_window_days": 120}, + ) + assert cfg.primary_task == "my_task" + assert cfg.label_window_days == 45 + + # --------------------------------------------------------------------------- # Real recipe loading via registry # --------------------------------------------------------------------------- diff --git a/tests/narrative/test_dataset_card.py b/tests/narrative/test_dataset_card.py index 104e46e..c4da70b 100644 --- a/tests/narrative/test_dataset_card.py +++ b/tests/narrative/test_dataset_card.py @@ -46,6 +46,27 @@ def test_card_contains_label_definition() -> None: assert "90 days" in card +def test_card_renders_custom_primary_task() -> None: + spec = _make_world_spec(primary_task="churned_within_60_days") + card = render_dataset_card(spec) + assert "`churned_within_60_days`" in card + assert "converted_within_90_days" not in card + + +def test_card_renders_custom_label_window_days() -> None: + spec = _make_world_spec(label_window_days=60) + card = render_dataset_card(spec) + assert "within 60 days" in card + assert "within 90 days" not in card + + +def test_card_renders_custom_task_and_window() -> None: + spec = _make_world_spec(primary_task="upgraded_within_30_days", label_window_days=30) + card = render_dataset_card(spec) + assert "`upgraded_within_30_days`" in card + assert "30 days" in card + + def test_card_contains_use_cases() -> None: card = render_dataset_card(_make_world_spec()) assert "use cases" in card.lower() From 1a06183569525b26a875ed596ba6a91762922bc7 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Fri, 1 May 2026 09:52:18 +0300 Subject: [PATCH 2/3] docs: update .agent-plan.md for WorldSpec task fields (PR #36) Co-Authored-By: Claude Opus 4.6 --- .agent-plan.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.agent-plan.md b/.agent-plan.md index 1c039b8..4204152 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -120,6 +120,14 @@ Documentation + CI: - [x] `tests/mechanisms/test_mechanisms.py` — 6 tests for `LatentDecayIntensity` - [x] All 737 tests pass; lint + format clean +### WorldSpec task fields (PR #36, closes #6) + +- [x] `leadforge/core/models.py` — `primary_task: str` and `label_window_days: int` fields on `GenerationConfig` (defaults preserve current behavior) +- [x] `leadforge/api/recipes.py` — `label_window_days` optional field on `Recipe`; propagated through `from_dict()` and `resolve_config()` (all 4 precedence layers) +- [x] `leadforge/api/generator.py` — `primary_task` and `label_window_days` kwargs on `Generator.from_recipe()` +- [x] `leadforge/narrative/dataset_card.py` — renders task name and label window from `world_spec.config` instead of hard-coded literals +- [x] 10 new tests (3 dataset card + 7 config resolution); total 750 passing + ### Pipeline refactors (PR #34, closes #31 + #32) - [x] `leadforge/core/rng.py` — `numpy_child()` method on `RNGRoot` returning `np.random.RandomState` From 23352aea9f9a7357dac5931e576e7bf14fc3a0ce Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Fri, 1 May 2026 13:44:55 +0300 Subject: [PATCH 3/3] =?UTF-8?q?fix:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20tighten=20scope,=20add=20validation=20+=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review feedback addressed: - Remove primary_task/label_window_days as explicit kwargs from resolve_config() and Generator.from_recipe() — these fields are resolved from recipe YAML and override dict only, not casually overridable, since the generation pipeline doesn't yet support arbitrary task types (Copilot-1, Copilot-3, shaypal5 #1, #2) - Add label_window_days <= horizon_days validation in GenerationConfig.__post_init__ (Copilot-2, shaypal5 #3) - Add tests for invalid primary_task on GenerationConfig: empty string, non-string type (shaypal5 #6, pr-agent-context) - Add tests for invalid label_window_days on Recipe.from_dict: bool, non-positive, float (shaypal5 #7, pr-agent-context) - Add test for label_window_days > horizon_days rejection - Fix existing test using horizon_days=30 (now conflicts with default label_window_days=90) Co-Authored-By: Claude Opus 4.6 --- leadforge/api/generator.py | 6 --- leadforge/api/recipes.py | 6 --- leadforge/core/models.py | 5 +++ tests/api/test_recipes.py | 75 ++++++++++++++++++++++++-------------- 4 files changed, 53 insertions(+), 39 deletions(-) diff --git a/leadforge/api/generator.py b/leadforge/api/generator.py index 59e259b..9ef87a4 100644 --- a/leadforge/api/generator.py +++ b/leadforge/api/generator.py @@ -52,8 +52,6 @@ def from_recipe( n_contacts: int | None = None, n_leads: int | None = None, horizon_days: int | None = None, - primary_task: str | None = None, - label_window_days: int | None = None, output_path: str = _MISSING, # type: ignore[assignment] override: dict[str, Any] | None = None, ) -> Generator: @@ -71,8 +69,6 @@ def from_recipe( n_contacts: Override recipe default contact count. n_leads: Override recipe default lead count. horizon_days: Override recipe default simulation horizon. - primary_task: Override recipe default primary task name. - label_window_days: Override recipe default label window (days). output_path: Directory where the bundle will be saved. override: Optional dict of overrides (mirrors a ``--override`` file). Applied after recipe defaults but before explicit kwargs. @@ -100,8 +96,6 @@ def from_recipe( n_contacts=n_contacts, n_leads=n_leads, horizon_days=horizon_days, - primary_task=primary_task, - label_window_days=label_window_days, output_path=output_path, override=override, ) diff --git a/leadforge/api/recipes.py b/leadforge/api/recipes.py index 8a39fb5..2d98e1c 100644 --- a/leadforge/api/recipes.py +++ b/leadforge/api/recipes.py @@ -132,8 +132,6 @@ def resolve_config( n_contacts: int | None = None, n_leads: int | None = None, horizon_days: int | None = None, - primary_task: str | None = None, - label_window_days: int | None = None, output_path: str = _MISSING, # type: ignore[assignment] override: dict[str, Any] | None = None, ) -> GenerationConfig: @@ -212,10 +210,6 @@ def resolve_config( resolved["n_leads"] = n_leads if horizon_days is not None: resolved["horizon_days"] = horizon_days - if primary_task is not None: - resolved["primary_task"] = primary_task - if label_window_days is not None: - resolved["label_window_days"] = label_window_days try: mode = ExposureMode(resolved["exposure_mode"]) diff --git a/leadforge/core/models.py b/leadforge/core/models.py index 955e96b..2674821 100644 --- a/leadforge/core/models.py +++ b/leadforge/core/models.py @@ -64,6 +64,11 @@ def __post_init__(self) -> None: raise InvalidConfigError( f"primary_task must be a non-empty string, got {self.primary_task!r}" ) + if self.label_window_days > self.horizon_days: + raise InvalidConfigError( + f"label_window_days ({self.label_window_days}) must not exceed " + f"horizon_days ({self.horizon_days})" + ) # Coerce string enums supplied as plain strings if not isinstance(self.exposure_mode, ExposureMode): try: diff --git a/tests/api/test_recipes.py b/tests/api/test_recipes.py index 1606650..7e6ad10 100644 --- a/tests/api/test_recipes.py +++ b/tests/api/test_recipes.py @@ -4,7 +4,7 @@ from leadforge.api.recipes import Recipe from leadforge.core.enums import DifficultyProfile, ExposureMode -from leadforge.core.exceptions import InvalidRecipeError +from leadforge.core.exceptions import InvalidConfigError, InvalidRecipeError from leadforge.core.models import GenerationConfig # --------------------------------------------------------------------------- @@ -128,9 +128,9 @@ def test_resolve_config_recipe_defaults_used() -> None: def test_resolve_config_explicit_kwargs_override_recipe() -> None: """Layer 1: explicit kwargs win over recipe defaults.""" recipe = Recipe.from_dict(VALID_DICT) - config = recipe.resolve_config(n_leads=9999, horizon_days=30) + config = recipe.resolve_config(n_leads=9999, horizon_days=120) assert config.n_leads == 9999 - assert config.horizon_days == 30 + assert config.horizon_days == 120 # Non-overridden values still come from recipe assert config.n_accounts == 100 @@ -214,12 +214,6 @@ def test_resolve_config_primary_task_defaults_from_recipe() -> None: assert cfg.primary_task == "converted_within_90_days" -def test_resolve_config_primary_task_explicit_kwarg() -> None: - recipe = Recipe.from_dict(VALID_DICT) - cfg = recipe.resolve_config(primary_task="churned_within_60_days") - assert cfg.primary_task == "churned_within_60_days" - - def test_resolve_config_label_window_days_defaults() -> None: recipe = Recipe.from_dict(VALID_DICT) cfg = recipe.resolve_config() @@ -233,30 +227,57 @@ def test_resolve_config_label_window_days_from_recipe() -> None: assert cfg.label_window_days == 60 -def test_resolve_config_label_window_days_explicit_kwarg() -> None: - recipe = Recipe.from_dict(VALID_DICT) - cfg = recipe.resolve_config(label_window_days=30) - assert cfg.label_window_days == 30 - - def test_resolve_config_override_dict_applies_task_fields() -> None: recipe = Recipe.from_dict(VALID_DICT) cfg = recipe.resolve_config( - override={"primary_task": "upsold_within_180_days", "label_window_days": 180} + override={"primary_task": "upsold_within_90_days", "label_window_days": 60} ) - assert cfg.primary_task == "upsold_within_180_days" - assert cfg.label_window_days == 180 + assert cfg.primary_task == "upsold_within_90_days" + assert cfg.label_window_days == 60 -def test_resolve_config_explicit_task_fields_beat_override() -> None: - recipe = Recipe.from_dict(VALID_DICT) - cfg = recipe.resolve_config( - primary_task="my_task", - label_window_days=45, - override={"primary_task": "other_task", "label_window_days": 120}, - ) - assert cfg.primary_task == "my_task" - assert cfg.label_window_days == 45 +def test_from_dict_bool_label_window_days_raises() -> None: + """bool label_window_days must be rejected (True → 1 would pass silently).""" + bad = {**VALID_DICT, "label_window_days": True} + with pytest.raises(InvalidRecipeError, match="label_window_days"): + Recipe.from_dict(bad) + + +def test_from_dict_nonpositive_label_window_days_raises() -> None: + bad = {**VALID_DICT, "label_window_days": 0} + with pytest.raises(InvalidRecipeError, match="label_window_days"): + Recipe.from_dict(bad) + + +def test_from_dict_float_label_window_days_raises() -> None: + bad = {**VALID_DICT, "label_window_days": 30.5} + with pytest.raises(InvalidRecipeError, match="label_window_days"): + Recipe.from_dict(bad) + + +# --------------------------------------------------------------------------- +# GenerationConfig validation +# --------------------------------------------------------------------------- + + +def test_config_empty_primary_task_raises() -> None: + with pytest.raises(InvalidConfigError, match="primary_task"): + GenerationConfig(primary_task="") + + +def test_config_non_string_primary_task_raises() -> None: + with pytest.raises(InvalidConfigError, match="primary_task"): + GenerationConfig(primary_task=42) # type: ignore[arg-type] + + +def test_config_label_window_exceeds_horizon_raises() -> None: + with pytest.raises(InvalidConfigError, match="label_window_days.*must not exceed"): + GenerationConfig(horizon_days=30, label_window_days=90) + + +def test_config_label_window_equals_horizon_ok() -> None: + cfg = GenerationConfig(horizon_days=90, label_window_days=90) + assert cfg.label_window_days == 90 # ---------------------------------------------------------------------------