From bcc135eb57c451722c359a2fcff1a3b744216422 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Fri, 1 May 2026 23:22:21 +0300 Subject: [PATCH 1/2] feat: generalize dataset card template prose for non-conversion tasks (#38) Add label_description field to TaskManifest so the dataset card's primary task section uses task-specific prose instead of hardcoded conversion language. Thread TaskManifest from write_bundle() to render_dataset_card(). Default case (converted_within_90_days) produces equivalent output. Co-Authored-By: Claude Opus 4.6 --- .agent-plan.md | 7 ++++ leadforge/api/bundle.py | 2 +- leadforge/narrative/dataset_card.py | 19 +++++++-- leadforge/schema/tasks.py | 13 +++++++ tests/narrative/test_dataset_card.py | 58 ++++++++++++++++++++++++++++ tests/schema/test_tasks.py | 1 + 6 files changed, 95 insertions(+), 5 deletions(-) diff --git a/.agent-plan.md b/.agent-plan.md index ff3fae8..ccc118b 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -141,6 +141,13 @@ Documentation + CI: - [x] `leadforge/pipelines/build_v6.py` — same `label_column` kwarg on `rename_and_select()` - [x] 16 new tests: `task_manifest_for_config`, bundle layout, manifest keys, validation with custom task, pipeline rename with custom label column; total 773 passing +### Generalize dataset card prose (PR #41, closes #38) + +- [x] `leadforge/schema/tasks.py` — `label_description` field added to `TaskManifest`; populated in `CONVERTED_WITHIN_90_DAYS` constant and `task_manifest_for_config()` factory +- [x] `leadforge/narrative/dataset_card.py` — `render_dataset_card()` accepts optional `task_manifest` kwarg; uses `label_description` for primary task prose when provided, falls back to hardcoded conversion prose otherwise +- [x] `leadforge/api/bundle.py` — threads `TaskManifest` from `write_bundle()` to `render_dataset_card()` +- [x] 4 new tests: custom label description, default equivalence, no-manifest fallback, empty-description fallback; total 780 passing + ### Parquet metadata row counts (PR #37, closes #17) - [x] `leadforge/validation/bundle_checks.py` — `_check_task_splits()` uses `pq.read_metadata().num_rows` instead of `pd.read_parquet()`; `_check_leakage()` uses `pq.read_schema().names` instead of `pd.read_parquet(columns=[])` diff --git a/leadforge/api/bundle.py b/leadforge/api/bundle.py index dbc3470..f4c97b4 100644 --- a/leadforge/api/bundle.py +++ b/leadforge/api/bundle.py @@ -80,7 +80,7 @@ def write_bundle( # ------------------------------------------------------------------ # 3. Dataset card and feature dictionary # ------------------------------------------------------------------ - (root / "dataset_card.md").write_text(render_dataset_card(bundle.spec)) + (root / "dataset_card.md").write_text(render_dataset_card(bundle.spec, task_manifest=task)) write_feature_dictionary(root / "feature_dictionary.csv") # ------------------------------------------------------------------ diff --git a/leadforge/narrative/dataset_card.py b/leadforge/narrative/dataset_card.py index 6e100d8..f3bac81 100644 --- a/leadforge/narrative/dataset_card.py +++ b/leadforge/narrative/dataset_card.py @@ -10,9 +10,13 @@ if TYPE_CHECKING: from leadforge.core.models import WorldSpec + from leadforge.schema.tasks import TaskManifest -def render_dataset_card(world_spec: WorldSpec) -> str: +def render_dataset_card( + world_spec: WorldSpec, + task_manifest: TaskManifest | None = None, +) -> str: """Return a Markdown dataset card string for *world_spec*. Sections present at all milestones: @@ -94,14 +98,21 @@ def render_dataset_card(world_spec: WorldSpec) -> str: # ------------------------------------------------------------------ # Primary task # ------------------------------------------------------------------ + if task_manifest is not None and task_manifest.label_description: + label_def = task_manifest.label_description + else: + label_def = ( + f"A lead is considered converted if a `closed_won` event " + f"is recorded within {cfg.label_window_days} days of the lead's " + f"snapshot anchor date. The label is derived from simulated events " + f"— it is never sampled directly." + ) lines += [ "## Primary task", "", f"**Task:** `{cfg.primary_task}`", "", - f"**Label definition:** A lead is considered converted if a `closed_won` event " - f"is recorded within {cfg.label_window_days} days of the lead's snapshot anchor date. " - "The label is derived from simulated events — it is never sampled directly.", + f"**Label definition:** {label_def}", "", ] diff --git a/leadforge/schema/tasks.py b/leadforge/schema/tasks.py index 1560bda..595ca55 100644 --- a/leadforge/schema/tasks.py +++ b/leadforge/schema/tasks.py @@ -69,6 +69,7 @@ class TaskManifest: split: SplitSpec task_type: str = "binary_classification" description: str = "" + label_description: str = "" def to_dict(self) -> dict[str, object]: """Return a JSON-serializable representation.""" @@ -84,6 +85,7 @@ def to_dict(self) -> dict[str, object]: "test": self.split.test, }, "description": self.description, + "label_description": self.label_description, } @@ -103,6 +105,11 @@ def to_dict(self) -> dict[str, object]: "of the snapshot anchor date. Label is event-derived — never sampled " "directly. All features are pre-anchor (leakage-free by construction)." ), + label_description=( + "A lead is considered converted if a `closed_won` event is recorded " + "within 90 days of the lead's snapshot anchor date. The label is " + "derived from simulated events — it is never sampled directly." + ), ) @@ -131,4 +138,10 @@ def task_manifest_for_config( f"event-derived — never sampled directly. All features are " f"pre-anchor (leakage-free by construction)." ), + label_description=( + f"A lead is considered converted if a `closed_won` event is recorded " + f"within {label_window_days} days of the lead's snapshot anchor date. " + f"The label is derived from simulated events — it is never sampled " + f"directly." + ), ) diff --git a/tests/narrative/test_dataset_card.py b/tests/narrative/test_dataset_card.py index c4da70b..1da12a0 100644 --- a/tests/narrative/test_dataset_card.py +++ b/tests/narrative/test_dataset_card.py @@ -3,6 +3,7 @@ from leadforge.api.generator import Generator from leadforge.core.models import GenerationConfig, WorldSpec from leadforge.narrative.dataset_card import render_dataset_card +from leadforge.schema.tasks import SplitSpec, TaskManifest, task_manifest_for_config def _make_world_spec(**kwargs: object) -> WorldSpec: @@ -131,3 +132,60 @@ def test_generator_world_spec_is_world_spec() -> None: gen = Generator.from_recipe("b2b_saas_procurement_v1") assert isinstance(gen.world_spec, WorldSpec) + + +# --------------------------------------------------------------------------- +# Task manifest threading (issue #38) +# --------------------------------------------------------------------------- + + +def test_card_uses_task_manifest_label_description() -> None: + """When a TaskManifest is provided, its label_description replaces hardcoded prose.""" + spec = _make_world_spec(primary_task="churned_within_60_days", label_window_days=60) + task = TaskManifest( + task_id="churned_within_60_days", + label_column="churned_within_60_days", + label_window_days=60, + primary_table="leads", + split=SplitSpec(train=0.7, valid=0.15, test=0.15), + label_description=( + "A lead is considered churned if a `churn` event is recorded " + "within 60 days of the snapshot anchor date." + ), + ) + card = render_dataset_card(spec, task_manifest=task) + assert "churned" in card + assert "churn" in card + assert "closed_won" not in card + + +def test_card_default_task_manifest_matches_hardcoded() -> None: + """Default task manifest produces equivalent prose to the old hardcoded text.""" + spec = _make_world_spec() + task = task_manifest_for_config() + card = render_dataset_card(spec, task_manifest=task) + assert "closed_won" in card + assert "90 days" in card + + +def test_card_without_task_manifest_falls_back() -> None: + """Without a TaskManifest, the card falls back to the conversion prose.""" + spec = _make_world_spec() + card = render_dataset_card(spec) + assert "closed_won" in card + assert "90 days" in card + + +def test_card_task_manifest_empty_label_description_falls_back() -> None: + """A TaskManifest with empty label_description falls back to default prose.""" + spec = _make_world_spec() + task = TaskManifest( + task_id="converted_within_90_days", + label_column="converted_within_90_days", + label_window_days=90, + primary_table="leads", + split=SplitSpec(train=0.7, valid=0.15, test=0.15), + label_description="", + ) + card = render_dataset_card(spec, task_manifest=task) + assert "closed_won" in card diff --git a/tests/schema/test_tasks.py b/tests/schema/test_tasks.py index 4039a6b..e69376a 100644 --- a/tests/schema/test_tasks.py +++ b/tests/schema/test_tasks.py @@ -87,6 +87,7 @@ def test_task_manifest_to_dict_keys() -> None: "primary_table", "split", "description", + "label_description", } assert set(d.keys()) == expected From d5f19ba461e2cf40a028698e8ab0d68bc7ad63a3 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Fri, 1 May 2026 23:36:26 +0300 Subject: [PATCH 2/2] refactor: drop label_description, use description directly in dataset card Address self-review findings: - Remove label_description field (near-duplicate of description) - Use TaskManifest.description directly as dataset card label definition - Make task_manifest_for_config() produce generic prose for non-default tasks instead of hardcoding conversion-specific language for all tasks - Use task-agnostic fallback when no TaskManifest is provided - Update docstrings for TaskManifest and render_dataset_card() Co-Authored-By: Claude Opus 4.6 --- .agent-plan.md | 6 ++-- leadforge/narrative/dataset_card.py | 17 +++++---- leadforge/schema/tasks.py | 42 +++++++++++----------- tests/narrative/test_dataset_card.py | 54 +++++++++++++++++----------- tests/schema/test_tasks.py | 1 - 5 files changed, 68 insertions(+), 52 deletions(-) diff --git a/.agent-plan.md b/.agent-plan.md index ccc118b..7db8d38 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -143,10 +143,10 @@ Documentation + CI: ### Generalize dataset card prose (PR #41, closes #38) -- [x] `leadforge/schema/tasks.py` — `label_description` field added to `TaskManifest`; populated in `CONVERTED_WITHIN_90_DAYS` constant and `task_manifest_for_config()` factory -- [x] `leadforge/narrative/dataset_card.py` — `render_dataset_card()` accepts optional `task_manifest` kwarg; uses `label_description` for primary task prose when provided, falls back to hardcoded conversion prose otherwise +- [x] `leadforge/schema/tasks.py` — `TaskManifest.description` rewritten for dataset-card use; `task_manifest_for_config()` produces task-specific prose (conversion-specific for default task, generic for others) +- [x] `leadforge/narrative/dataset_card.py` — `render_dataset_card()` accepts optional `task_manifest` kwarg; uses `description` for primary task prose when provided, task-agnostic fallback otherwise - [x] `leadforge/api/bundle.py` — threads `TaskManifest` from `write_bundle()` to `render_dataset_card()` -- [x] 4 new tests: custom label description, default equivalence, no-manifest fallback, empty-description fallback; total 780 passing +- [x] 5 new tests: custom description, default conversion prose, generic fallback (no manifest), generic fallback (empty description), non-default task via factory; total 781 passing ### Parquet metadata row counts (PR #37, closes #17) diff --git a/leadforge/narrative/dataset_card.py b/leadforge/narrative/dataset_card.py index f3bac81..0e9d5b1 100644 --- a/leadforge/narrative/dataset_card.py +++ b/leadforge/narrative/dataset_card.py @@ -19,6 +19,12 @@ def render_dataset_card( ) -> str: """Return a Markdown dataset card string for *world_spec*. + Args: + world_spec: The world specification containing config and narrative. + task_manifest: Optional task manifest whose ``description`` is used + as the label definition prose. When ``None`` or when + ``description`` is empty, a generic fallback is rendered. + Sections present at all milestones: - Header (recipe id, version, seed, exposure mode) - Narrative summary (company, product, market, GTM) @@ -98,14 +104,13 @@ def render_dataset_card( # ------------------------------------------------------------------ # Primary task # ------------------------------------------------------------------ - if task_manifest is not None and task_manifest.label_description: - label_def = task_manifest.label_description + if task_manifest is not None and task_manifest.description: + label_def = task_manifest.description else: label_def = ( - f"A lead is considered converted if a `closed_won` event " - f"is recorded within {cfg.label_window_days} days of the lead's " - f"snapshot anchor date. The label is derived from simulated events " - f"— it is never sampled directly." + f"Binary label evaluated over a {cfg.label_window_days}-day window " + f"from the snapshot anchor date. The label is event-derived — never " + f"sampled directly." ) lines += [ "## Primary task", diff --git a/leadforge/schema/tasks.py b/leadforge/schema/tasks.py index 595ca55..b86bb34 100644 --- a/leadforge/schema/tasks.py +++ b/leadforge/schema/tasks.py @@ -54,12 +54,13 @@ class TaskManifest: label_column: Column name in the task Parquet files that holds the binary label. label_window_days: Number of days after the snapshot anchor date - within which a conversion event counts as positive. + within which the target event counts as positive. primary_table: The relational table the snapshot rows are derived from (usually ``"leads"``). split: Train/valid/test proportions. task_type: ML task type string (``"binary_classification"`` for v1). - description: Human-readable description of the task. + description: Human-readable description of the task, suitable for + display in dataset cards and documentation. """ task_id: str @@ -69,7 +70,6 @@ class TaskManifest: split: SplitSpec task_type: str = "binary_classification" description: str = "" - label_description: str = "" def to_dict(self) -> dict[str, object]: """Return a JSON-serializable representation.""" @@ -85,7 +85,6 @@ def to_dict(self) -> dict[str, object]: "test": self.split.test, }, "description": self.description, - "label_description": self.label_description, } @@ -101,14 +100,10 @@ def to_dict(self) -> dict[str, object]: split=SplitSpec(train=0.7, valid=0.15, test=0.15), task_type="binary_classification", description=( - "Predict whether a lead converts (closed_won event) within 90 days " - "of the snapshot anchor date. Label is event-derived — never sampled " - "directly. All features are pre-anchor (leakage-free by construction)." - ), - label_description=( "A lead is considered converted if a `closed_won` event is recorded " "within 90 days of the lead's snapshot anchor date. The label is " - "derived from simulated events — it is never sampled directly." + "event-derived — never sampled directly. All features are pre-anchor " + "(leakage-free by construction)." ), ) @@ -128,20 +123,23 @@ def task_manifest_for_config( manifest key. label_window_days: Label observation window in days. """ + if primary_task == CONVERTED_WITHIN_90_DAYS.task_id: + description = ( + f"A lead is considered converted if a `closed_won` event is recorded " + f"within {label_window_days} days of the lead's snapshot anchor date. " + f"The label is event-derived — never sampled directly. All features " + f"are pre-anchor (leakage-free by construction)." + ) + else: + description = ( + f"Binary label `{primary_task}` evaluated over a " + f"{label_window_days}-day window from the snapshot anchor date. " + f"The label is event-derived — never sampled directly. All features " + f"are pre-anchor (leakage-free by construction)." + ) return replace( CONVERTED_WITHIN_90_DAYS, task_id=primary_task, label_window_days=label_window_days, - description=( - f"Predict whether a lead converts (closed_won event) within " - f"{label_window_days} days of the snapshot anchor date. Label is " - f"event-derived — never sampled directly. All features are " - f"pre-anchor (leakage-free by construction)." - ), - label_description=( - f"A lead is considered converted if a `closed_won` event is recorded " - f"within {label_window_days} days of the lead's snapshot anchor date. " - f"The label is derived from simulated events — it is never sampled " - f"directly." - ), + description=description, ) diff --git a/tests/narrative/test_dataset_card.py b/tests/narrative/test_dataset_card.py index 1da12a0..e607b49 100644 --- a/tests/narrative/test_dataset_card.py +++ b/tests/narrative/test_dataset_card.py @@ -42,30 +42,33 @@ def test_card_contains_primary_task() -> None: def test_card_contains_label_definition() -> None: - card = render_dataset_card(_make_world_spec()) + task = task_manifest_for_config() + card = render_dataset_card(_make_world_spec(), task_manifest=task) assert "closed_won" in card assert "90 days" in card def test_card_renders_custom_primary_task() -> None: spec = _make_world_spec(primary_task="churned_within_60_days") - card = render_dataset_card(spec) + task = task_manifest_for_config("churned_within_60_days", 60) + card = render_dataset_card(spec, task_manifest=task) assert "`churned_within_60_days`" in card assert "converted_within_90_days" not in card def test_card_renders_custom_label_window_days() -> None: spec = _make_world_spec(label_window_days=60) - card = render_dataset_card(spec) - assert "within 60 days" in card - assert "within 90 days" not in card + task = task_manifest_for_config(label_window_days=60) + card = render_dataset_card(spec, task_manifest=task) + assert "60" in card def test_card_renders_custom_task_and_window() -> None: spec = _make_world_spec(primary_task="upgraded_within_30_days", label_window_days=30) - card = render_dataset_card(spec) + task = task_manifest_for_config("upgraded_within_30_days", 30) + card = render_dataset_card(spec, task_manifest=task) assert "`upgraded_within_30_days`" in card - assert "30 days" in card + assert "30" in card def test_card_contains_use_cases() -> None: @@ -139,8 +142,8 @@ def test_generator_world_spec_is_world_spec() -> None: # --------------------------------------------------------------------------- -def test_card_uses_task_manifest_label_description() -> None: - """When a TaskManifest is provided, its label_description replaces hardcoded prose.""" +def test_card_uses_task_manifest_description() -> None: + """When a TaskManifest is provided, its description replaces default prose.""" spec = _make_world_spec(primary_task="churned_within_60_days", label_window_days=60) task = TaskManifest( task_id="churned_within_60_days", @@ -148,7 +151,7 @@ def test_card_uses_task_manifest_label_description() -> None: label_window_days=60, primary_table="leads", split=SplitSpec(train=0.7, valid=0.15, test=0.15), - label_description=( + description=( "A lead is considered churned if a `churn` event is recorded " "within 60 days of the snapshot anchor date." ), @@ -159,8 +162,8 @@ def test_card_uses_task_manifest_label_description() -> None: assert "closed_won" not in card -def test_card_default_task_manifest_matches_hardcoded() -> None: - """Default task manifest produces equivalent prose to the old hardcoded text.""" +def test_card_default_task_manifest_has_conversion_prose() -> None: + """Default task manifest produces conversion-specific prose.""" spec = _make_world_spec() task = task_manifest_for_config() card = render_dataset_card(spec, task_manifest=task) @@ -168,16 +171,16 @@ def test_card_default_task_manifest_matches_hardcoded() -> None: assert "90 days" in card -def test_card_without_task_manifest_falls_back() -> None: - """Without a TaskManifest, the card falls back to the conversion prose.""" +def test_card_without_task_manifest_uses_generic_fallback() -> None: + """Without a TaskManifest, the card uses a task-agnostic fallback.""" spec = _make_world_spec() card = render_dataset_card(spec) - assert "closed_won" in card - assert "90 days" in card + assert "event-derived" in card + assert "closed_won" not in card -def test_card_task_manifest_empty_label_description_falls_back() -> None: - """A TaskManifest with empty label_description falls back to default prose.""" +def test_card_task_manifest_empty_description_uses_generic_fallback() -> None: + """A TaskManifest with empty description falls back to generic prose.""" spec = _make_world_spec() task = TaskManifest( task_id="converted_within_90_days", @@ -185,7 +188,18 @@ def test_card_task_manifest_empty_label_description_falls_back() -> None: label_window_days=90, primary_table="leads", split=SplitSpec(train=0.7, valid=0.15, test=0.15), - label_description="", + description="", ) card = render_dataset_card(spec, task_manifest=task) - assert "closed_won" in card + assert "event-derived" in card + assert "closed_won" not in card + + +def test_card_non_default_task_via_factory_has_generic_prose() -> None: + """task_manifest_for_config with non-default task produces generic description.""" + spec = _make_world_spec(primary_task="churned_within_60_days", label_window_days=60) + task = task_manifest_for_config("churned_within_60_days", 60) + card = render_dataset_card(spec, task_manifest=task) + assert "`churned_within_60_days`" in card + assert "60-day" in card + assert "closed_won" not in card diff --git a/tests/schema/test_tasks.py b/tests/schema/test_tasks.py index e69376a..4039a6b 100644 --- a/tests/schema/test_tasks.py +++ b/tests/schema/test_tasks.py @@ -87,7 +87,6 @@ def test_task_manifest_to_dict_keys() -> None: "primary_table", "split", "description", - "label_description", } assert set(d.keys()) == expected