From bcc135eb57c451722c359a2fcff1a3b744216422 Mon Sep 17 00:00:00 2001
From: Shay Palachy <shaypal5@users.noreply.github.com>
Date: Fri, 1 May 2026 23:22:21 +0300
Subject: [PATCH 1/2] feat: generalize dataset card template prose for
 non-conversion tasks (#38)

Add label_description field to TaskManifest so the dataset card's primary
task section uses task-specific prose instead of hardcoded conversion
language. Thread TaskManifest from write_bundle() to render_dataset_card().
Default case (converted_within_90_days) produces equivalent output.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .agent-plan.md                       |  7 ++++
 leadforge/api/bundle.py              |  2 +-
 leadforge/narrative/dataset_card.py  | 19 +++++++--
 leadforge/schema/tasks.py            | 13 +++++++
 tests/narrative/test_dataset_card.py | 58 ++++++++++++++++++++++++++++
 tests/schema/test_tasks.py           |  1 +
 6 files changed, 95 insertions(+), 5 deletions(-)

diff --git a/.agent-plan.md b/.agent-plan.md
index ff3fae8..ccc118b 100644
--- a/.agent-plan.md
+++ b/.agent-plan.md
@@ -141,6 +141,13 @@ Documentation + CI:
 - [x] `leadforge/pipelines/build_v6.py` — same `label_column` kwarg on `rename_and_select()`
 - [x] 16 new tests: `task_manifest_for_config`, bundle layout, manifest keys, validation with custom task, pipeline rename with custom label column; total 773 passing
 
+### Generalize dataset card prose (PR #41, closes #38)
+
+- [x] `leadforge/schema/tasks.py` — `label_description` field added to `TaskManifest`; populated in `CONVERTED_WITHIN_90_DAYS` constant and `task_manifest_for_config()` factory
+- [x] `leadforge/narrative/dataset_card.py` — `render_dataset_card()` accepts optional `task_manifest` kwarg; uses `label_description` for primary task prose when provided, falls back to hardcoded conversion prose otherwise
+- [x] `leadforge/api/bundle.py` — threads `TaskManifest` from `write_bundle()` to `render_dataset_card()`
+- [x] 4 new tests: custom label description, default equivalence, no-manifest fallback, empty-description fallback; total 780 passing
+
 ### Parquet metadata row counts (PR #37, closes #17)
 
 - [x] `leadforge/validation/bundle_checks.py` — `_check_task_splits()` uses `pq.read_metadata().num_rows` instead of `pd.read_parquet()`; `_check_leakage()` uses `pq.read_schema().names` instead of `pd.read_parquet(columns=[])`
diff --git a/leadforge/api/bundle.py b/leadforge/api/bundle.py
index dbc3470..f4c97b4 100644
--- a/leadforge/api/bundle.py
+++ b/leadforge/api/bundle.py
@@ -80,7 +80,7 @@ def write_bundle(
     # ------------------------------------------------------------------
     # 3. Dataset card and feature dictionary
     # ------------------------------------------------------------------
-    (root / "dataset_card.md").write_text(render_dataset_card(bundle.spec))
+    (root / "dataset_card.md").write_text(render_dataset_card(bundle.spec, task_manifest=task))
     write_feature_dictionary(root / "feature_dictionary.csv")
 
     # ------------------------------------------------------------------
diff --git a/leadforge/narrative/dataset_card.py b/leadforge/narrative/dataset_card.py
index 6e100d8..f3bac81 100644
--- a/leadforge/narrative/dataset_card.py
+++ b/leadforge/narrative/dataset_card.py
@@ -10,9 +10,13 @@
 
 if TYPE_CHECKING:
     from leadforge.core.models import WorldSpec
+    from leadforge.schema.tasks import TaskManifest
 
 
-def render_dataset_card(world_spec: WorldSpec) -> str:
+def render_dataset_card(
+    world_spec: WorldSpec,
+    task_manifest: TaskManifest | None = None,
+) -> str:
     """Return a Markdown dataset card string for *world_spec*.
 
     Sections present at all milestones:
@@ -94,14 +98,21 @@ def render_dataset_card(world_spec: WorldSpec) -> str:
     # ------------------------------------------------------------------
     # Primary task
     # ------------------------------------------------------------------
+    if task_manifest is not None and task_manifest.label_description:
+        label_def = task_manifest.label_description
+    else:
+        label_def = (
+            f"A lead is considered converted if a `closed_won` event "
+            f"is recorded within {cfg.label_window_days} days of the lead's "
+            f"snapshot anchor date. The label is derived from simulated events "
+            f"— it is never sampled directly."
+        )
     lines += [
         "## Primary task",
         "",
         f"**Task:** `{cfg.primary_task}`",
         "",
-        f"**Label definition:** A lead is considered converted if a `closed_won` event "
-        f"is recorded within {cfg.label_window_days} days of the lead's snapshot anchor date. "
-        "The label is derived from simulated events — it is never sampled directly.",
+        f"**Label definition:** {label_def}",
         "",
     ]
 
diff --git a/leadforge/schema/tasks.py b/leadforge/schema/tasks.py
index 1560bda..595ca55 100644
--- a/leadforge/schema/tasks.py
+++ b/leadforge/schema/tasks.py
@@ -69,6 +69,7 @@ class TaskManifest:
     split: SplitSpec
     task_type: str = "binary_classification"
     description: str = ""
+    label_description: str = ""
 
     def to_dict(self) -> dict[str, object]:
         """Return a JSON-serializable representation."""
@@ -84,6 +85,7 @@ def to_dict(self) -> dict[str, object]:
                 "test": self.split.test,
             },
             "description": self.description,
+            "label_description": self.label_description,
         }
 
 
@@ -103,6 +105,11 @@ def to_dict(self) -> dict[str, object]:
         "of the snapshot anchor date. Label is event-derived — never sampled "
         "directly. All features are pre-anchor (leakage-free by construction)."
     ),
+    label_description=(
+        "A lead is considered converted if a `closed_won` event is recorded "
+        "within 90 days of the lead's snapshot anchor date. The label is "
+        "derived from simulated events — it is never sampled directly."
+    ),
 )
 
 
@@ -131,4 +138,10 @@ def task_manifest_for_config(
             f"event-derived — never sampled directly. All features are "
             f"pre-anchor (leakage-free by construction)."
         ),
+        label_description=(
+            f"A lead is considered converted if a `closed_won` event is recorded "
+            f"within {label_window_days} days of the lead's snapshot anchor date. "
+            f"The label is derived from simulated events — it is never sampled "
+            f"directly."
+        ),
     )
diff --git a/tests/narrative/test_dataset_card.py b/tests/narrative/test_dataset_card.py
index c4da70b..1da12a0 100644
--- a/tests/narrative/test_dataset_card.py
+++ b/tests/narrative/test_dataset_card.py
@@ -3,6 +3,7 @@
 from leadforge.api.generator import Generator
 from leadforge.core.models import GenerationConfig, WorldSpec
 from leadforge.narrative.dataset_card import render_dataset_card
+from leadforge.schema.tasks import SplitSpec, TaskManifest, task_manifest_for_config
 
 
 def _make_world_spec(**kwargs: object) -> WorldSpec:
@@ -131,3 +132,60 @@ def test_generator_world_spec_is_world_spec() -> None:
 
     gen = Generator.from_recipe("b2b_saas_procurement_v1")
     assert isinstance(gen.world_spec, WorldSpec)
+
+
+# ---------------------------------------------------------------------------
+# Task manifest threading (issue #38)
+# ---------------------------------------------------------------------------
+
+
+def test_card_uses_task_manifest_label_description() -> None:
+    """When a TaskManifest is provided, its label_description replaces hardcoded prose."""
+    spec = _make_world_spec(primary_task="churned_within_60_days", label_window_days=60)
+    task = TaskManifest(
+        task_id="churned_within_60_days",
+        label_column="churned_within_60_days",
+        label_window_days=60,
+        primary_table="leads",
+        split=SplitSpec(train=0.7, valid=0.15, test=0.15),
+        label_description=(
+            "A lead is considered churned if a `churn` event is recorded "
+            "within 60 days of the snapshot anchor date."
+        ),
+    )
+    card = render_dataset_card(spec, task_manifest=task)
+    assert "churned" in card
+    assert "churn" in card
+    assert "closed_won" not in card
+
+
+def test_card_default_task_manifest_matches_hardcoded() -> None:
+    """Default task manifest produces equivalent prose to the old hardcoded text."""
+    spec = _make_world_spec()
+    task = task_manifest_for_config()
+    card = render_dataset_card(spec, task_manifest=task)
+    assert "closed_won" in card
+    assert "90 days" in card
+
+
+def test_card_without_task_manifest_falls_back() -> None:
+    """Without a TaskManifest, the card falls back to the conversion prose."""
+    spec = _make_world_spec()
+    card = render_dataset_card(spec)
+    assert "closed_won" in card
+    assert "90 days" in card
+
+
+def test_card_task_manifest_empty_label_description_falls_back() -> None:
+    """A TaskManifest with empty label_description falls back to default prose."""
+    spec = _make_world_spec()
+    task = TaskManifest(
+        task_id="converted_within_90_days",
+        label_column="converted_within_90_days",
+        label_window_days=90,
+        primary_table="leads",
+        split=SplitSpec(train=0.7, valid=0.15, test=0.15),
+        label_description="",
+    )
+    card = render_dataset_card(spec, task_manifest=task)
+    assert "closed_won" in card
diff --git a/tests/schema/test_tasks.py b/tests/schema/test_tasks.py
index 4039a6b..e69376a 100644
--- a/tests/schema/test_tasks.py
+++ b/tests/schema/test_tasks.py
@@ -87,6 +87,7 @@ def test_task_manifest_to_dict_keys() -> None:
         "primary_table",
         "split",
         "description",
+        "label_description",
     }
     assert set(d.keys()) == expected
 

From d5f19ba461e2cf40a028698e8ab0d68bc7ad63a3 Mon Sep 17 00:00:00 2001
From: Shay Palachy <shaypal5@users.noreply.github.com>
Date: Fri, 1 May 2026 23:36:26 +0300
Subject: [PATCH 2/2] refactor: drop label_description, use description
 directly in dataset card

Address self-review findings:
- Remove label_description field (near-duplicate of description)
- Use TaskManifest.description directly as dataset card label definition
- Make task_manifest_for_config() produce generic prose for non-default tasks
  instead of hardcoding conversion-specific language for all tasks
- Use task-agnostic fallback when no TaskManifest is provided
- Update docstrings for TaskManifest and render_dataset_card()

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .agent-plan.md                       |  6 ++--
 leadforge/narrative/dataset_card.py  | 17 +++++----
 leadforge/schema/tasks.py            | 42 +++++++++++-----------
 tests/narrative/test_dataset_card.py | 54 +++++++++++++++++-----------
 tests/schema/test_tasks.py           |  1 -
 5 files changed, 68 insertions(+), 52 deletions(-)

diff --git a/.agent-plan.md b/.agent-plan.md
index ccc118b..7db8d38 100644
--- a/.agent-plan.md
+++ b/.agent-plan.md
@@ -143,10 +143,10 @@ Documentation + CI:
 
 ### Generalize dataset card prose (PR #41, closes #38)
 
-- [x] `leadforge/schema/tasks.py` — `label_description` field added to `TaskManifest`; populated in `CONVERTED_WITHIN_90_DAYS` constant and `task_manifest_for_config()` factory
-- [x] `leadforge/narrative/dataset_card.py` — `render_dataset_card()` accepts optional `task_manifest` kwarg; uses `label_description` for primary task prose when provided, falls back to hardcoded conversion prose otherwise
+- [x] `leadforge/schema/tasks.py` — `TaskManifest.description` rewritten for dataset-card use; `task_manifest_for_config()` produces task-specific prose (conversion-specific for default task, generic for others)
+- [x] `leadforge/narrative/dataset_card.py` — `render_dataset_card()` accepts optional `task_manifest` kwarg; uses `description` for primary task prose when provided, task-agnostic fallback otherwise
 - [x] `leadforge/api/bundle.py` — threads `TaskManifest` from `write_bundle()` to `render_dataset_card()`
-- [x] 4 new tests: custom label description, default equivalence, no-manifest fallback, empty-description fallback; total 780 passing
+- [x] 5 new tests: custom description, default conversion prose, generic fallback (no manifest), generic fallback (empty description), non-default task via factory; total 781 passing
 
 ### Parquet metadata row counts (PR #37, closes #17)
 
diff --git a/leadforge/narrative/dataset_card.py b/leadforge/narrative/dataset_card.py
index f3bac81..0e9d5b1 100644
--- a/leadforge/narrative/dataset_card.py
+++ b/leadforge/narrative/dataset_card.py
@@ -19,6 +19,12 @@ def render_dataset_card(
 ) -> str:
     """Return a Markdown dataset card string for *world_spec*.
 
+    Args:
+        world_spec: The world specification containing config and narrative.
+        task_manifest: Optional task manifest whose ``description`` is used
+            as the label definition prose.  When ``None`` or when
+            ``description`` is empty, a generic fallback is rendered.
+
     Sections present at all milestones:
     - Header (recipe id, version, seed, exposure mode)
     - Narrative summary (company, product, market, GTM)
@@ -98,14 +104,13 @@ def render_dataset_card(
     # ------------------------------------------------------------------
     # Primary task
     # ------------------------------------------------------------------
-    if task_manifest is not None and task_manifest.label_description:
-        label_def = task_manifest.label_description
+    if task_manifest is not None and task_manifest.description:
+        label_def = task_manifest.description
     else:
         label_def = (
-            f"A lead is considered converted if a `closed_won` event "
-            f"is recorded within {cfg.label_window_days} days of the lead's "
-            f"snapshot anchor date. The label is derived from simulated events "
-            f"— it is never sampled directly."
+            f"Binary label evaluated over a {cfg.label_window_days}-day window "
+            f"from the snapshot anchor date. The label is event-derived — never "
+            f"sampled directly."
         )
     lines += [
         "## Primary task",
diff --git a/leadforge/schema/tasks.py b/leadforge/schema/tasks.py
index 595ca55..b86bb34 100644
--- a/leadforge/schema/tasks.py
+++ b/leadforge/schema/tasks.py
@@ -54,12 +54,13 @@ class TaskManifest:
         label_column: Column name in the task Parquet files that holds the
             binary label.
         label_window_days: Number of days after the snapshot anchor date
-            within which a conversion event counts as positive.
+            within which the target event counts as positive.
         primary_table: The relational table the snapshot rows are derived
             from (usually ``"leads"``).
         split: Train/valid/test proportions.
         task_type: ML task type string (``"binary_classification"`` for v1).
-        description: Human-readable description of the task.
+        description: Human-readable description of the task, suitable for
+            display in dataset cards and documentation.
     """
 
     task_id: str
@@ -69,7 +70,6 @@ class TaskManifest:
     split: SplitSpec
     task_type: str = "binary_classification"
     description: str = ""
-    label_description: str = ""
 
     def to_dict(self) -> dict[str, object]:
         """Return a JSON-serializable representation."""
@@ -85,7 +85,6 @@ def to_dict(self) -> dict[str, object]:
                 "test": self.split.test,
             },
             "description": self.description,
-            "label_description": self.label_description,
         }
 
 
@@ -101,14 +100,10 @@ def to_dict(self) -> dict[str, object]:
     split=SplitSpec(train=0.7, valid=0.15, test=0.15),
     task_type="binary_classification",
     description=(
-        "Predict whether a lead converts (closed_won event) within 90 days "
-        "of the snapshot anchor date. Label is event-derived — never sampled "
-        "directly. All features are pre-anchor (leakage-free by construction)."
-    ),
-    label_description=(
         "A lead is considered converted if a `closed_won` event is recorded "
         "within 90 days of the lead's snapshot anchor date. The label is "
-        "derived from simulated events — it is never sampled directly."
+        "event-derived — never sampled directly. All features are pre-anchor "
+        "(leakage-free by construction)."
     ),
 )
 
@@ -128,20 +123,23 @@ def task_manifest_for_config(
             manifest key.
         label_window_days: Label observation window in days.
     """
+    if primary_task == CONVERTED_WITHIN_90_DAYS.task_id:
+        description = (
+            f"A lead is considered converted if a `closed_won` event is recorded "
+            f"within {label_window_days} days of the lead's snapshot anchor date. "
+            f"The label is event-derived — never sampled directly. All features "
+            f"are pre-anchor (leakage-free by construction)."
+        )
+    else:
+        description = (
+            f"Binary label `{primary_task}` evaluated over a "
+            f"{label_window_days}-day window from the snapshot anchor date. "
+            f"The label is event-derived — never sampled directly. All features "
+            f"are pre-anchor (leakage-free by construction)."
+        )
     return replace(
         CONVERTED_WITHIN_90_DAYS,
         task_id=primary_task,
         label_window_days=label_window_days,
-        description=(
-            f"Predict whether a lead converts (closed_won event) within "
-            f"{label_window_days} days of the snapshot anchor date. Label is "
-            f"event-derived — never sampled directly. All features are "
-            f"pre-anchor (leakage-free by construction)."
-        ),
-        label_description=(
-            f"A lead is considered converted if a `closed_won` event is recorded "
-            f"within {label_window_days} days of the lead's snapshot anchor date. "
-            f"The label is derived from simulated events — it is never sampled "
-            f"directly."
-        ),
+        description=description,
     )
diff --git a/tests/narrative/test_dataset_card.py b/tests/narrative/test_dataset_card.py
index 1da12a0..e607b49 100644
--- a/tests/narrative/test_dataset_card.py
+++ b/tests/narrative/test_dataset_card.py
@@ -42,30 +42,33 @@ def test_card_contains_primary_task() -> None:
 
 
 def test_card_contains_label_definition() -> None:
-    card = render_dataset_card(_make_world_spec())
+    task = task_manifest_for_config()
+    card = render_dataset_card(_make_world_spec(), task_manifest=task)
     assert "closed_won" in card
     assert "90 days" in card
 
 
 def test_card_renders_custom_primary_task() -> None:
     spec = _make_world_spec(primary_task="churned_within_60_days")
-    card = render_dataset_card(spec)
+    task = task_manifest_for_config("churned_within_60_days", 60)
+    card = render_dataset_card(spec, task_manifest=task)
     assert "`churned_within_60_days`" in card
     assert "converted_within_90_days" not in card
 
 
 def test_card_renders_custom_label_window_days() -> None:
     spec = _make_world_spec(label_window_days=60)
-    card = render_dataset_card(spec)
-    assert "within 60 days" in card
-    assert "within 90 days" not in card
+    task = task_manifest_for_config(label_window_days=60)
+    card = render_dataset_card(spec, task_manifest=task)
+    assert "60" in card
 
 
 def test_card_renders_custom_task_and_window() -> None:
     spec = _make_world_spec(primary_task="upgraded_within_30_days", label_window_days=30)
-    card = render_dataset_card(spec)
+    task = task_manifest_for_config("upgraded_within_30_days", 30)
+    card = render_dataset_card(spec, task_manifest=task)
     assert "`upgraded_within_30_days`" in card
-    assert "30 days" in card
+    assert "30" in card
 
 
 def test_card_contains_use_cases() -> None:
@@ -139,8 +142,8 @@ def test_generator_world_spec_is_world_spec() -> None:
 # ---------------------------------------------------------------------------
 
 
-def test_card_uses_task_manifest_label_description() -> None:
-    """When a TaskManifest is provided, its label_description replaces hardcoded prose."""
+def test_card_uses_task_manifest_description() -> None:
+    """When a TaskManifest is provided, its description replaces default prose."""
     spec = _make_world_spec(primary_task="churned_within_60_days", label_window_days=60)
     task = TaskManifest(
         task_id="churned_within_60_days",
@@ -148,7 +151,7 @@ def test_card_uses_task_manifest_label_description() -> None:
         label_window_days=60,
         primary_table="leads",
         split=SplitSpec(train=0.7, valid=0.15, test=0.15),
-        label_description=(
+        description=(
             "A lead is considered churned if a `churn` event is recorded "
             "within 60 days of the snapshot anchor date."
         ),
@@ -159,8 +162,8 @@ def test_card_uses_task_manifest_label_description() -> None:
     assert "closed_won" not in card
 
 
-def test_card_default_task_manifest_matches_hardcoded() -> None:
-    """Default task manifest produces equivalent prose to the old hardcoded text."""
+def test_card_default_task_manifest_has_conversion_prose() -> None:
+    """Default task manifest produces conversion-specific prose."""
     spec = _make_world_spec()
     task = task_manifest_for_config()
     card = render_dataset_card(spec, task_manifest=task)
@@ -168,16 +171,16 @@ def test_card_default_task_manifest_matches_hardcoded() -> None:
     assert "90 days" in card
 
 
-def test_card_without_task_manifest_falls_back() -> None:
-    """Without a TaskManifest, the card falls back to the conversion prose."""
+def test_card_without_task_manifest_uses_generic_fallback() -> None:
+    """Without a TaskManifest, the card uses a task-agnostic fallback."""
     spec = _make_world_spec()
     card = render_dataset_card(spec)
-    assert "closed_won" in card
-    assert "90 days" in card
+    assert "event-derived" in card
+    assert "closed_won" not in card
 
 
-def test_card_task_manifest_empty_label_description_falls_back() -> None:
-    """A TaskManifest with empty label_description falls back to default prose."""
+def test_card_task_manifest_empty_description_uses_generic_fallback() -> None:
+    """A TaskManifest with empty description falls back to generic prose."""
     spec = _make_world_spec()
     task = TaskManifest(
         task_id="converted_within_90_days",
@@ -185,7 +188,18 @@ def test_card_task_manifest_empty_label_description_falls_back() -> None:
         label_window_days=90,
         primary_table="leads",
         split=SplitSpec(train=0.7, valid=0.15, test=0.15),
-        label_description="",
+        description="",
     )
     card = render_dataset_card(spec, task_manifest=task)
-    assert "closed_won" in card
+    assert "event-derived" in card
+    assert "closed_won" not in card
+
+
+def test_card_non_default_task_via_factory_has_generic_prose() -> None:
+    """task_manifest_for_config with non-default task produces generic description."""
+    spec = _make_world_spec(primary_task="churned_within_60_days", label_window_days=60)
+    task = task_manifest_for_config("churned_within_60_days", 60)
+    card = render_dataset_card(spec, task_manifest=task)
+    assert "`churned_within_60_days`" in card
+    assert "60-day" in card
+    assert "closed_won" not in card
diff --git a/tests/schema/test_tasks.py b/tests/schema/test_tasks.py
index e69376a..4039a6b 100644
--- a/tests/schema/test_tasks.py
+++ b/tests/schema/test_tasks.py
@@ -87,7 +87,6 @@ def test_task_manifest_to_dict_keys() -> None:
         "primary_table",
         "split",
         "description",
-        "label_description",
     }
     assert set(d.keys()) == expected