leadforge-dev · shaypal5 · May 4, 2026 · May 4, 2026 · May 4, 2026 · May 4, 2026
diff --git a/.agent-plan.md b/.agent-plan.md
@@ -45,6 +45,7 @@ First public dataset release: `leadforge-b2b-lead-scoring`. Three difficulty tie
 - [x] Update release/README.md — remove stale "Known limitations", add conversion rates to dataset summary
 - [x] Update release/HF_DATASET_CARD.md — add conversion rates to summary table
 - [x] Verify SHA-256 hash determinism (re-run build, compare hashes) — `scripts/verify_hash_determinism.py`; 73/73 files identical across two `build_public_release.py` runs (modulo `manifest.json`'s wall-clock `generation_timestamp`)
+- [x] Fix `current_stage` leakage in student_public bundles via exposure-layer redaction — the prescriptive `FeatureSpec.redact_in_modes` field is kept separate from the descriptive `leakage_risk` flag, so the pedagogical trap (`total_touches_all`) is retained while true label leaks are stripped; the bundle writer drops the columns returned by `redacted_columns_for(mode)`; `validate_bundle()` enforces the invariant. 73/73 hash-determinism preserved.
 - [ ] Upload to Kaggle and HuggingFace
 - [ ] Announce
 
@@ -61,9 +62,19 @@ First public dataset release: `leadforge-b2b-lead-scoring`. Three difficulty tie
 - [x] Calibration across 20 seeds × 5 motif families: intro mean 43%, intermediate mean 22%, advanced mean 9%
 - [x] All 865 tests pass
 
-### Known issue: `current_stage` leakage at 90-day horizon
+### Resolved (partial): `current_stage` leakage at 90-day horizon
 
-The full bundle snapshot includes `current_stage` which at day 90 contains terminal stages (`closed_won`/`closed_lost`). This perfectly encodes the label. The flat CSV export drops it; the Parquet task splits retain it with documentation. A proper fix (windowed snapshot or column redaction in the exposure layer) is deferred.
+Deterministic leak fixed via exposure-layer redaction. `FeatureSpec` now carries an explicit `redact_in_modes: frozenset[ExposureMode]` field — *prescriptive* — alongside the descriptive `leakage_risk` flag. `current_stage` is marked `redact_in_modes=frozenset({ExposureMode.student_public})`; the writer queries `redacted_columns_for(mode)` and strips matching columns from the snapshot, task splits, feature dictionary, and the dataset card's feature listing before they hit disk. The pedagogical trap `total_touches_all` is preserved in all modes (no entry in `redact_in_modes`). The manifest records `redacted_columns: [...]` so the bundle is self-describing. `validate_bundle()` cross-checks parquet schemas, feature dictionary, and the manifest's declared redaction set against `redacted_columns_for(mode)` derived independently from the feature spec. Hash-determinism preserved (73/73 identical across builds).
+
+### Follow-up: structural leakage in `student_public` bundles (open)
+
+Stripping `current_stage` addresses the deterministic label-encoding leak but does **not** make the released bundle structurally leakage-free. Three concerns to address in a follow-up PR:
+
+1. **Event-aggregate features are computed over the label window.** `touch_count`, `session_count`, `pricing_page_views`, `expected_acv`, `days_since_last_touch`, etc. all aggregate events in `[lead_created_at, lead_created_at + 90d]`, the same window over which the label resolves. They correlate with post-conversion activity. The structural fix is a windowed snapshot (`snapshot_day=N` with `N < label_window_days`), as v6/v7 datasets already do at day 14/20. This shifts every feature value and every conversion rate in the release bundles, so it's deferred to its own PR with a coordinated documentation update.
+2. **`is_sql=False` is near-deterministic for non-conversion.** Measured on the regenerated bundle: P(converted | is_sql=False) = 0.038 (intro), 0.015 (intermediate), 0.006 (advanced). At the advanced tier it effectively encodes the negative class. Either redact `is_sql` in `student_public` (probably correct) or accept it as a strong feature with documentation. Decide alongside concern 1 above.
+3. **`is_mql` is a constant `True`.** It is a zero-variance feature in all three tiers. Either remove it from the snapshot or, if it can ever be `False` under some recipe, have the simulator produce that variance.
+
+Suggested action: open one tracked GitHub issue covering all three (currently no issue exists; user has standing instruction not to file without confirmation).
 
 ---
 

diff --git a/leadforge/api/bundle.py b/leadforge/api/bundle.py
@@ -23,6 +23,7 @@
 from leadforge.render.snapshots import build_snapshot
 from leadforge.render.tasks import write_task_splits
 from leadforge.schema.dictionaries import write_feature_dictionary
+from leadforge.schema.features import LEAD_SNAPSHOT_FEATURES, redacted_columns_for
 from leadforge.schema.tables import write_parquet
 from leadforge.schema.tasks import task_manifest_for_config
 
@@ -72,6 +73,12 @@ def write_bundle(
 
     # ------------------------------------------------------------------
     # 2. Snapshot + task splits → tasks/
+    #
+    # Apply exposure-mode redaction here (rather than in apply_exposure)
+    # so that the manifest's per-file SHA-256 hashes reflect the published
+    # column set without a post-write rewrite step.  The redacted column
+    # set is derived from the canonical feature spec — the same source
+    # of truth the validator uses to check bundles.
     # ------------------------------------------------------------------
     snapshot = build_snapshot(
         result,
@@ -80,16 +87,28 @@ def write_bundle(
         difficulty_params=config.difficulty_params,
         seed=config.seed,
     )
+    redacted = redacted_columns_for(config.exposure_mode)
+    if redacted:
+        drop_cols = [c for c in redacted if c in snapshot.columns]
+        if drop_cols:
+            snapshot = snapshot.drop(columns=drop_cols)
+    visible_features = tuple(f for f in LEAD_SNAPSHOT_FEATURES if f.name not in redacted)
+
     task = task_manifest_for_config(config.primary_task, config.label_window_days)
     task_row_counts = write_task_splits(snapshot, root / "tasks", seed=config.seed, task=task)
 
     # ------------------------------------------------------------------
     # 3. Dataset card and feature dictionary
     # ------------------------------------------------------------------
     (root / "dataset_card.md").write_text(
-        render_dataset_card(bundle.spec, task_manifest=task, table_counts=table_row_counts)
+        render_dataset_card(
+            bundle.spec,
+            task_manifest=task,
+            table_counts=table_row_counts,
+            features=visible_features,
+        )
     )
-    write_feature_dictionary(root / "feature_dictionary.csv")
+    write_feature_dictionary(root / "feature_dictionary.csv", features=visible_features)
 
     # ------------------------------------------------------------------
     # 4. Exposure metadata (research_instructor only)
@@ -106,5 +125,6 @@ def write_bundle(
         task_row_counts={task.task_id: task_row_counts},
         bundle_root=root,
         generation_timestamp=generation_timestamp,
+        redacted_columns=sorted(redacted),
     )
     write_manifest(manifest, root)
diff --git a/leadforge/exposure/filters.py b/leadforge/exposure/filters.py
@@ -4,6 +4,12 @@
 :class:`BundleFilter` that governs which artefacts are written when
 :func:`~leadforge.api.bundle.write_bundle` produces an output bundle.
 
+The per-feature redaction policy lives separately on
+:attr:`leadforge.schema.features.FeatureSpec.redact_in_modes` and is queried
+via :func:`leadforge.schema.features.redacted_columns_for`.  ``BundleFilter``
+deliberately does *not* duplicate that information so that the writer and
+the validator both consult the same source of truth.
+
 Adding a new mode: define its ``BundleFilter`` entry in ``FILTERS``.
 """
 
@@ -16,7 +22,7 @@
 
 @dataclass(frozen=True)
 class BundleFilter:
-    """Rules that govern bundle publication for one :class:`ExposureMode`.
+    """Mode-level publication policy.
 
     Attributes:
         write_metadata: Whether to create ``metadata/`` with hidden-truth

diff --git a/leadforge/narrative/dataset_card.py b/leadforge/narrative/dataset_card.py
@@ -9,7 +9,7 @@
 from collections import Counter
 from typing import TYPE_CHECKING
 
-from leadforge.schema.features import LEAD_SNAPSHOT_FEATURES
+from leadforge.schema.features import LEAD_SNAPSHOT_FEATURES, FeatureSpec
 
 if TYPE_CHECKING:
     from leadforge.core.models import WorldSpec
@@ -20,6 +20,7 @@ def render_dataset_card(
     world_spec: WorldSpec,
     task_manifest: TaskManifest | None = None,
     table_counts: dict[str, int] | None = None,
+    features: tuple[FeatureSpec, ...] = LEAD_SNAPSHOT_FEATURES,
 ) -> str:
     """Return a Markdown dataset card string for *world_spec*.
 
@@ -31,6 +32,10 @@ def render_dataset_card(
         table_counts: Optional mapping of table name → row count.  When
             provided, the table inventory section renders actual counts
             instead of a placeholder.
+        features: Feature spec tuple to render in the categories / leakage
+            sections.  Defaults to the canonical list; pass the redacted
+            tuple when rendering an exposure-filtered bundle so the card
+            describes only what is actually present.
 
     Sections:
     - Header (recipe id, version, seed, exposure mode)
@@ -149,18 +154,16 @@ def render_dataset_card(
     # ------------------------------------------------------------------
     lines += ["## Feature categories", ""]
     category_counts: Counter[str] = Counter()
-    for feat in LEAD_SNAPSHOT_FEATURES:
+    for feat in features:
         category_counts[feat.category] += 1
     lines += [
         "| Category | Count | Examples |",
         "|---|---:|---|",
     ]
     for cat, count in category_counts.items():
-        examples = [
-            f.name for f in LEAD_SNAPSHOT_FEATURES if f.category == cat and not f.is_target
-        ][:3]
+        examples = [f.name for f in features if f.category == cat and not f.is_target][:3]
         lines.append(f"| {cat} | {count} | {', '.join(examples)} |")
-    leakage_cols = [f.name for f in LEAD_SNAPSHOT_FEATURES if f.leakage_risk]
+    leakage_cols = [f.name for f in features if f.leakage_risk]
     if leakage_cols:
         lines += [
             "",

diff --git a/leadforge/render/manifests.py b/leadforge/render/manifests.py
@@ -35,6 +35,7 @@ def build_manifest(
     task_row_counts: dict[str, dict[str, int]],
     bundle_root: Path,
     generation_timestamp: str | None = None,
+    redacted_columns: list[str] | None = None,
 ) -> dict[str, Any]:
     """Build the bundle manifest dict.
 
@@ -49,13 +50,20 @@ def build_manifest(
         task_row_counts: Mapping of task_id → {split_name → row count}.
         bundle_root: Root directory of the written bundle.
         generation_timestamp: ISO-8601 UTC timestamp string.  Defaults to now.
+        redacted_columns: Sorted list of column names that the bundle writer
+            removed from snapshot / task splits / feature dictionary for
+            this exposure mode.  Recorded in the manifest so consumers
+            (and the validator) can audit redaction without inspecting
+            package internals.  Defaults to ``[]`` (nothing redacted).
 
     Returns:
         A JSON-serialisable dict ready to be written as ``manifest.json``.
     """
     if generation_timestamp is None:
         generation_timestamp = datetime.now(UTC).isoformat(timespec="seconds")
 
+    redacted_columns_list = sorted(redacted_columns) if redacted_columns else []
+
     # Build table entries with row counts and file hashes.
     tables: dict[str, Any] = {}
     for table_name, row_count in table_row_counts.items():
@@ -91,6 +99,7 @@ def build_manifest(
         "primary_task": config.primary_task,
         "label_window_days": config.label_window_days,
         "motif_family": world_graph.motif_family,
+        "redacted_columns": redacted_columns_list,
         "tables": tables,
         "tasks": tasks,
     }

diff --git a/leadforge/schema/dictionaries.py b/leadforge/schema/dictionaries.py
@@ -23,6 +23,13 @@ def feature_dictionary_df(
 
     Columns: name, dtype, description, category, is_target, leakage_risk.
 
+    The redaction policy (``FeatureSpec.redact_in_modes``) is intentionally
+    *not* serialised here: it is package-internal state, and which columns
+    a given bundle actually published is observable from the bundle's
+    schema and from ``manifest.redacted_columns``.  Keeping this CSV's
+    column set stable preserves backward compatibility with downstream
+    consumers that parse it strictly.
+
     Args:
         features: Ordered tuple of :class:`~leadforge.schema.features.FeatureSpec`
             objects.  Defaults to the canonical lead snapshot feature list.

diff --git a/leadforge/schema/features.py b/leadforge/schema/features.py
@@ -8,13 +8,31 @@
 
 from __future__ import annotations
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+
+from leadforge.core.enums import ExposureMode
 
 
 @dataclass(frozen=True)
 class FeatureSpec:
     """Metadata for one column in the lead snapshot table.
 
+    Two concerns are kept deliberately separate:
+
+    - :attr:`leakage_risk` is *descriptive*: the value of this column is
+      computed from events that may post-date the snapshot anchor and so
+      correlates with the label.  It is informational metadata for
+      downstream consumers and is preserved in the published feature
+      dictionary.
+    - :attr:`redact_in_modes` is *prescriptive*: the bundle writer must
+      strip this column from any export whose mode is in this set.
+
+    These can disagree: ``total_touches_all`` is ``leakage_risk=True``
+    (it does encode post-snapshot information) but
+    ``redact_in_modes=frozenset()`` (it is deliberately retained as a
+    pedagogical trap).  Conversely a recipe could redact a column that
+    is not itself leakage-risky for unrelated policy reasons.
+
     Attributes:
         name: Column name as it appears in the Parquet file.
         dtype: Pandas-compatible dtype string (``"string"``, ``"Int64"``,
@@ -23,8 +41,10 @@ class FeatureSpec:
         category: Logical grouping (``"account"``, ``"contact"``,
             ``"lead_meta"``, ``"engagement"``, ``"sales"``, ``"target"``).
         is_target: True for the label column only.
-        leakage_risk: True if the column could contain post-snapshot-anchor
-            information and must be excluded from student_public exports.
+        leakage_risk: Descriptive — this column is post-snapshot correlated.
+        redact_in_modes: Prescriptive — exposure modes in which the
+            bundle writer must strip this column from snapshot, task
+            splits, and feature dictionary.
     """
 
     name: str
@@ -33,6 +53,7 @@ class FeatureSpec:
     category: str
     is_target: bool = False
     leakage_risk: bool = False
+    redact_in_modes: frozenset[ExposureMode] = field(default_factory=frozenset)
 
 
 # ---------------------------------------------------------------------------
@@ -122,6 +143,7 @@ class FeatureSpec:
         "a windowed snapshot.",
         "lead_meta",
         leakage_risk=True,
+        redact_in_modes=frozenset({ExposureMode.student_public}),
     ),
     FeatureSpec(
         "is_mql",
@@ -235,7 +257,7 @@ class FeatureSpec:
         "revenue band midpoint heuristic (NaN if neither available).",
         "sales",
     ),
-    # -- Leakage trap --
+    # -- Pedagogical leakage trap (deliberately retained in all modes) --
     FeatureSpec(
         "total_touches_all",
         "Int64",
@@ -254,3 +276,23 @@ class FeatureSpec:
         is_target=True,
     ),
 )
+
+
+def redacted_columns_for(
+    mode: ExposureMode,
+    features: tuple[FeatureSpec, ...] = LEAD_SNAPSHOT_FEATURES,
+) -> frozenset[str]:
+    """Return the set of column names that must be stripped from *mode* exports.
+
+    A column is included when *mode* is a member of its
+    :attr:`FeatureSpec.redact_in_modes`.  Callers (the bundle writer, the
+    validation check) all derive their answer from this single function, so
+    a single source of truth governs both producing and verifying bundles.
+
+    Args:
+        mode: The exposure mode being published.
+        features: Feature spec tuple to consult.  Defaults to the canonical
+            lead snapshot list; pass a custom tuple for tests or
+            future per-recipe feature sets.
+    """
+    return frozenset(f.name for f in features if mode in f.redact_in_modes)