leadforge-dev · shaypal5 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
diff --git a/.agent-plan.md b/.agent-plan.md
@@ -106,10 +106,10 @@ Datasets:
 - [x] `lead_scoring_intro/lead_scoring_intro_v6_instructor.csv` — 1000 rows × 21 cols (+ `__leakage__touches_post_snapshot_15_90`)
 
 Validation results:
-- [x] Baseline AUC: 0.627 (within [0.62, 0.90])
-- [x] GBM improvement: +0.022 over LR (5-seed average)
-- [x] Trap delta: mean 0.046, min 0.034 (both above thresholds)
-- [x] Value-aware uplift: +17.6% at K=25, +41.3% at K=50
+- [x] Baseline AUC: 0.667 (within [0.62, 0.90])
+- [x] GBM improvement: +0.016 over LR (5-seed average)
+- [x] Trap delta: mean 0.045, min 0.021 (both above thresholds)
+- [x] Value-aware uplift: +14.8% at K=25, +11.3% at K=50
 - [x] All mandatory checks pass
 
 Documentation + CI:
@@ -120,6 +120,18 @@ Documentation + CI:
 - [x] `tests/mechanisms/test_mechanisms.py` — 6 tests for `LatentDecayIntensity`
 - [x] All 737 tests pass; lint + format clean
 
+### Pipeline refactors (PR #34, closes #31 + #32)
+
+- [x] `leadforge/core/rng.py` — `numpy_child()` method on `RNGRoot` returning `np.random.RandomState`
+- [x] `leadforge/pipelines/build_v5.py` — functions accept `seed: int` instead of `rng: np.random.RandomState`; `warnings.warn()` replaces `print(..., file=sys.stderr)`
+- [x] `leadforge/pipelines/build_v6.py` — same RNG + warnings refactor
+- [x] `scripts/build_v5_snapshot.py` — passes `seed` to pipeline functions; keeps `print()` for CLI progress
+- [x] `scripts/build_v6_snapshot.py` — same as `build_v5_snapshot.py`: passes `seed`; keeps `print()` for CLI progress
+- [x] `tests/core/test_rng.py` — 3 new tests for `numpy_child()`
+- [x] `tests/scripts/test_build_v5_snapshot.py` — `capsys` replaced with `pytest.warns()`; `seed=` kwarg
+- [x] `tests/scripts/test_build_v6_snapshot.py` — `seed=` kwarg throughout
+- [x] All 740 tests pass; lint + format clean
+
 ---
 
 ## Deferred Items

diff --git a/lead_scoring_intro/RELEASE_v6.md b/lead_scoring_intro/RELEASE_v6.md
@@ -102,37 +102,37 @@ Evaluated on a 70/30 stratified hold-out split (seed 42).
 
 | Metric | Value |
 |---|---|
-| ROC-AUC | 0.627 |
-| PR-AUC | 0.405 |
+| ROC-AUC | 0.667 |
+| PR-AUC | 0.429 |
 | Base rate | 30.0% |
-| Precision@25 | 0.480 (Lift: 1.60x) |
-| Precision@50 | 0.420 (Lift: 1.40x) |
+| Precision@25 | 0.520 (Lift: 1.73x) |
+| Precision@50 | 0.480 (Lift: 1.60x) |
 
 ### Tree model comparison (5-seed average)
 
 | Model | Mean AUC | vs LR |
 |---|---|---|
-| Logistic Regression | 0.658 | — |
-| GBM (100 trees) | 0.680 | +0.022 |
+| Logistic Regression | 0.627 | — |
+| GBM (100 trees) | 0.643 | +0.016 |
 
 GBM reliably outperforms LR due to nonlinear interactions in the DGP (latent trait interactions with engagement patterns).
 
 ### Value-aware ranking
 
 | K | By P(convert) | By expected value | Uplift |
 |---|---|---|---|
-| 25 | $884,208 | $1,039,434 | +17.6% |
-| 50 | $1,379,208 | $1,949,380 | +41.3% |
+| 25 | $1,203,430 | $1,380,990 | +14.8% |
+| 50 | $1,809,281 | $2,014,459 | +11.3% |
 
 ### Leakage trap evaluation (instructor dataset)
 
 | Metric | Value |
 |---|---|
 | Column | `__leakage__touches_post_snapshot_15_90` |
 | Seeds | 10 (42–51) |
-| Mean AUC delta | 0.0458 |
-| Min AUC delta | 0.0343 |
-| Max AUC delta | 0.0599 |
+| Mean AUC delta | 0.0453 |
+| Min AUC delta | 0.0214 |
+| Max AUC delta | 0.0696 |
 
 The trap is **causally grounded**: post-snapshot touches are higher for leads with higher latent intent/fit (the same traits that drive conversion). No label-noise injection is used.
 

diff --git a/lead_scoring_intro/lead_scoring_intro_v6.csv b/lead_scoring_intro/lead_scoring_intro_v6.csv
diff --git a/lead_scoring_intro/lead_scoring_intro_v6_instructor.csv b/lead_scoring_intro/lead_scoring_intro_v6_instructor.csv
diff --git a/leadforge/core/rng.py b/leadforge/core/rng.py
@@ -11,10 +11,19 @@
     # Each child is an independent random.Random with a deterministically
     # derived seed — re-creating from the same root seed always gives
     # the same sequence.
+
+    # For numpy/pandas operations:
+    np_rng = root.numpy_child("subsample")
 """
 
+from __future__ import annotations
+
 import hashlib
 import random
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import numpy as np
 
 
 class RNGRoot:
@@ -45,5 +54,27 @@ def child(self, name: str) -> random.Random:
         derived_seed = int.from_bytes(digest[:8], "little")
         return random.Random(derived_seed)  # noqa: S311
 
+    def numpy_child(self, name: str) -> np.random.RandomState:
+        """Return a deterministic ``np.random.RandomState`` for the named substream.
+
+        Uses the same SHA-256 derivation as ``child()`` but keeps only the first
+        4 digest bytes instead of 8 (``RandomState`` needs a seed in ``[0, 2**32)``).
+        ``child("x")`` and ``numpy_child("x")`` therefore get **different** derived
+        seeds — treat them as separate substreams that happen to share a name.
+
+        .. note::
+
+            ``np.random.RandomState`` is legacy numpy API.  We use it here
+            because ``pd.DataFrame.sample(random_state=...)`` and the rest of
+            the pipeline code rely on it.  A migration to
+            ``np.random.Generator`` is tracked but out of scope for now.
+        """
+        import numpy as np  # lazy — keeps core/rng.py importable without numpy
+
+        digest = hashlib.sha256(f"{self._seed}:{name}".encode()).digest()
+        # RandomState seed must be in [0, 2**32); use 4 bytes.
+        derived_seed = int.from_bytes(digest[:4], "little")
+        return np.random.RandomState(derived_seed)  # noqa: NPY002
+
     def __repr__(self) -> str:
         return f"RNGRoot(seed={self._seed})"
diff --git a/leadforge/pipelines/build_v5.py b/leadforge/pipelines/build_v5.py
@@ -7,11 +7,13 @@
 
 from __future__ import annotations
 
-import sys
+import warnings
 
 import numpy as np
 import pandas as pd
 
+from leadforge.core.rng import RNGRoot
+
 __all__ = [
     "ACV_CAP",
     "ACV_FLOOR",
@@ -114,22 +116,29 @@ def rename_and_select(df: pd.DataFrame) -> pd.DataFrame:
 
 def subsample(
     df: pd.DataFrame,
-    rng: np.random.RandomState,
+    seed: int,
     n: int = SUBSAMPLE_N,
     target_rate: float = TARGET_RATE,
 ) -> pd.DataFrame:
     """Stratified subsample to n rows at target_rate conversion."""
+    rng = RNGRoot(seed).numpy_child("subsample")
     positives = df[df["converted"] == 1]
     negatives = df[df["converted"] == 0]
     n_pos = int(n * target_rate)
     n_neg = n - n_pos
 
     if len(positives) < n_pos:
-        print(f"WARNING: only {len(positives)} positives, need {n_pos}", file=sys.stderr)
+        warnings.warn(
+            f"only {len(positives)} positives available, need {n_pos}",
+            stacklevel=2,
+        )
         n_pos = len(positives)
         n_neg = n - n_pos
     if len(negatives) < n_neg:
-        print(f"WARNING: only {len(negatives)} negatives, need {n_neg}", file=sys.stderr)
+        warnings.warn(
+            f"only {len(negatives)} negatives available, need {n_neg}",
+            stacklevel=2,
+        )
         n_neg = len(negatives)
 
     pos_sample = positives.sample(n=n_pos, random_state=rng)
@@ -139,7 +148,7 @@ def subsample(
     )
 
 
-def inject_missingness(df: pd.DataFrame, rng: np.random.RandomState) -> pd.DataFrame:
+def inject_missingness(df: pd.DataFrame, seed: int) -> pd.DataFrame:
     """Apply structured missingness per the v5 contract.
 
     Conditional rates per source (overall per-column rate stays <10%):
@@ -148,6 +157,7 @@ def inject_missingness(df: pd.DataFrame, rng: np.random.RandomState) -> pd.DataF
     - days_since_last_touch: structural NaN (no touches) + 3% MCAR
     - days_since_first_touch: structural NaN (no touches) + 2% MCAR
     """
+    rng = RNGRoot(seed).numpy_child("missingness")
     df = df.copy()
     n = len(df)
 
@@ -176,14 +186,15 @@ def inject_missingness(df: pd.DataFrame, rng: np.random.RandomState) -> pd.DataF
     return df
 
 
-def boost_leakage_trap(df: pd.DataFrame, rng: np.random.RandomState) -> pd.DataFrame:
+def boost_leakage_trap(df: pd.DataFrame, seed: int) -> pd.DataFrame:
     """Amplify the leakage trap signal to ensure robust detectability.
 
     Adds target-correlated noise to ``__leakage__total_touches_90d`` so
     that converted leads accumulate extra post-snapshot touches.  This
     simulates a realistic scenario where the feature aggregates engagement
     activity that occurs *after* the conversion decision is made.
     """
+    rng = RNGRoot(seed).numpy_child("leakage_trap")
     df = df.copy()
     trap_col = "__leakage__total_touches_90d"
     n = len(df)

diff --git a/leadforge/pipelines/build_v6.py b/leadforge/pipelines/build_v6.py
@@ -16,11 +16,13 @@
 
 from __future__ import annotations
 
-import sys
+import warnings
 
 import numpy as np
 import pandas as pd
 
+from leadforge.core.rng import RNGRoot
+
 __all__ = [
     "ACV_CAP",
     "ACV_FLOOR",
@@ -113,7 +115,7 @@ def derive_features(df: pd.DataFrame) -> pd.DataFrame:
 
 def softcap_expected_acv(
     df: pd.DataFrame,
-    rng: np.random.RandomState,
+    seed: int,
     floor: float = ACV_FLOOR,
     cap: float = ACV_CAP,
 ) -> pd.DataFrame:
@@ -122,6 +124,7 @@ def softcap_expected_acv(
     Values below floor are clipped. Values above cap are pulled toward cap
     with additive noise so they cluster near the cap rather than pile at it.
     """
+    rng = RNGRoot(seed).numpy_child("softcap_acv")
     df = df.copy()
     acv = df["expected_acv"].copy()
 
@@ -148,12 +151,13 @@ def cap_expected_acv(df: pd.DataFrame) -> pd.DataFrame:
     return df
 
 
-def assign_acquisition_wave(df: pd.DataFrame, rng: np.random.RandomState) -> pd.DataFrame:
+def assign_acquisition_wave(df: pd.DataFrame, seed: int) -> pd.DataFrame:
     """Assign acquisition_wave (A, B, C) based on lead index position.
 
     Waves A/B/C are roughly chronological: first third = A, middle = B,
     last third = C. A small amount of noise is added at the boundaries.
     """
+    rng = RNGRoot(seed).numpy_child("acquisition_wave")
     df = df.copy()
     n = len(df)
     waves = np.empty(n, dtype=object)
@@ -222,22 +226,29 @@ def rename_and_select(
 
 def subsample(
     df: pd.DataFrame,
-    rng: np.random.RandomState,
+    seed: int,
     n: int = SUBSAMPLE_N,
     target_rate: float = TARGET_RATE,
 ) -> pd.DataFrame:
     """Stratified subsample to n rows at target_rate conversion."""
+    rng = RNGRoot(seed).numpy_child("subsample")
     positives = df[df["converted"] == 1]
     negatives = df[df["converted"] == 0]
     n_pos = int(n * target_rate)
     n_neg = n - n_pos
 
     if len(positives) < n_pos:
-        print(f"WARNING: only {len(positives)} positives, need {n_pos}", file=sys.stderr)
+        warnings.warn(
+            f"only {len(positives)} positives available, need {n_pos}",
+            stacklevel=2,
+        )
         n_pos = len(positives)
         n_neg = n - n_pos
     if len(negatives) < n_neg:
-        print(f"WARNING: only {len(negatives)} negatives, need {n_neg}", file=sys.stderr)
+        warnings.warn(
+            f"only {len(negatives)} negatives available, need {n_neg}",
+            stacklevel=2,
+        )
         n_neg = len(negatives)
 
     pos_sample = positives.sample(n=n_pos, random_state=rng)
@@ -247,7 +258,7 @@ def subsample(
     )
 
 
-def inject_missingness(df: pd.DataFrame, rng: np.random.RandomState) -> pd.DataFrame:
+def inject_missingness(df: pd.DataFrame, seed: int) -> pd.DataFrame:
     """Apply structured missingness per the v6 contract.
 
     Patterns:
@@ -258,6 +269,7 @@ def inject_missingness(df: pd.DataFrame, rng: np.random.RandomState) -> pd.DataF
     5. Structural + MCAR: days_since_first_touch — NaN when no touches + 2% MCAR
     6. MCAR: days_since_last_touch — additional 3% on top of structural
     """
+    rng = RNGRoot(seed).numpy_child("missingness")
     df = df.copy()
     n = len(df)
 

diff --git a/scripts/build_v5_snapshot.py b/scripts/build_v5_snapshot.py
@@ -18,7 +18,6 @@
 import sys
 from pathlib import Path
 
-import numpy as np
 import pandas as pd
 
 from leadforge.api.generator import Generator
@@ -59,8 +58,6 @@ def generate_bundle(seed: int = SEED, n_leads: int = N_LEADS) -> pd.DataFrame:
 
 def build_v5_dataset(seed: int = SEED) -> pd.DataFrame:
     """Full pipeline: generate → derive → cap ACV → rename → subsample → boost → missingness."""
-    rng = np.random.RandomState(seed)
-
     print("Generating bundle...", file=sys.stderr)
     snapshot = generate_bundle(seed=seed)
     conv = snapshot["converted_within_90_days"].mean()
@@ -74,14 +71,14 @@ def build_v5_dataset(seed: int = SEED) -> pd.DataFrame:
     df = rename_and_select(df)
 
     print("Subsampling...", file=sys.stderr)
-    df = subsample(df, rng)
+    df = subsample(df, seed)
     print(f"  Subsampled: {len(df)} rows, conversion={df['converted'].mean():.1%}", file=sys.stderr)
 
     print("Boosting leakage trap...", file=sys.stderr)
-    df = boost_leakage_trap(df, rng)
+    df = boost_leakage_trap(df, seed)
 
     print("Injecting missingness...", file=sys.stderr)
-    df = inject_missingness(df, rng)
+    df = inject_missingness(df, seed)
 
     return df
 

diff --git a/scripts/build_v6_snapshot.py b/scripts/build_v6_snapshot.py
@@ -22,7 +22,6 @@
 import sys
 from pathlib import Path
 
-import numpy as np
 import pandas as pd
 
 from leadforge.api.generator import Generator
@@ -66,8 +65,6 @@ def generate_bundle(seed: int = SEED, n_leads: int = N_LEADS):
 
 def build_v6_datasets(seed: int = SEED) -> tuple[pd.DataFrame, pd.DataFrame]:
     """Full pipeline: generate -> derive -> process -> split into student + instructor."""
-    rng = np.random.RandomState(seed)
-
     print("Generating bundle (with latent touch intensity)...", file=sys.stderr)
     snapshot, bundle = generate_bundle(seed=seed)
     conv = snapshot["converted_within_90_days"].mean()
@@ -87,22 +84,22 @@ def build_v6_datasets(seed: int = SEED) -> tuple[pd.DataFrame, pd.DataFrame]:
     snapshot[INSTRUCTOR_TRAP_COL] = trap_series.values
 
     df = derive_features(snapshot)
-    df = softcap_expected_acv(df, rng)
-    df = assign_acquisition_wave(df, rng)
+    df = softcap_expected_acv(df, seed)
+    df = assign_acquisition_wave(df, seed)
 
     # Rename and select (instructor first to keep trap column)
     df_instructor = rename_and_select(df, instructor=True)
 
     print("Subsampling...", file=sys.stderr)
-    df_instructor = subsample(df_instructor, rng)
+    df_instructor = subsample(df_instructor, seed)
     print(
         f"  Subsampled: {len(df_instructor)} rows, "
         f"conversion={df_instructor['converted'].mean():.1%}",
         file=sys.stderr,
     )
 
     print("Injecting missingness...", file=sys.stderr)
-    df_instructor = inject_missingness(df_instructor, rng)
+    df_instructor = inject_missingness(df_instructor, seed)
 
     # Student version: drop the trap column
     student_cols = [c for c in df_instructor.columns if not c.startswith("__leakage__")]