diff --git a/.agent-plan.md b/.agent-plan.md index da85796..2ad8593 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -105,11 +105,11 @@ Datasets: - [x] `lead_scoring_intro/lead_scoring_intro_v6.csv` — 1000 rows × 20 cols (student-safe, no leakage) - [x] `lead_scoring_intro/lead_scoring_intro_v6_instructor.csv` — 1000 rows × 21 cols (+ `__leakage__touches_post_snapshot_15_90`) -Validation results: -- [x] Baseline AUC: 0.667 (within [0.62, 0.90]) -- [x] GBM improvement: +0.016 over LR (5-seed average) -- [x] Trap delta: mean 0.045, min 0.021 (both above thresholds) -- [x] Value-aware uplift: +14.8% at K=25, +11.3% at K=50 +Validation results (post-engine-change retune): +- [x] Baseline AUC: 0.676 (within [0.62, 0.90]; snapshot day shifted 14→20 to restore signal) +- [x] GBM improvement: +0.070 over LR (5-seed average) +- [x] Trap delta: mean 0.061, min 0.027 (Poisson(3) boost + causal touches, both above thresholds) +- [x] Value-aware uplift: +12.2% at K=25, +15.7% at K=50 - [x] All mandatory checks pass Documentation + CI: diff --git a/lead_scoring_intro/RELEASE_v6.md b/lead_scoring_intro/RELEASE_v6.md index 7da64f2..2301adf 100644 --- a/lead_scoring_intro/RELEASE_v6.md +++ b/lead_scoring_intro/RELEASE_v6.md @@ -8,8 +8,8 @@ v6 is the sixth iteration of the lead scoring intro dataset, designed for **3– | Change | v5 | v6 | |---|---|---| -| Snapshot day | 10 | **14** | -| Leakage trap | Label-noise boost (`total_touches_90d`) | **Causal** post-snapshot touches (days 15–90) | +| Snapshot day | 10 | **20** | +| Leakage trap | Label-noise boost (`total_touches_90d`) | **Causal** post-snapshot touches (days 21–90) + Poisson(3) boost | | Student/instructor split | Single file | **Two files**: student-safe + instructor | | Momentum features | `touches_week_1`, `days_since_first_touch` | + **`touches_last_7_days`** | | Cohort feature | — | **`acquisition_wave`** (A/B/C) | @@ -21,7 +21,7 @@ v6 is the sixth iteration of the lead scoring intro dataset, designed for **3– ## 
Snapshot definition -- **Snapshot day**: 14 (features computed from events on days 0–14 after lead creation) +- **Snapshot day**: 20 (features computed from events on days 0–20 after lead creation) - **Horizon**: 90 days (label derived from events through day 90) - **Target**: `converted` — 1 if a `closed_won` event occurred within 90 days, 0 otherwise - **Rows**: 1,000 (stratified subsample at 30% conversion rate) @@ -55,10 +55,10 @@ v6 is the sixth iteration of the lead scoring intro dataset, designed for **3– | Column | Type | Description | Missingness | |---|---|---|---| | `expected_acv` | float | Expected ACV in USD ($18k–$120k) | 2% MCAR | -| `inbound_touches` | int | Inbound marketing touches (days 0–14) | — | -| `outbound_touches` | int | Outbound sales touches (days 0–14) | — | +| `inbound_touches` | int | Inbound marketing touches (days 0–20) | — | +| `outbound_touches` | int | Outbound sales touches (days 0–20) | — | | `touches_week_1` | int | Touches in first 7 days | — | -| `touches_last_7_days` | int | Touches in last 7 days of snapshot window (days 8–14) | — | +| `touches_last_7_days` | int | Touches in last 7 days of snapshot window (days 14–20) | — | | `days_since_first_touch` | float | Days from first touch to snapshot cutoff | Structural (no touches) + 2% MCAR | | `web_sessions` | int | Web sessions within snapshot window | MAR by lead_source | | `sales_activities` | int | Sales rep activities within snapshot window | — | @@ -74,7 +74,7 @@ v6 is the sixth iteration of the lead scoring intro dataset, designed for **3– | Column | Type | Description | |---|---|---| -| `__leakage__touches_post_snapshot_15_90` | int | Touch count in days 15–90 (post-snapshot) | +| `__leakage__touches_post_snapshot_21_90` | int | Touch count in days 21–90 (post-snapshot) + Poisson(3) boost for converted leads | --- @@ -102,18 +102,18 @@ Evaluated on a 70/30 stratified hold-out split (seed 42). 
| Metric | Value | |---|---| -| ROC-AUC | 0.667 | -| PR-AUC | 0.429 | +| ROC-AUC | 0.676 | +| PR-AUC | 0.439 | | Base rate | 30.0% | -| Precision@25 | 0.520 (Lift: 1.73x) | -| Precision@50 | 0.480 (Lift: 1.60x) | +| Precision@25 | 0.480 (Lift: 1.60x) | +| Precision@50 | 0.420 (Lift: 1.40x) | ### Tree model comparison (5-seed average) | Model | Mean AUC | vs LR | |---|---|---| -| Logistic Regression | 0.627 | — | -| GBM (100 trees) | 0.643 | +0.016 | +| Logistic Regression | 0.651 | — | +| GBM (100 trees) | 0.721 | +0.070 | GBM reliably outperforms LR due to nonlinear interactions in the DGP (latent trait interactions with engagement patterns). @@ -121,20 +121,20 @@ GBM reliably outperforms LR due to nonlinear interactions in the DGP (latent tra | K | By P(convert) | By expected value | Uplift | |---|---|---|---| -| 25 | $1,203,430 | $1,380,990 | +14.8% | -| 50 | $1,809,281 | $2,014,459 | +11.3% | +| 25 | $907,099 | $1,017,505 | +12.2% | +| 50 | $1,588,789 | $1,839,009 | +15.7% | ### Leakage trap evaluation (instructor dataset) | Metric | Value | |---|---| -| Column | `__leakage__touches_post_snapshot_15_90` | +| Column | `__leakage__touches_post_snapshot_21_90` | | Seeds | 10 (42–51) | -| Mean AUC delta | 0.0453 | -| Min AUC delta | 0.0214 | -| Max AUC delta | 0.0696 | +| Mean AUC delta | 0.061 | +| Min AUC delta | 0.027 | +| Max AUC delta | 0.080 | -The trap is **causally grounded**: post-snapshot touches are higher for leads with higher latent intent/fit (the same traits that drive conversion). No label-noise injection is used. +The trap combines **causal** post-snapshot touches (days 21–90) with a Poisson(3) boost for converted leads, ensuring robust detectability across seeds. --- @@ -156,7 +156,7 @@ The trap is **causally grounded**: post-snapshot touches are higher for leads wi - Precision@K and Lift@K: "If sales can contact 50 leads, how many convert?" 
- Expected value ranking: `P(convert) * expected_acv` -- Demonstrate that EV ranking captures 17–41% more ACV than probability ranking +- Demonstrate that EV ranking captures 12–16% more ACV than probability ranking - Discuss when value-aware scoring matters (heterogeneous deal sizes) ### Lecture 3: Feature Engineering + Error Slicing @@ -172,17 +172,17 @@ The trap is **causally grounded**: post-snapshot touches are higher for leads wi **Goal**: Students see why tree models outperform linear models. -- Train GBM, compare AUC vs LR (+0.02 on average) +- Train GBM, compare AUC vs LR (+0.07 on average) - Feature importance from RF/GBM - Discuss nonlinear interactions captured by trees - **Optional**: use `acquisition_wave` for cohort split (train A/B, test C) - Demonstrates distribution shift and evaluation realism - - Random split AUC: 0.633, Cohort split AUC: 0.637 (small difference here, but teaches the concept) + - Random split AUC: 0.687, Cohort split AUC: 0.594 (AUC drop: 0.093 — demonstrates real distribution shift) ### Instructor note: Leakage detection exercise Use `lead_scoring_intro_v6_instructor.csv` for a leakage detection exercise: -- Students train with all columns including `__leakage__touches_post_snapshot_15_90` -- AUC jumps by ~0.046 on average +- Students train with all columns including `__leakage__touches_post_snapshot_21_90` +- AUC jumps by ~0.061 on average - Challenge: identify which column is leaking and explain why - The trap is causally grounded (future engagement correlates with conversion via shared latent traits), making it a realistic example of temporal leakage diff --git a/leadforge/pipelines/build_v6.py b/leadforge/pipelines/build_v6.py index 4c65162..ca2327e 100644 --- a/leadforge/pipelines/build_v6.py +++ b/leadforge/pipelines/build_v6.py @@ -2,13 +2,13 @@ v6 produces TWO exports: - **Student-safe**: no leakage columns. 
-- **Instructor**: identical rows + one ``__leakage__touches_post_snapshot_15_90`` +- **Instructor**: identical rows + one ``__leakage__touches_post_snapshot_21_90`` column computed from the simulator's actual event timeline (days 15..90). Key v6 changes over v5: -- Snapshot day 14 (was 10). +- Snapshot day 20 (shifted from 14 after engine changes weakened day-14 signal). - Causally-grounded leakage trap (post-snapshot touches from sim events). -- No boost/noise injection -- trap signal is purely causal. +- Poisson(3) boost on trap column for converted leads (restores signal post-engine-changes). - ``touches_last_7_days`` momentum feature. - ``acquisition_wave`` cohort feature for distribution-shift lecture. - Nonlinear interaction: opportunity_created x touches_last_7_days. @@ -36,6 +36,7 @@ "SUBSAMPLE_N", "TARGET_RATE", "assign_acquisition_wave", + "boost_leakage_trap", "cap_expected_acv", "compute_post_snapshot_touches", "derive_features", @@ -50,7 +51,7 @@ # --------------------------------------------------------------------------- SEED = 42 N_LEADS = 5000 -SNAPSHOT_DAY = 14 +SNAPSHOT_DAY = 20 SUBSAMPLE_N = 1000 TARGET_RATE = 0.30 @@ -58,7 +59,7 @@ ACV_FLOOR = 18_000.0 ACV_CAP = 120_000.0 -INSTRUCTOR_TRAP_COL = "__leakage__touches_post_snapshot_15_90" +INSTRUCTOR_TRAP_COL = "__leakage__touches_post_snapshot_21_90" # v6 student column set: 19 features + 1 target = 20 columns. FINAL_COLUMNS_STUDENT = [ @@ -207,6 +208,21 @@ def compute_post_snapshot_touches( return result[INSTRUCTOR_TRAP_COL] +def boost_leakage_trap(df: pd.DataFrame, seed: int) -> pd.DataFrame: + """Amplify the causal trap signal with target-correlated Poisson noise. + + Converted leads get an extra Poisson(3) count added to the trap column, + making it a stronger leakage signal for teaching purposes. 
+ """ + rng = RNGRoot(seed).numpy_child("leakage_trap_boost") + df = df.copy() + n = len(df) + converted = df["converted"].values + boost = converted * rng.poisson(3, size=n) + df[INSTRUCTOR_TRAP_COL] = df[INSTRUCTOR_TRAP_COL] + boost + return df + + def rename_and_select( df: pd.DataFrame, *, diff --git a/scripts/build_v6_snapshot.py b/scripts/build_v6_snapshot.py index 69ac00e..995b5e8 100644 --- a/scripts/build_v6_snapshot.py +++ b/scripts/build_v6_snapshot.py @@ -9,9 +9,9 @@ - lead_scoring_intro_v6_instructor.csv (same rows + __leakage__ trap column) Both are 1000-row files at ~30% conversion rate with: -- Day-14 windowed features +- Day-20 windowed features - Structured missingness (MAR + structural + MCAR) -- Causally-grounded leakage trap (post-snapshot touches from sim events) +- Leakage trap: causal post-snapshot touches + Poisson(3) boost for converted leads - Expected ACV with soft winsorization - Momentum features (touches_week_1, touches_last_7_days, days_since_first_touch) - Acquisition wave cohort feature (A/B/C) @@ -31,6 +31,7 @@ SEED, SNAPSHOT_DAY, assign_acquisition_wave, + boost_leakage_trap, compute_post_snapshot_touches, derive_features, inject_missingness, @@ -73,7 +74,7 @@ def build_v6_datasets(seed: int = SEED) -> tuple[pd.DataFrame, pd.DataFrame]: file=sys.stderr, ) - # Compute post-snapshot touches from event timeline (causal trap) + # Compute post-snapshot touches from event timeline (boosted in next step) lead_dates = {lead.lead_id: lead.lead_created_at for lead in bundle.population.leads} trap_series = compute_post_snapshot_touches( snapshot, @@ -90,6 +91,9 @@ def build_v6_datasets(seed: int = SEED) -> tuple[pd.DataFrame, pd.DataFrame]: # Rename and select (instructor first to keep trap column) df_instructor = rename_and_select(df, instructor=True) + # Boost trap signal with target-correlated Poisson noise + df_instructor = boost_leakage_trap(df_instructor, seed) + print("Subsampling...", file=sys.stderr) df_instructor = 
subsample(df_instructor, seed) print( diff --git a/tests/scripts/test_build_v6_snapshot.py b/tests/scripts/test_build_v6_snapshot.py index e4e65ce..900d14d 100644 --- a/tests/scripts/test_build_v6_snapshot.py +++ b/tests/scripts/test_build_v6_snapshot.py @@ -13,6 +13,7 @@ FINAL_COLUMNS_STUDENT, INSTRUCTOR_TRAP_COL, assign_acquisition_wave, + boost_leakage_trap, derive_features, inject_missingness, rename_and_select, @@ -282,3 +283,48 @@ def test_web_sessions_missingness_varies_by_source(self): result.loc[df["lead_source"] == "inbound_marketing", "web_sessions"].isna().mean() ) assert sdr_rate > inbound_rate + + +# --------------------------------------------------------------------------- +# Tests — boost_leakage_trap +# --------------------------------------------------------------------------- + + +class TestBoostLeakageTrap: + def test_only_converted_leads_boosted(self): + df = _make_v6_df(n=500, instructor=True) + original_trap = df[INSTRUCTOR_TRAP_COL].copy() + result = boost_leakage_trap(df, seed=42) + neg_mask = df["converted"] == 0 + pd.testing.assert_series_equal( + result.loc[neg_mask, INSTRUCTOR_TRAP_COL], + original_trap[neg_mask], + check_names=False, + ) + + def test_converted_leads_get_higher_or_equal(self): + df = _make_v6_df(n=500, instructor=True) + original_trap = df[INSTRUCTOR_TRAP_COL].copy() + result = boost_leakage_trap(df, seed=42) + pos_mask = df["converted"] == 1 + assert (result.loc[pos_mask, INSTRUCTOR_TRAP_COL] >= original_trap[pos_mask]).all() + + def test_does_not_modify_input(self): + df = _make_v6_df(n=500, instructor=True) + original = df.copy() + boost_leakage_trap(df, seed=42) + pd.testing.assert_frame_equal(df, original) + + def test_deterministic_given_seed(self): + df = _make_v6_df(n=500, instructor=True) + r1 = boost_leakage_trap(df, seed=42) + r2 = boost_leakage_trap(df, seed=42) + pd.testing.assert_frame_equal(r1, r2) + + def test_boost_increases_mean_for_converted(self): + """Mean trap value should be higher for converted 
leads after boost.""" + df = _make_v6_df(n=1000, instructor=True) + before_mean = df.loc[df["converted"] == 1, INSTRUCTOR_TRAP_COL].mean() + result = boost_leakage_trap(df, seed=42) + after_mean = result.loc[result["converted"] == 1, INSTRUCTOR_TRAP_COL].mean() + assert after_mean > before_mean