diff --git a/.agent-plan.md b/.agent-plan.md index de08608..e6210b1 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -6,7 +6,7 @@ ## Current System State -**v0.5.0 in progress — Milestones 7–11 complete, v4-M1 in PR.** Full simulation engine + render/bundle + exposure filtering + CLI commands + validation harness implemented. v4 engine changes + build pipeline ready. 609 tests passing. +**v0.5.0 in progress — Milestones 7–11 complete, v4 dataset shipped.** Full simulation engine + render/bundle + exposure filtering + CLI commands + validation harness implemented. v4 engine changes + build pipeline merged (PR #21). v4 dataset generated and validated. 609 tests passing. --- @@ -24,7 +24,7 @@ See `docs/v4/design.md` for full details. - [x] `scripts/spike_category_signal.py` — spike experiment validating category signal approach - [x] Updated `CLAUDE.md`, `AGENTS.md`, `.agent-plan.md` -### v4-M1: Engine + build pipeline ✓ (in PR) +### v4-M1: Engine + build pipeline ✓ (PR #21, merged) Engine changes: - [x] Add `category_latent_correlations` to `difficulty_profiles.yaml` (intro profile, scale 1.8) @@ -37,17 +37,19 @@ Engine changes: - [x] 19 new tests (16 windowed snapshot + 3 category-latent correlations) Build pipeline: -- [x] `scripts/build_v4_snapshot.py` — day-14 snapshot + leakage trap + structured missingness + subsampling +- [x] `scripts/build_v4_snapshot.py` — day-10 snapshot + leakage trap + structured missingness + subsampling - [x] `scripts/validate_v4_dataset.py` — full validation per `docs/v4/validation_spec.md` - [x] End-to-end: generate bundle → build CSV → validate → all 7 mandatory checks pass -- [x] LR AUC 0.659 (without trap); 0.03+ boost with trap +- [x] LR AUC 0.652 (without trap); 0.034 boost with trap -### v4-M2: Documentation + release ⬜ +### v4-M2: Documentation + release ✓ (PR #22) -- [ ] Generate `lead_scoring_intro_v4.csv` (in datasets-private repo) -- [ ] Write `RELEASE_v4.md` -- [ ] Update dataset repo README -- [ ] Update `.agent-plan.md` to reflect completion +- [x] Generate `lead_scoring_intro_v4.csv` (in datasets-private repo) +- [x] Write `RELEASE_v4.md` with column dictionary, validation results, pedagogical notes +- [x] Update dataset repo README (v4 now recommended, v3 superseded) +- [x] Tune snapshot day (14 → 10) for leakage trap boost ≥ 0.03 after stacking fix +- [x] Add StandardScaler to validation script LR for convergence +- [x] Update `.agent-plan.md` to reflect completion --- diff --git a/scripts/build_v4_snapshot.py b/scripts/build_v4_snapshot.py index 7e228de..61ba1c8 100644 --- a/scripts/build_v4_snapshot.py +++ b/scripts/build_v4_snapshot.py @@ -5,7 +5,7 @@ python scripts/build_v4_snapshot.py OUTPUT_CSV Produces a 1000-row × 18-column CSV at ~30% conversion rate with: -- Day-14 windowed features +- Day-10 windowed features - Structured missingness (MAR for web_sessions, seniority) - Leakage trap (total_touches_all using full 90-day data) - Stratified subsampling @@ -27,7 +27,7 @@ # --------------------------------------------------------------------------- SEED = 42 N_LEADS = 5000 -SNAPSHOT_DAY = 14 +SNAPSHOT_DAY = 10 SUBSAMPLE_N = 1000 TARGET_RATE = 0.30 @@ -72,7 +72,7 @@ def generate_bundle(seed: int = SEED, n_leads: int = N_LEADS) -> pd.DataFrame: - """Generate a full bundle and return the day-14 snapshot.""" + """Generate a full bundle and return the day-10 snapshot.""" gen = Generator.from_recipe( "b2b_saas_procurement_v1", seed=seed, diff --git a/scripts/validate_v4_dataset.py b/scripts/validate_v4_dataset.py index 09c60a4..c57cd95 100644 --- a/scripts/validate_v4_dataset.py +++ b/scripts/validate_v4_dataset.py @@ -16,7 +16,7 @@ import pandas as pd from sklearn.linear_model import LogisticRegression from sklearn.metrics import roc_auc_score -from sklearn.preprocessing import LabelEncoder +from sklearn.preprocessing import LabelEncoder, StandardScaler # --------------------------------------------------------------------------- # Constants @@ -120,9 +120,12 @@ def _fit_lr(df: pd.DataFrame, exclude_cols: list[str] | None = None) -> float: x_df = x_df.select_dtypes(include=[np.number]) x_df = x_df.fillna(x_df.median()) + scaler = StandardScaler() + x_scaled = scaler.fit_transform(x_df) + lr = LogisticRegression(max_iter=2000, random_state=42) - lr.fit(x_df, y) - probs = lr.predict_proba(x_df)[:, 1] + lr.fit(x_scaled, y) + probs = lr.predict_proba(x_scaled)[:, 1] return float(roc_auc_score(y, probs))