diff --git a/.agent-plan.md b/.agent-plan.md index 14c3d14..87b81c2 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -42,11 +42,13 @@ early against the known-good lead-scoring path + physical reorg into Status: `LTV-M0` landed (#102, #103, #106). `LTV-M1`: `LTV-Pb` merged (#104); `LTV-Pc` (pLTV feature/task specs) still outstanding. `LTV-M2`: `LTV-Pd` (#107) and `LTV-Pe` (#108) merged (scheme protocol + render seam). `LTV-Pf` (physical -move, **hard break / no shims** per D12) split into Pf.1 (compute core — -simulation/mechanisms/structure moved) opened as **#109**, and Pf.2 (render -move, pending). Verified byte-identical. Sibling `leadforge-datasets-private` -build scripts must update to the new import paths (breakage issue filed). Next: -`LTV-Pf.2` (render), then `LTV-Pg` (scaffold `schemes/lifecycle/`). +move, **hard break / no shims** per D12): Pf.1 (compute core — +simulation/mechanisms/structure) merged (#109); Pf.2 (lead-scoring render — +snapshots/relational_snapshot_safe/tasks moved + relational.py split so the +shared write_relational_tables stays in the envelope) opened as **#110**. Both +byte-identical. Sibling `leadforge-datasets-private` consumes bundle files, not +internals — no lockstep update needed (heads-up issue #8 filed). Next: +`LTV-Pg` (scaffold `schemes/lifecycle/` + relocate the lead-scoring schema specs). --- diff --git a/CHANGELOG.md b/CHANGELOG.md index c085dd6..a8b6ce1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,10 +21,20 @@ back-compat shims, by design): | `leadforge.simulation.*` | `leadforge.schemes.lead_scoring.simulation.*` | | `leadforge.mechanisms.*` | `leadforge.schemes.lead_scoring.mechanisms.*` | | `leadforge.structure.*` | `leadforge.schemes.lead_scoring.structure.*` | - -`render/{snapshots,relational,tasks}` and the lead-scoring `schema` specs -relocate in follow-up PRs. Consumers importing internals (e.g. the -`leadforge-datasets-private` build scripts) must update to the new paths; +| `leadforge.render.snapshots` | `leadforge.schemes.lead_scoring.render.snapshots` | +| `leadforge.render.relational_snapshot_safe` | `leadforge.schemes.lead_scoring.render.relational_snapshot_safe` | +| `leadforge.render.tasks` | `leadforge.schemes.lead_scoring.render.tasks` | +| `leadforge.render.relational:to_dataframes` | `leadforge.schemes.lead_scoring.render.relational:to_dataframes` | +| `leadforge.render.relational:write_relational_tables` | `leadforge.render.relational_io:write_relational_tables` | + +The flat `leadforge.render.relational` module is **removed**: its 9-table +assembler (`to_dataframes`) moved to the scheme, and the scheme-agnostic writer +(`write_relational_tables`) moved to the new `leadforge.render.relational_io` +(renamed to avoid a basename clash with the scheme's `relational.py`). +`leadforge.render` remains the shared bundle-output envelope +(`relational_io` + `manifests`). The lead-scoring `schema` +specs relocate in a follow-up PR (LTV-Pg). Consumers importing internals (e.g. +the `leadforge-datasets-private` build scripts) must update to the new paths; the package stays on the `1.x` line (the public contract did not change). ### CLI surfaces v4 fields diff --git a/CLAUDE.md b/CLAUDE.md index 9317851..973d116 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -159,11 +159,11 @@ leadforge/ schema/ entities.py, relationships.py, events.py, features.py, tasks.py, dictionaries.py schemes/ base.py (GenerationScheme protocol + SCHEME_REGISTRY); lead_scoring/ — the lead-scoring scheme: __init__.py (build_world/ - write_bundle) + simulation/, mechanisms/, structure/ (moved in - LTV-Pf.1). render/ + lead-scoring schema specs migrate here in - LTV-Pf.2 / LTV-Pg. See docs/ltv/design.md §2.5. - render/ relational.py (+ write_relational_tables), snapshots.py, manifests.py, tasks.py - # lead-scoring render still here pending LTV-Pf.2 + write_bundle) + simulation/, mechanisms/, structure/, render/ + (moved in LTV-Pf.1/Pf.2). Lead-scoring schema specs migrate + here in LTV-Pg. See docs/ltv/design.md §2.5. + render/ relational_io.py (write_relational_tables — shared writer), manifests.py + # shared bundle-output envelope exposure/ modes.py, filters.py, redaction.py validation/ invariants.py, artifact_checks.py, realism.py, difficulty.py, drift.py recipes/ registry.py, b2b_saas_procurement_v1/{recipe,narrative,schema,motifs,difficulty_profiles}.yaml @@ -248,14 +248,13 @@ leadforge/ # Python package root │ ├── __init__.py # build_world() + write_bundle() │ ├── structure/ # Hidden world graph (WorldGraph, motifs, sampler) │ ├── mechanisms/ # Node/edge behavior (policies, hazards, scores, …) -│ └── simulation/ # World evolution (engine, population, state) -│ # NOTE (LTV-M2 reorg in progress): render/{snapshots,relational,tasks} -│ # relocate under schemes/lead_scoring/ in a follow-up; schema specs split -│ # in LTV-Pg. See docs/ltv/design.md §2.5 for the target layout. -├── render/ # Bundle output (envelope + not-yet-moved lead-scoring render) -│ ├── snapshots.py # build_snapshot() — ML-ready lead table -│ ├── relational.py # to_dataframes() — 9-table dict -│ ├── tasks.py # write_task_splits() — train/valid/test Parquet +│ ├── simulation/ # World evolution (engine, population, state) +│ └── render/ # Lead-scoring render: snapshots, relational +│ # (to_dataframes), relational_snapshot_safe, tasks +│ # NOTE (LTV-M2 reorg in progress): lead-scoring schema specs split in LTV-Pg. +│ # See docs/ltv/design.md §2.5 for the target layout. +├── render/ # Shared bundle-output envelope +│ ├── relational_io.py # write_relational_tables() — shared table writer │ └── manifests.py # build_manifest(), write_manifest() ├── exposure/ # Truth filtering │ ├── modes.py # apply_exposure() dispatch diff --git a/docs/ltv/roadmap.md b/docs/ltv/roadmap.md index 2133602..d6e0193 100644 --- a/docs/ltv/roadmap.md +++ b/docs/ltv/roadmap.md @@ -42,7 +42,7 @@ protocol + registry, with the package physically reorganized into |-----------|------------|-----|------------| | `LTV-M0` | Planning + design lock | `LTV-Pa` | #102, #103 (+ scheme reframe) | | `LTV-M1` | Lifecycle schema foundation | `LTV-Pb`, `LTV-Pc` | #104 (Pb) | -| `LTV-M2` | Generation-scheme architecture + physical reorg | `LTV-Pd`, `LTV-Pe`, `LTV-Pf`, `LTV-Pg` | #107 (Pd), #108 (Pe), #109 (Pf.1) | +| `LTV-M2` | Generation-scheme architecture + physical reorg | `LTV-Pd`, `LTV-Pe`, `LTV-Pf`, `LTV-Pg` | #107 (Pd), #108 (Pe), #109 (Pf.1), #110 (Pf.2) | | `LTV-M3` | Customer population + lifecycle world | `LTV-Ph`, `LTV-Pi` | | | `LTV-M4` | Lifecycle simulation engine | `LTV-Pj`, `LTV-Pk` | | | `LTV-M5` | Customer snapshots + pLTV targets (both regimes) | `LTV-Pl`, `LTV-Pm` | | @@ -125,11 +125,11 @@ Total: ~19 PRs across 9 milestones. - [x] **`LTV-Pf.1`** — compute core: `simulation/` + `mechanisms/` + `structure/` moved as whole directories (21 file renames, all callers rewritten). Verified byte-identical; full suite green. (**PR #109**) - - [ ] **`LTV-Pf.2`** — render: relocate `render/{snapshots,relational,tasks}` - under the scheme, splitting `render/relational.py` so the shared - `write_relational_tables` stays in the envelope while the 9-table - `to_dataframes` moves. (The lead-scoring `schema` specs split lands with - `LTV-Pg`.) + - [x] **`LTV-Pf.2`** — render: relocated `render/{snapshots,relational_snapshot_safe,tasks}` + under `schemes/lead_scoring/render/`, and split `render/relational.py` so the + shared `write_relational_tables` stays in the envelope while the 9-table + `to_dataframes` moved. Verified byte-identical; full suite green. (**PR #110**) + (The lead-scoring `schema` specs split lands with `LTV-Pg`.) - Tests: full suite + hash-determinism green; public API imports unchanged. - Labels: `type: refactor`, `layer: schema`, `layer: simulation`, `layer: render` - [ ] **`LTV-Pg`** — `refactor: scaffold schemes/lifecycle/ + relocate LTV-Pb/Pc specs`. diff --git a/leadforge/exposure/filters.py b/leadforge/exposure/filters.py index 1376674..f78eda9 100644 --- a/leadforge/exposure/filters.py +++ b/leadforge/exposure/filters.py @@ -31,7 +31,7 @@ class BundleFilter: relational_snapshot_safe: Whether the relational ``tables/`` dict must be projected onto the snapshot-safe shape before being written. When ``True``, the bundle writer routes through - :func:`leadforge.render.relational_snapshot_safe.to_dataframes_snapshot_safe`, + :func:`leadforge.schemes.lead_scoring.render.relational_snapshot_safe.to_dataframes_snapshot_safe`, which strips :data:`leadforge.validation.leakage_probes.BANNED_LEAD_COLUMNS` from ``leads``, :data:`~leadforge.validation.leakage_probes.BANNED_OPP_COLUMNS` from ``opportunities``, filters event tables per-lead by diff --git a/leadforge/render/manifests.py b/leadforge/render/manifests.py index 418780c..382a05a 100644 --- a/leadforge/render/manifests.py +++ b/leadforge/render/manifests.py @@ -43,7 +43,7 @@ # self-describing (``null`` means full-horizon, legacy behaviour). # "5" — PR 2.2: ``student_public`` bundles route through the # snapshot-safe relational export ( -# :mod:`leadforge.render.relational_snapshot_safe`). Public +# :mod:`leadforge.schemes.lead_scoring.render.relational_snapshot_safe`). Public # ``leads`` drops ``converted_within_90_days`` / # ``conversion_timestamp``; public ``opportunities`` drops # ``close_outcome`` / ``closed_at``; public bundles omit @@ -91,7 +91,7 @@ def build_manifest( package internals. Defaults to ``[]`` (nothing redacted). relational_snapshot_safe: ``True`` if the relational ``tables/`` were projected through - :func:`leadforge.render.relational_snapshot_safe.to_dataframes_snapshot_safe` + :func:`leadforge.schemes.lead_scoring.render.relational_snapshot_safe.to_dataframes_snapshot_safe` before being written. Recorded in the manifest so a tool reading a v5+ bundle can tell from the manifest alone whether ``tables/`` is the snapshot-safe (public) shape or the diff --git a/leadforge/render/relational_io.py b/leadforge/render/relational_io.py new file mode 100644 index 0000000..58c46a5 --- /dev/null +++ b/leadforge/render/relational_io.py @@ -0,0 +1,56 @@ +"""Shared relational-table writer (bundle-output envelope). + +:func:`write_relational_tables` is the scheme-agnostic step that serialises a +``{table_name: DataFrame}`` dict to a bundle's ``tables/`` directory. Each +generation scheme decides the relational *shape* (which tables, any +snapshot-safe projection) and then calls this to write them. The lead-scoring +table *assembler* (``to_dataframes``) lives with its scheme in +:mod:`leadforge.schemes.lead_scoring.render.relational`. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pandas as pd + +if TYPE_CHECKING: + from collections.abc import Collection + from pathlib import Path + + +def write_relational_tables( + dfs: dict[str, pd.DataFrame], + tables_dir: Path, + *, + redacted: Collection[str] = frozenset(), +) -> dict[str, int]: + """Write a ``{table_name: DataFrame}`` dict to *tables_dir* as Parquet. + + A shared, scheme-agnostic envelope step used by each scheme's + ``write_bundle``: it drops any *redacted* columns present in a table, + writes one ``.parquet`` per entry, and returns ``{table_name: + row_count}``. The relational *shape* (which tables, snapshot-safe + projection) is the scheme's concern and is decided before calling this. + + Args: + dfs: Mapping of table name → DataFrame, already projected to the + published shape (e.g. snapshot-safe for ``student_public``). + tables_dir: Destination directory (created if absent). + redacted: Column names to strip from any table that contains them. + + Returns: + Row count per written table, in *dfs* iteration order. + """ + from leadforge.schema.tables import write_parquet + + tables_dir.mkdir(parents=True, exist_ok=True) + row_counts: dict[str, int] = {} + for table_name, df in dfs.items(): + if redacted: + cols_to_drop = [c for c in redacted if c in df.columns] + if cols_to_drop: + df = df.drop(columns=cols_to_drop) + write_parquet(df, tables_dir / f"{table_name}.parquet") + row_counts[table_name] = len(df) + return row_counts diff --git a/leadforge/schemes/lead_scoring/__init__.py b/leadforge/schemes/lead_scoring/__init__.py index 55e1dea..e8a7dec 100644 --- a/leadforge/schemes/lead_scoring/__init__.py +++ b/leadforge/schemes/lead_scoring/__init__.py @@ -162,13 +162,16 @@ def write_bundle( from leadforge.exposure.modes import apply_exposure from leadforge.narrative.dataset_card import render_dataset_card from leadforge.render.manifests import build_manifest, write_manifest - from leadforge.render.relational import to_dataframes, write_relational_tables - from leadforge.render.relational_snapshot_safe import to_dataframes_snapshot_safe - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.render.relational_io import write_relational_tables from leadforge.schema.dictionaries import write_feature_dictionary from leadforge.schema.features import LEAD_SNAPSHOT_FEATURES, redacted_columns_for from leadforge.schema.tasks import task_manifest_for_config + from leadforge.schemes.lead_scoring.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational_snapshot_safe import ( + to_dataframes_snapshot_safe, + ) + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits if ( bundle.simulation_result is None diff --git a/leadforge/schemes/lead_scoring/render/__init__.py b/leadforge/schemes/lead_scoring/render/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/leadforge/render/relational.py b/leadforge/schemes/lead_scoring/render/relational.py similarity index 67% rename from leadforge/render/relational.py rename to leadforge/schemes/lead_scoring/render/relational.py index efca9fd..62387e2 100644 --- a/leadforge/render/relational.py +++ b/leadforge/schemes/lead_scoring/render/relational.py @@ -27,9 +27,6 @@ ) if TYPE_CHECKING: - from collections.abc import Collection - from pathlib import Path - from leadforge.schemes.lead_scoring.simulation.engine import SimulationResult from leadforge.schemes.lead_scoring.simulation.population import PopulationResult @@ -85,40 +82,3 @@ def to_dataframes( df = src.cls.empty_dataframe() dfs[table_name] = df return dfs - - -def write_relational_tables( - dfs: dict[str, pd.DataFrame], - tables_dir: Path, - *, - redacted: Collection[str] = frozenset(), -) -> dict[str, int]: - """Write a ``{table_name: DataFrame}`` dict to *tables_dir* as Parquet. - - A shared, scheme-agnostic envelope step used by each scheme's - ``write_bundle``: it drops any *redacted* columns present in a table, - writes one ``.parquet`` per entry, and returns ``{table_name: - row_count}``. The relational *shape* (which tables, snapshot-safe - projection) is the scheme's concern and is decided before calling this. - - Args: - dfs: Mapping of table name → DataFrame, already projected to the - published shape (e.g. snapshot-safe for ``student_public``). - tables_dir: Destination directory (created if absent). - redacted: Column names to strip from any table that contains them. - - Returns: - Row count per written table, in *dfs* iteration order. - """ - from leadforge.schema.tables import write_parquet - - tables_dir.mkdir(parents=True, exist_ok=True) - row_counts: dict[str, int] = {} - for table_name, df in dfs.items(): - if redacted: - cols_to_drop = [c for c in redacted if c in df.columns] - if cols_to_drop: - df = df.drop(columns=cols_to_drop) - write_parquet(df, tables_dir / f"{table_name}.parquet") - row_counts[table_name] = len(df) - return row_counts diff --git a/leadforge/render/relational_snapshot_safe.py b/leadforge/schemes/lead_scoring/render/relational_snapshot_safe.py similarity index 100% rename from leadforge/render/relational_snapshot_safe.py rename to leadforge/schemes/lead_scoring/render/relational_snapshot_safe.py diff --git a/leadforge/render/snapshots.py b/leadforge/schemes/lead_scoring/render/snapshots.py similarity index 100% rename from leadforge/render/snapshots.py rename to leadforge/schemes/lead_scoring/render/snapshots.py diff --git a/leadforge/render/tasks.py b/leadforge/schemes/lead_scoring/render/tasks.py similarity index 96% rename from leadforge/render/tasks.py rename to leadforge/schemes/lead_scoring/render/tasks.py index 7d6013c..d8e23a6 100644 --- a/leadforge/render/tasks.py +++ b/leadforge/schemes/lead_scoring/render/tasks.py @@ -35,7 +35,7 @@ def write_task_splits( Args: snapshot: Lead snapshot DataFrame from - :func:`~leadforge.render.snapshots.build_snapshot`. + :func:`~leadforge.schemes.lead_scoring.render.snapshots.build_snapshot`. out_dir: Parent directory for task outputs (typically ``bundle_root / "tasks"``). seed: Seed used for deterministic row shuffle. diff --git a/leadforge/validation/leakage_probes.py b/leadforge/validation/leakage_probes.py index 6db6551..eaed773 100644 --- a/leadforge/validation/leakage_probes.py +++ b/leadforge/validation/leakage_probes.py @@ -64,7 +64,7 @@ # --------------------------------------------------------------------------- # Snapshot-safe contract — single source of truth for "what is leakage". -# ``leadforge.render.relational_snapshot_safe`` (writer) and +# ``leadforge.schemes.lead_scoring.render.relational_snapshot_safe`` (writer) and # ``leadforge.render.manifests`` (manifest's structural_redactions) import # from here so the writer and the validator share one definition. # --------------------------------------------------------------------------- diff --git a/scripts/build_midproject_lead_scoring.py b/scripts/build_midproject_lead_scoring.py index 3363b37..b449ae7 100644 --- a/scripts/build_midproject_lead_scoring.py +++ b/scripts/build_midproject_lead_scoring.py @@ -31,7 +31,7 @@ softcap_expected_acv, subsample, ) -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot # --------------------------------------------------------------------------- # Orchestration diff --git a/scripts/build_v4_snapshot.py b/scripts/build_v4_snapshot.py index 61ba1c8..eb278d8 100644 --- a/scripts/build_v4_snapshot.py +++ b/scripts/build_v4_snapshot.py @@ -20,7 +20,7 @@ import pandas as pd from leadforge.api.generator import Generator -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot # --------------------------------------------------------------------------- # Constants diff --git a/scripts/build_v5_snapshot.py b/scripts/build_v5_snapshot.py index 80221e2..6fb7008 100644 --- a/scripts/build_v5_snapshot.py +++ b/scripts/build_v5_snapshot.py @@ -32,7 +32,7 @@ rename_and_select, subsample, ) -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot # --------------------------------------------------------------------------- # Orchestration (stays in script — depends on Generator) diff --git a/scripts/build_v6_snapshot.py b/scripts/build_v6_snapshot.py index 995b5e8..09c4ec8 100644 --- a/scripts/build_v6_snapshot.py +++ b/scripts/build_v6_snapshot.py @@ -39,7 +39,7 @@ softcap_expected_acv, subsample, ) -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot # --------------------------------------------------------------------------- # Orchestration diff --git a/scripts/build_v7_snapshot.py b/scripts/build_v7_snapshot.py index e5e2713..da84af9 100644 --- a/scripts/build_v7_snapshot.py +++ b/scripts/build_v7_snapshot.py @@ -38,7 +38,7 @@ softcap_expected_acv, subsample, ) -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot # --------------------------------------------------------------------------- # Orchestration diff --git a/scripts/spike_category_signal.py b/scripts/spike_category_signal.py index 62f079c..ed3dabd 100644 --- a/scripts/spike_category_signal.py +++ b/scripts/spike_category_signal.py @@ -26,7 +26,7 @@ from leadforge.api.generator import Generator from leadforge.core.rng import RNGRoot -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot from leadforge.schemes.lead_scoring.simulation.engine import simulate_world from leadforge.schemes.lead_scoring.simulation.population import PopulationResult, build_population from leadforge.schemes.lead_scoring.structure.sampler import sample_hidden_graph diff --git a/tests/integration/test_snapshot_safe_bundle.py b/tests/integration/test_snapshot_safe_bundle.py index 926572c..6099590 100644 --- a/tests/integration/test_snapshot_safe_bundle.py +++ b/tests/integration/test_snapshot_safe_bundle.py @@ -2,7 +2,7 @@ Covers the contract turned on in PR 2.2: ``student_public`` bundles route ``tables/`` through -:func:`leadforge.render.relational_snapshot_safe.to_dataframes_snapshot_safe` +:func:`leadforge.schemes.lead_scoring.render.relational_snapshot_safe.to_dataframes_snapshot_safe` (the structural fix against the alpha-bundle reconstruction paths A-E), ``research_instructor`` bundles keep the full-horizon export, and the manifest is self-describing via ``relational_snapshot_safe``, diff --git a/tests/render/test_relational_snapshot_safe.py b/tests/render/test_relational_snapshot_safe.py index 09dcdfd..a4e3821 100644 --- a/tests/render/test_relational_snapshot_safe.py +++ b/tests/render/test_relational_snapshot_safe.py @@ -5,7 +5,7 @@ import pandas as pd import pytest -from leadforge.render.relational_snapshot_safe import ( +from leadforge.schemes.lead_scoring.render.relational_snapshot_safe import ( BANNED_LEAD_COLUMNS, BANNED_OPP_COLUMNS, BANNED_TABLES, diff --git a/tests/render/test_render.py b/tests/render/test_render.py index b82ae48..0bb7bc9 100644 --- a/tests/render/test_render.py +++ b/tests/render/test_render.py @@ -58,7 +58,7 @@ def sim_outputs(): class TestToDataframes: def test_returns_all_table_names(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) expected = { @@ -76,14 +76,14 @@ def test_returns_all_table_names(self, sim_outputs): def test_lead_count_matches(self, sim_outputs): config, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) assert len(dfs["leads"]) == config.n_leads def test_account_and_contact_counts(self, sim_outputs): config, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) assert len(dfs["accounts"]) == config.n_accounts @@ -91,7 +91,7 @@ def test_account_and_contact_counts(self, sim_outputs): def test_dataframes_are_dataframes(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) for name, df in dfs.items(): @@ -100,8 +100,8 @@ def test_dataframes_are_dataframes(self, sim_outputs): def test_empty_tables_have_schema(self, sim_outputs): """Tables with zero rows must still expose the correct column names.""" _, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes from leadforge.schema.entities import CustomerRow + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) assert set(CustomerRow.DTYPE_MAP.keys()).issubset(set(dfs["customers"].columns)) @@ -109,8 +109,8 @@ def test_empty_tables_have_schema(self, sim_outputs): def test_fk_integrity(self, sim_outputs): """All FK constraints must hold on the produced DataFrames.""" _, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes from leadforge.schema.relationships import ALL_CONSTRAINTS, validate_fk + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) for constraint in ALL_CONSTRAINTS: @@ -126,7 +126,7 @@ def test_fk_integrity(self, sim_outputs): def test_deterministic_under_same_seed(self): """Same seed → identical relational DataFrames.""" - from leadforge.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational import to_dataframes def _run(seed): cfg = _make_config(seed=seed) @@ -150,14 +150,14 @@ def _run(seed): class TestBuildSnapshot: def test_row_count_equals_lead_count(self, sim_outputs): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population, horizon_days=config.horizon_days) assert len(snap) == config.n_leads def test_all_snapshot_columns_present(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) for col in _SNAPSHOT_COLUMNS: @@ -165,21 +165,21 @@ def test_all_snapshot_columns_present(self, sim_outputs): def test_no_extra_columns(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) assert set(snap.columns) == set(_SNAPSHOT_COLUMNS) def test_target_column_is_boolean(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) assert snap["converted_within_90_days"].dtype.name == "boolean" def test_touch_counts_non_negative(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) assert (snap["touch_count"].dropna() >= 0).all() @@ -189,7 +189,7 @@ def test_touch_counts_non_negative(self, sim_outputs): def test_inbound_plus_outbound_equals_total(self, sim_outputs): """inbound + outbound must equal touch_count exactly (only two directions in v1).""" _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) valid = snap[["touch_count", "inbound_touch_count", "outbound_touch_count"]].dropna() @@ -198,7 +198,7 @@ def test_inbound_plus_outbound_equals_total(self, sim_outputs): def test_days_since_last_touch_finite_when_touches_exist(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) has_touch = snap["touch_count"] > 0 @@ -208,7 +208,7 @@ def test_days_since_last_touch_finite_when_touches_exist(self, sim_outputs): def test_no_post_anchor_columns_in_snapshot(self, sim_outputs): """Columns that represent post-anchor truth must not appear in the snapshot.""" _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) # These exist in LeadRow / OpportunityRow but must be excluded (leakage rule). @@ -218,7 +218,7 @@ def test_no_post_anchor_columns_in_snapshot(self, sim_outputs): def test_target_matches_simulation_result(self, sim_outputs): """converted_within_90_days in snapshot must match SimulationResult's flag.""" _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) lead_flags = {row.lead_id: row.converted_within_90_days for row in result.leads} @@ -228,7 +228,7 @@ def test_target_matches_simulation_result(self, sim_outputs): def test_deterministic_under_same_seed(self): """Same seed → identical snapshots.""" - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot def _snap(seed): cfg = _make_config(seed=seed) @@ -251,8 +251,8 @@ def _snap(seed): class TestWriteTaskSplits: def test_three_files_written(self, sim_outputs, tmp_path): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) write_task_splits(snap, tmp_path, seed=config.seed) @@ -263,8 +263,8 @@ def test_three_files_written(self, sim_outputs, tmp_path): def test_task_manifest_written(self, sim_outputs, tmp_path): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) write_task_splits(snap, tmp_path, seed=config.seed) @@ -276,8 +276,8 @@ def test_task_manifest_written(self, sim_outputs, tmp_path): def test_row_counts_sum_to_total(self, sim_outputs, tmp_path): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) counts = write_task_splits(snap, tmp_path, seed=config.seed) @@ -287,8 +287,8 @@ def test_row_counts_sum_to_total(self, sim_outputs, tmp_path): def test_split_ratios_approx(self, sim_outputs, tmp_path): """Train ≈ 70%, valid ≈ 15%, test ≈ 15% (±5% tolerance for small samples).""" config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) counts = write_task_splits(snap, tmp_path, seed=config.seed) @@ -299,8 +299,8 @@ def test_split_ratios_approx(self, sim_outputs, tmp_path): def test_splits_are_disjoint(self, sim_outputs, tmp_path): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) write_task_splits(snap, tmp_path, seed=config.seed) @@ -314,8 +314,8 @@ def test_splits_are_disjoint(self, sim_outputs, tmp_path): def test_deterministic_under_same_seed(self, sim_outputs, tmp_path): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) @@ -341,10 +341,10 @@ class TestBuildManifest: def _make_manifest(self, sim_outputs, tmp_path): config, population, result, world_graph = sim_outputs from leadforge.render.manifests import build_manifest - from leadforge.render.relational import to_dataframes - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits from leadforge.schema.tables import write_parquet + from leadforge.schemes.lead_scoring.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits tables_dir = tmp_path / "tables" tables_dir.mkdir() diff --git a/tests/render/test_snapshot_windowed.py b/tests/render/test_snapshot_windowed.py index 8d36d33..94efc6e 100644 --- a/tests/render/test_snapshot_windowed.py +++ b/tests/render/test_snapshot_windowed.py @@ -11,7 +11,7 @@ from leadforge.core.models import GenerationConfig from leadforge.core.rng import RNGRoot -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot from leadforge.schemes.lead_scoring.simulation.engine import simulate_world from leadforge.schemes.lead_scoring.simulation.population import build_population from leadforge.schemes.lead_scoring.structure.sampler import sample_hidden_graph diff --git a/tests/render/test_write_relational_tables.py b/tests/render/test_write_relational_tables.py index 3e48a93..d67f750 100644 --- a/tests/render/test_write_relational_tables.py +++ b/tests/render/test_write_relational_tables.py @@ -4,7 +4,7 @@ import pandas as pd -from leadforge.render.relational import write_relational_tables +from leadforge.render.relational_io import write_relational_tables from leadforge.schema.tables import read_parquet diff --git a/tests/schemes/test_module_layout.py b/tests/schemes/test_module_layout.py index c6e75ab..3ba22d4 100644 --- a/tests/schemes/test_module_layout.py +++ b/tests/schemes/test_module_layout.py @@ -10,14 +10,23 @@ import pytest -# (old flat path, new scheme-owned path) for the modules moved in LTV-Pf.1. +# (old flat path, new scheme-owned path) for modules moved in LTV-Pf.1 (compute +# core) and LTV-Pf.2 (lead-scoring render). _MOVED = [ + # LTV-Pf.1 — compute core ("leadforge.simulation.engine", "leadforge.schemes.lead_scoring.simulation.engine"), ("leadforge.simulation.population", "leadforge.schemes.lead_scoring.simulation.population"), ("leadforge.simulation.state", "leadforge.schemes.lead_scoring.simulation.state"), ("leadforge.mechanisms.policies", "leadforge.schemes.lead_scoring.mechanisms.policies"), ("leadforge.structure.sampler", "leadforge.schemes.lead_scoring.structure.sampler"), ("leadforge.structure.graph", "leadforge.schemes.lead_scoring.structure.graph"), + # LTV-Pf.2 — lead-scoring render + ("leadforge.render.snapshots", "leadforge.schemes.lead_scoring.render.snapshots"), + ( + "leadforge.render.relational_snapshot_safe", + "leadforge.schemes.lead_scoring.render.relational_snapshot_safe", + ), + ("leadforge.render.tasks", "leadforge.schemes.lead_scoring.render.tasks"), ] @@ -39,6 +48,28 @@ def test_old_top_level_package_is_gone(pkg: str) -> None: importlib.import_module(f"leadforge.{pkg}") +def test_render_envelope_package_stays() -> None: + # LTV-Pf.2 moved the lead-scoring render modules, but `leadforge.render` + # remains the shared envelope: manifests + the relational-table writer + # (renamed to relational_io to avoid a basename clash with the scheme's + # relational.py assembler). + import leadforge.render.manifests # noqa: F401 + import leadforge.render.relational_io as shared_writer + + assert hasattr(shared_writer, "write_relational_tables") + + +def test_relational_split_to_dataframes_moved_to_scheme() -> None: + # The 9-table assembler moved to the scheme; the shared writer did not. + import leadforge.render.relational_io as shared_writer + from leadforge.schemes.lead_scoring.render.relational import to_dataframes # noqa: F401 + + assert not hasattr(shared_writer, "to_dataframes") + # The ambiguous flat `leadforge.render.relational` module is gone. + with pytest.raises(ModuleNotFoundError): + importlib.import_module("leadforge.render.relational") + + def test_public_api_unchanged_by_the_move() -> None: # The documented public surface must keep importing from its stable home. from leadforge.api import Generator, list_recipes # noqa: F401 diff --git a/tests/test_difficulty_modulation.py b/tests/test_difficulty_modulation.py index 006bc1c..95478a1 100644 --- a/tests/test_difficulty_modulation.py +++ b/tests/test_difficulty_modulation.py @@ -172,7 +172,7 @@ def test_distortions_change_values(self) -> None: import pandas as pd from leadforge.core.models import DifficultyParams - from leadforge.render.snapshots import ( + from leadforge.schemes.lead_scoring.render.snapshots import ( _FLOAT_DISTORTION_COLS, _apply_difficulty_distortions, ) diff --git a/tests/validation/test_leakage_probes.py b/tests/validation/test_leakage_probes.py index 216da69..6905ab5 100644 --- a/tests/validation/test_leakage_probes.py +++ b/tests/validation/test_leakage_probes.py @@ -15,7 +15,7 @@ For the structural probes each is exercised against two configurations: * a *clean* bundle, produced by running the same source frames through - :func:`leadforge.render.relational_snapshot_safe.to_dataframes_snapshot_safe`, + :func:`leadforge.schemes.lead_scoring.render.relational_snapshot_safe.to_dataframes_snapshot_safe`, on which every probe must produce zero findings; * a *tampered* bundle, in which one leakage channel at a time is re-introduced, on which the matching probe must fire with a finding @@ -30,7 +30,9 @@ import pandas as pd import pytest -from leadforge.render.relational_snapshot_safe import to_dataframes_snapshot_safe +from leadforge.schemes.lead_scoring.render.relational_snapshot_safe import ( + to_dataframes_snapshot_safe, +) from leadforge.validation import leakage_probes from leadforge.validation.leakage_probes import ( CHANNEL_BANNED_COLUMN,