From d80d242fdfe945656d898d6c13f8be368be03dbc Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Wed, 10 Jun 2026 22:58:13 +0300 Subject: [PATCH 1/3] refactor: move lead-scoring render under schemes/lead_scoring/render/ [LTV-Pf.2] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second half of the physical reorg (LTV-Pf). Relocates the lead-scoring render modules under the scheme and splits the one genuinely-shared piece out of the envelope. Hard break, no shims (D12); byte-identical output. - git mv snapshots.py, relational_snapshot_safe.py, tasks.py → leadforge/schemes/lead_scoring/render/. - Split render/relational.py: the 9-table assembler `to_dataframes` (+ _TABLE_SOURCES) moved to schemes/lead_scoring/render/relational.py; the scheme-agnostic `write_relational_tables` stays in leadforge/render/relational.py (now a small shared writer module). `leadforge.render` remains the shared envelope (write_relational_tables + manifests). - Rewrote importers: the three moved modules repo-wide; `to_dataframes` imports to the scheme path; `write_relational_tables` import unchanged. - CHANGELOG "Moved" table extended; CLAUDE.md Repository Map + canonical layout updated; roadmap Pf.2 + agent-plan updated. - tests/schemes/test_module_layout.py: render modules added to the moved set; new tests assert the shared envelope stays and the relational split landed (to_dataframes in scheme, not in shared render). Verified byte-identical to pre-reorg main (14/14 files); full suite 1524 passed / 51 skipped; ruff + mypy clean (89 source files); scripts compile. Co-Authored-By: Claude Opus 4.8 --- .agent-plan.md | 12 +-- CHANGELOG.md | 14 ++- CLAUDE.md | 25 +++--- docs/ltv/roadmap.md | 10 +-- leadforge/exposure/filters.py | 2 +- leadforge/render/manifests.py | 4 +- leadforge/render/relational.py | 86 ++----------------- leadforge/schemes/lead_scoring/__init__.py | 11 ++- .../schemes/lead_scoring/render/__init__.py | 0 .../schemes/lead_scoring/render/relational.py | 84 ++++++++++++++++++ .../render/relational_snapshot_safe.py | 0 .../lead_scoring}/render/snapshots.py | 0 .../lead_scoring}/render/tasks.py | 2 +- leadforge/validation/leakage_probes.py | 2 +- scripts/build_midproject_lead_scoring.py | 2 +- scripts/build_v4_snapshot.py | 2 +- scripts/build_v5_snapshot.py | 2 +- scripts/build_v6_snapshot.py | 2 +- scripts/build_v7_snapshot.py | 2 +- scripts/spike_category_signal.py | 2 +- .../integration/test_snapshot_safe_bundle.py | 2 +- tests/render/test_relational_snapshot_safe.py | 2 +- tests/render/test_render.py | 64 +++++++------- tests/render/test_snapshot_windowed.py | 2 +- tests/schemes/test_module_layout.py | 28 +++++- tests/test_difficulty_modulation.py | 2 +- tests/validation/test_leakage_probes.py | 6 +- 27 files changed, 212 insertions(+), 158 deletions(-) create mode 100644 leadforge/schemes/lead_scoring/render/__init__.py create mode 100644 leadforge/schemes/lead_scoring/render/relational.py rename leadforge/{ => schemes/lead_scoring}/render/relational_snapshot_safe.py (100%) rename leadforge/{ => schemes/lead_scoring}/render/snapshots.py (100%) rename leadforge/{ => schemes/lead_scoring}/render/tasks.py (96%) diff --git a/.agent-plan.md b/.agent-plan.md index 14c3d14..1120869 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -42,11 +42,13 @@ early against the known-good lead-scoring path + physical reorg into Status: `LTV-M0` landed (#102, #103, #106). `LTV-M1`: `LTV-Pb` merged (#104); `LTV-Pc` (pLTV feature/task specs) still outstanding. `LTV-M2`: `LTV-Pd` (#107) and `LTV-Pe` (#108) merged (scheme protocol + render seam). `LTV-Pf` (physical -move, **hard break / no shims** per D12) split into Pf.1 (compute core — -simulation/mechanisms/structure moved) opened as **#109**, and Pf.2 (render -move, pending). Verified byte-identical. Sibling `leadforge-datasets-private` -build scripts must update to the new import paths (breakage issue filed). Next: -`LTV-Pf.2` (render), then `LTV-Pg` (scaffold `schemes/lifecycle/`). +move, **hard break / no shims** per D12): Pf.1 (compute core — +simulation/mechanisms/structure) merged (#109); Pf.2 (lead-scoring render — +snapshots/relational_snapshot_safe/tasks moved + relational.py split so the +shared write_relational_tables stays in the envelope) opened as **#NNN**. Both +byte-identical. Sibling `leadforge-datasets-private` consumes bundle files, not +internals — no lockstep update needed (heads-up issue #8 filed). Next: +`LTV-Pg` (scaffold `schemes/lifecycle/` + relocate the lead-scoring schema specs). --- diff --git a/CHANGELOG.md b/CHANGELOG.md index c085dd6..2b9feb0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,10 +21,16 @@ back-compat shims, by design): | `leadforge.simulation.*` | `leadforge.schemes.lead_scoring.simulation.*` | | `leadforge.mechanisms.*` | `leadforge.schemes.lead_scoring.mechanisms.*` | | `leadforge.structure.*` | `leadforge.schemes.lead_scoring.structure.*` | - -`render/{snapshots,relational,tasks}` and the lead-scoring `schema` specs -relocate in follow-up PRs. Consumers importing internals (e.g. the -`leadforge-datasets-private` build scripts) must update to the new paths; +| `leadforge.render.snapshots` | `leadforge.schemes.lead_scoring.render.snapshots` | +| `leadforge.render.relational_snapshot_safe` | `leadforge.schemes.lead_scoring.render.relational_snapshot_safe` | +| `leadforge.render.tasks` | `leadforge.schemes.lead_scoring.render.tasks` | +| `leadforge.render.relational:to_dataframes` | `leadforge.schemes.lead_scoring.render.relational:to_dataframes` | + +`leadforge.render` remains the shared bundle-output envelope: +`leadforge.render.relational.write_relational_tables` (the scheme-agnostic table +writer) and `leadforge.render.manifests` stay put. The lead-scoring `schema` +specs relocate in a follow-up PR (LTV-Pg). Consumers importing internals (e.g. +the `leadforge-datasets-private` build scripts) must update to the new paths; the package stays on the `1.x` line (the public contract did not change). ### CLI surfaces v4 fields diff --git a/CLAUDE.md b/CLAUDE.md index 9317851..34e6ef5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -159,11 +159,11 @@ leadforge/ schema/ entities.py, relationships.py, events.py, features.py, tasks.py, dictionaries.py schemes/ base.py (GenerationScheme protocol + SCHEME_REGISTRY); lead_scoring/ — the lead-scoring scheme: __init__.py (build_world/ - write_bundle) + simulation/, mechanisms/, structure/ (moved in - LTV-Pf.1). render/ + lead-scoring schema specs migrate here in - LTV-Pf.2 / LTV-Pg. See docs/ltv/design.md §2.5. - render/ relational.py (+ write_relational_tables), snapshots.py, manifests.py, tasks.py - # lead-scoring render still here pending LTV-Pf.2 + write_bundle) + simulation/, mechanisms/, structure/, render/ + (moved in LTV-Pf.1/Pf.2). Lead-scoring schema specs migrate + here in LTV-Pg. See docs/ltv/design.md §2.5. + render/ relational.py (write_relational_tables — shared writer), manifests.py + # shared bundle-output envelope exposure/ modes.py, filters.py, redaction.py validation/ invariants.py, artifact_checks.py, realism.py, difficulty.py, drift.py recipes/ registry.py, b2b_saas_procurement_v1/{recipe,narrative,schema,motifs,difficulty_profiles}.yaml @@ -248,14 +248,13 @@ leadforge/ # Python package root │ ├── __init__.py # build_world() + write_bundle() │ ├── structure/ # Hidden world graph (WorldGraph, motifs, sampler) │ ├── mechanisms/ # Node/edge behavior (policies, hazards, scores, …) -│ └── simulation/ # World evolution (engine, population, state) -│ # NOTE (LTV-M2 reorg in progress): render/{snapshots,relational,tasks} -│ # relocate under schemes/lead_scoring/ in a follow-up; schema specs split -│ # in LTV-Pg. See docs/ltv/design.md §2.5 for the target layout. -├── render/ # Bundle output (envelope + not-yet-moved lead-scoring render) -│ ├── snapshots.py # build_snapshot() — ML-ready lead table -│ ├── relational.py # to_dataframes() — 9-table dict -│ ├── tasks.py # write_task_splits() — train/valid/test Parquet +│ ├── simulation/ # World evolution (engine, population, state) +│ └── render/ # Lead-scoring render: snapshots, relational +│ # (to_dataframes), relational_snapshot_safe, tasks +│ # NOTE (LTV-M2 reorg in progress): lead-scoring schema specs split in LTV-Pg. +│ # See docs/ltv/design.md §2.5 for the target layout. +├── render/ # Shared bundle-output envelope +│ ├── relational.py # write_relational_tables() — shared table writer │ └── manifests.py # build_manifest(), write_manifest() ├── exposure/ # Truth filtering │ ├── modes.py # apply_exposure() dispatch diff --git a/docs/ltv/roadmap.md b/docs/ltv/roadmap.md index 2133602..0f4ef95 100644 --- a/docs/ltv/roadmap.md +++ b/docs/ltv/roadmap.md @@ -125,11 +125,11 @@ Total: ~19 PRs across 9 milestones. - [x] **`LTV-Pf.1`** — compute core: `simulation/` + `mechanisms/` + `structure/` moved as whole directories (21 file renames, all callers rewritten). Verified byte-identical; full suite green. (**PR #109**) - - [ ] **`LTV-Pf.2`** — render: relocate `render/{snapshots,relational,tasks}` - under the scheme, splitting `render/relational.py` so the shared - `write_relational_tables` stays in the envelope while the 9-table - `to_dataframes` moves. (The lead-scoring `schema` specs split lands with - `LTV-Pg`.) + - [x] **`LTV-Pf.2`** — render: relocated `render/{snapshots,relational_snapshot_safe,tasks}` + under `schemes/lead_scoring/render/`, and split `render/relational.py` so the + shared `write_relational_tables` stays in the envelope while the 9-table + `to_dataframes` moved. Verified byte-identical; full suite green. (**PR #NNN**) + (The lead-scoring `schema` specs split lands with `LTV-Pg`.) - Tests: full suite + hash-determinism green; public API imports unchanged. - Labels: `type: refactor`, `layer: schema`, `layer: simulation`, `layer: render` - [ ] **`LTV-Pg`** — `refactor: scaffold schemes/lifecycle/ + relocate LTV-Pb/Pc specs`. diff --git a/leadforge/exposure/filters.py b/leadforge/exposure/filters.py index 1376674..f78eda9 100644 --- a/leadforge/exposure/filters.py +++ b/leadforge/exposure/filters.py @@ -31,7 +31,7 @@ class BundleFilter: relational_snapshot_safe: Whether the relational ``tables/`` dict must be projected onto the snapshot-safe shape before being written. When ``True``, the bundle writer routes through - :func:`leadforge.render.relational_snapshot_safe.to_dataframes_snapshot_safe`, + :func:`leadforge.schemes.lead_scoring.render.relational_snapshot_safe.to_dataframes_snapshot_safe`, which strips :data:`leadforge.validation.leakage_probes.BANNED_LEAD_COLUMNS` from ``leads``, :data:`~leadforge.validation.leakage_probes.BANNED_OPP_COLUMNS` from ``opportunities``, filters event tables per-lead by diff --git a/leadforge/render/manifests.py b/leadforge/render/manifests.py index 418780c..382a05a 100644 --- a/leadforge/render/manifests.py +++ b/leadforge/render/manifests.py @@ -43,7 +43,7 @@ # self-describing (``null`` means full-horizon, legacy behaviour). # "5" — PR 2.2: ``student_public`` bundles route through the # snapshot-safe relational export ( -# :mod:`leadforge.render.relational_snapshot_safe`). Public +# :mod:`leadforge.schemes.lead_scoring.render.relational_snapshot_safe`). Public # ``leads`` drops ``converted_within_90_days`` / # ``conversion_timestamp``; public ``opportunities`` drops # ``close_outcome`` / ``closed_at``; public bundles omit @@ -91,7 +91,7 @@ def build_manifest( package internals. Defaults to ``[]`` (nothing redacted). relational_snapshot_safe: ``True`` if the relational ``tables/`` were projected through - :func:`leadforge.render.relational_snapshot_safe.to_dataframes_snapshot_safe` + :func:`leadforge.schemes.lead_scoring.render.relational_snapshot_safe.to_dataframes_snapshot_safe` before being written. Recorded in the manifest so a tool reading a v5+ bundle can tell from the manifest alone whether ``tables/`` is the snapshot-safe (public) shape or the diff --git a/leadforge/render/relational.py b/leadforge/render/relational.py index efca9fd..58c46a5 100644 --- a/leadforge/render/relational.py +++ b/leadforge/render/relational.py @@ -1,91 +1,23 @@ -"""Relational export — convert SimulationResult to typed DataFrames. - -:func:`to_dataframes` is the single entry point. It produces one -``pd.DataFrame`` per relational table, with dtypes matching the -:attr:`~leadforge.schema.entities.AccountRow.DTYPE_MAP` of each entity -class. The resulting dict is consumed by the bundle writer to produce -the ``tables/`` directory in the output bundle. +"""Shared relational-table writer (bundle-output envelope). + +:func:`write_relational_tables` is the scheme-agnostic step that serialises a +``{table_name: DataFrame}`` dict to a bundle's ``tables/`` directory. Each +generation scheme decides the relational *shape* (which tables, any +snapshot-safe projection) and then calls this to write them. The lead-scoring +table *assembler* (``to_dataframes``) lives with its scheme in +:mod:`leadforge.schemes.lead_scoring.render.relational`. """ from __future__ import annotations -from typing import TYPE_CHECKING, Literal, NamedTuple +from typing import TYPE_CHECKING import pandas as pd -from leadforge.schema.entities import ( - AccountRow, - ContactRow, - CustomerRow, - EntityRowProtocol, - LeadRow, - OpportunityRow, - SalesActivityRow, - SessionRow, - SubscriptionRow, - TouchRow, -) - if TYPE_CHECKING: from collections.abc import Collection from pathlib import Path - from leadforge.schemes.lead_scoring.simulation.engine import SimulationResult - from leadforge.schemes.lead_scoring.simulation.population import PopulationResult - -_Source = Literal["population", "simulation"] - - -class _TableSource(NamedTuple): - cls: type[EntityRowProtocol] - origin: _Source # which object holds the rows - attr: str # attribute name on that object - - -# Maps table name → source descriptor. -_TABLE_SOURCES: dict[str, _TableSource] = { - AccountRow.TABLE_NAME: _TableSource(AccountRow, "population", "accounts"), - ContactRow.TABLE_NAME: _TableSource(ContactRow, "population", "contacts"), - LeadRow.TABLE_NAME: _TableSource(LeadRow, "simulation", "leads"), - TouchRow.TABLE_NAME: _TableSource(TouchRow, "simulation", "touches"), - SessionRow.TABLE_NAME: _TableSource(SessionRow, "simulation", "sessions"), - SalesActivityRow.TABLE_NAME: _TableSource(SalesActivityRow, "simulation", "sales_activities"), - OpportunityRow.TABLE_NAME: _TableSource(OpportunityRow, "simulation", "opportunities"), - CustomerRow.TABLE_NAME: _TableSource(CustomerRow, "simulation", "customers"), - SubscriptionRow.TABLE_NAME: _TableSource(SubscriptionRow, "simulation", "subscriptions"), -} - - -def to_dataframes( - result: SimulationResult, - population: PopulationResult, -) -> dict[str, pd.DataFrame]: - """Convert simulation output to one typed DataFrame per relational table. - - Args: - result: Output of :func:`~leadforge.schemes.lead_scoring.simulation.engine.simulate_world`. - population: Output of - :func:`~leadforge.schemes.lead_scoring.simulation.population.build_population`. - - Returns: - Dict mapping table name → ``pd.DataFrame`` with dtypes matching the - entity class's ``DTYPE_MAP``. Empty tables are returned as zero-row - DataFrames with the correct schema. - """ - dfs: dict[str, pd.DataFrame] = {} - for table_name, src in _TABLE_SOURCES.items(): - obj = population if src.origin == "population" else result - rows = getattr(obj, src.attr) # AttributeError surfaces missing attrs immediately - if rows: - df = pd.DataFrame([row.to_dict() for row in rows]) - for col, dtype in src.cls.DTYPE_MAP.items(): - if col in df.columns: - df[col] = df[col].astype(dtype) - else: - df = src.cls.empty_dataframe() - dfs[table_name] = df - return dfs - def write_relational_tables( dfs: dict[str, pd.DataFrame], diff --git a/leadforge/schemes/lead_scoring/__init__.py b/leadforge/schemes/lead_scoring/__init__.py index 55e1dea..c127da0 100644 --- a/leadforge/schemes/lead_scoring/__init__.py +++ b/leadforge/schemes/lead_scoring/__init__.py @@ -162,13 +162,16 @@ def write_bundle( from leadforge.exposure.modes import apply_exposure from leadforge.narrative.dataset_card import render_dataset_card from leadforge.render.manifests import build_manifest, write_manifest - from leadforge.render.relational import to_dataframes, write_relational_tables - from leadforge.render.relational_snapshot_safe import to_dataframes_snapshot_safe - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.render.relational import write_relational_tables from leadforge.schema.dictionaries import write_feature_dictionary from leadforge.schema.features import LEAD_SNAPSHOT_FEATURES, redacted_columns_for from leadforge.schema.tasks import task_manifest_for_config + from leadforge.schemes.lead_scoring.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational_snapshot_safe import ( + to_dataframes_snapshot_safe, + ) + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits if ( bundle.simulation_result is None diff --git a/leadforge/schemes/lead_scoring/render/__init__.py b/leadforge/schemes/lead_scoring/render/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/leadforge/schemes/lead_scoring/render/relational.py b/leadforge/schemes/lead_scoring/render/relational.py new file mode 100644 index 0000000..62387e2 --- /dev/null +++ b/leadforge/schemes/lead_scoring/render/relational.py @@ -0,0 +1,84 @@ +"""Relational export — convert SimulationResult to typed DataFrames. + +:func:`to_dataframes` is the single entry point. It produces one +``pd.DataFrame`` per relational table, with dtypes matching the +:attr:`~leadforge.schema.entities.AccountRow.DTYPE_MAP` of each entity +class. The resulting dict is consumed by the bundle writer to produce +the ``tables/`` directory in the output bundle. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Literal, NamedTuple + +import pandas as pd + +from leadforge.schema.entities import ( + AccountRow, + ContactRow, + CustomerRow, + EntityRowProtocol, + LeadRow, + OpportunityRow, + SalesActivityRow, + SessionRow, + SubscriptionRow, + TouchRow, +) + +if TYPE_CHECKING: + from leadforge.schemes.lead_scoring.simulation.engine import SimulationResult + from leadforge.schemes.lead_scoring.simulation.population import PopulationResult + +_Source = Literal["population", "simulation"] + + +class _TableSource(NamedTuple): + cls: type[EntityRowProtocol] + origin: _Source # which object holds the rows + attr: str # attribute name on that object + + +# Maps table name → source descriptor. +_TABLE_SOURCES: dict[str, _TableSource] = { + AccountRow.TABLE_NAME: _TableSource(AccountRow, "population", "accounts"), + ContactRow.TABLE_NAME: _TableSource(ContactRow, "population", "contacts"), + LeadRow.TABLE_NAME: _TableSource(LeadRow, "simulation", "leads"), + TouchRow.TABLE_NAME: _TableSource(TouchRow, "simulation", "touches"), + SessionRow.TABLE_NAME: _TableSource(SessionRow, "simulation", "sessions"), + SalesActivityRow.TABLE_NAME: _TableSource(SalesActivityRow, "simulation", "sales_activities"), + OpportunityRow.TABLE_NAME: _TableSource(OpportunityRow, "simulation", "opportunities"), + CustomerRow.TABLE_NAME: _TableSource(CustomerRow, "simulation", "customers"), + SubscriptionRow.TABLE_NAME: _TableSource(SubscriptionRow, "simulation", "subscriptions"), +} + + +def to_dataframes( + result: SimulationResult, + population: PopulationResult, +) -> dict[str, pd.DataFrame]: + """Convert simulation output to one typed DataFrame per relational table. + + Args: + result: Output of :func:`~leadforge.schemes.lead_scoring.simulation.engine.simulate_world`. + population: Output of + :func:`~leadforge.schemes.lead_scoring.simulation.population.build_population`. + + Returns: + Dict mapping table name → ``pd.DataFrame`` with dtypes matching the + entity class's ``DTYPE_MAP``. Empty tables are returned as zero-row + DataFrames with the correct schema. + """ + dfs: dict[str, pd.DataFrame] = {} + for table_name, src in _TABLE_SOURCES.items(): + obj = population if src.origin == "population" else result + rows = getattr(obj, src.attr) # AttributeError surfaces missing attrs immediately + if rows: + df = pd.DataFrame([row.to_dict() for row in rows]) + for col, dtype in src.cls.DTYPE_MAP.items(): + if col in df.columns: + df[col] = df[col].astype(dtype) + else: + df = src.cls.empty_dataframe() + dfs[table_name] = df + return dfs diff --git a/leadforge/render/relational_snapshot_safe.py b/leadforge/schemes/lead_scoring/render/relational_snapshot_safe.py similarity index 100% rename from leadforge/render/relational_snapshot_safe.py rename to leadforge/schemes/lead_scoring/render/relational_snapshot_safe.py diff --git a/leadforge/render/snapshots.py b/leadforge/schemes/lead_scoring/render/snapshots.py similarity index 100% rename from leadforge/render/snapshots.py rename to leadforge/schemes/lead_scoring/render/snapshots.py diff --git a/leadforge/render/tasks.py b/leadforge/schemes/lead_scoring/render/tasks.py similarity index 96% rename from leadforge/render/tasks.py rename to leadforge/schemes/lead_scoring/render/tasks.py index 7d6013c..d8e23a6 100644 --- a/leadforge/render/tasks.py +++ b/leadforge/schemes/lead_scoring/render/tasks.py @@ -35,7 +35,7 @@ def write_task_splits( Args: snapshot: Lead snapshot DataFrame from - :func:`~leadforge.render.snapshots.build_snapshot`. + :func:`~leadforge.schemes.lead_scoring.render.snapshots.build_snapshot`. out_dir: Parent directory for task outputs (typically ``bundle_root / "tasks"``). seed: Seed used for deterministic row shuffle. diff --git a/leadforge/validation/leakage_probes.py b/leadforge/validation/leakage_probes.py index 6db6551..eaed773 100644 --- a/leadforge/validation/leakage_probes.py +++ b/leadforge/validation/leakage_probes.py @@ -64,7 +64,7 @@ # --------------------------------------------------------------------------- # Snapshot-safe contract — single source of truth for "what is leakage". -# ``leadforge.render.relational_snapshot_safe`` (writer) and +# ``leadforge.schemes.lead_scoring.render.relational_snapshot_safe`` (writer) and # ``leadforge.render.manifests`` (manifest's structural_redactions) import # from here so the writer and the validator share one definition. # --------------------------------------------------------------------------- diff --git a/scripts/build_midproject_lead_scoring.py b/scripts/build_midproject_lead_scoring.py index 3363b37..b449ae7 100644 --- a/scripts/build_midproject_lead_scoring.py +++ b/scripts/build_midproject_lead_scoring.py @@ -31,7 +31,7 @@ softcap_expected_acv, subsample, ) -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot # --------------------------------------------------------------------------- # Orchestration diff --git a/scripts/build_v4_snapshot.py b/scripts/build_v4_snapshot.py index 61ba1c8..eb278d8 100644 --- a/scripts/build_v4_snapshot.py +++ b/scripts/build_v4_snapshot.py @@ -20,7 +20,7 @@ import pandas as pd from leadforge.api.generator import Generator -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot # --------------------------------------------------------------------------- # Constants diff --git a/scripts/build_v5_snapshot.py b/scripts/build_v5_snapshot.py index 80221e2..6fb7008 100644 --- a/scripts/build_v5_snapshot.py +++ b/scripts/build_v5_snapshot.py @@ -32,7 +32,7 @@ rename_and_select, subsample, ) -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot # --------------------------------------------------------------------------- # Orchestration (stays in script — depends on Generator) diff --git a/scripts/build_v6_snapshot.py b/scripts/build_v6_snapshot.py index 995b5e8..09c4ec8 100644 --- a/scripts/build_v6_snapshot.py +++ b/scripts/build_v6_snapshot.py @@ -39,7 +39,7 @@ softcap_expected_acv, subsample, ) -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot # --------------------------------------------------------------------------- # Orchestration diff --git a/scripts/build_v7_snapshot.py b/scripts/build_v7_snapshot.py index e5e2713..da84af9 100644 --- a/scripts/build_v7_snapshot.py +++ b/scripts/build_v7_snapshot.py @@ -38,7 +38,7 @@ softcap_expected_acv, subsample, ) -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot # --------------------------------------------------------------------------- # Orchestration diff --git a/scripts/spike_category_signal.py b/scripts/spike_category_signal.py index 62f079c..ed3dabd 100644 --- a/scripts/spike_category_signal.py +++ b/scripts/spike_category_signal.py @@ -26,7 +26,7 @@ from leadforge.api.generator import Generator from leadforge.core.rng import RNGRoot -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot from leadforge.schemes.lead_scoring.simulation.engine import simulate_world from leadforge.schemes.lead_scoring.simulation.population import PopulationResult, build_population from leadforge.schemes.lead_scoring.structure.sampler import sample_hidden_graph diff --git a/tests/integration/test_snapshot_safe_bundle.py b/tests/integration/test_snapshot_safe_bundle.py index 926572c..6099590 100644 --- a/tests/integration/test_snapshot_safe_bundle.py +++ b/tests/integration/test_snapshot_safe_bundle.py @@ -2,7 +2,7 @@ Covers the contract turned on in PR 2.2: ``student_public`` bundles route ``tables/`` through -:func:`leadforge.render.relational_snapshot_safe.to_dataframes_snapshot_safe` +:func:`leadforge.schemes.lead_scoring.render.relational_snapshot_safe.to_dataframes_snapshot_safe` (the structural fix against the alpha-bundle reconstruction paths A-E), ``research_instructor`` bundles keep the full-horizon export, and the manifest is self-describing via ``relational_snapshot_safe``, diff --git a/tests/render/test_relational_snapshot_safe.py b/tests/render/test_relational_snapshot_safe.py index 09dcdfd..a4e3821 100644 --- a/tests/render/test_relational_snapshot_safe.py +++ b/tests/render/test_relational_snapshot_safe.py @@ -5,7 +5,7 @@ import pandas as pd import pytest -from leadforge.render.relational_snapshot_safe import ( +from leadforge.schemes.lead_scoring.render.relational_snapshot_safe import ( BANNED_LEAD_COLUMNS, BANNED_OPP_COLUMNS, BANNED_TABLES, diff --git a/tests/render/test_render.py b/tests/render/test_render.py index b82ae48..0bb7bc9 100644 --- a/tests/render/test_render.py +++ b/tests/render/test_render.py @@ -58,7 +58,7 @@ def sim_outputs(): class TestToDataframes: def test_returns_all_table_names(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) expected = { @@ -76,14 +76,14 @@ def test_returns_all_table_names(self, sim_outputs): def test_lead_count_matches(self, sim_outputs): config, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) assert len(dfs["leads"]) == config.n_leads def test_account_and_contact_counts(self, sim_outputs): config, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) assert len(dfs["accounts"]) == config.n_accounts @@ -91,7 +91,7 @@ def test_account_and_contact_counts(self, sim_outputs): def test_dataframes_are_dataframes(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) for name, df in dfs.items(): @@ -100,8 +100,8 @@ def test_dataframes_are_dataframes(self, sim_outputs): def test_empty_tables_have_schema(self, sim_outputs): """Tables with zero rows must still expose the correct column names.""" _, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes from leadforge.schema.entities import CustomerRow + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) assert set(CustomerRow.DTYPE_MAP.keys()).issubset(set(dfs["customers"].columns)) @@ -109,8 +109,8 @@ def test_empty_tables_have_schema(self, sim_outputs): def test_fk_integrity(self, sim_outputs): """All FK constraints must hold on the produced DataFrames.""" _, population, result, _ = sim_outputs - from leadforge.render.relational import to_dataframes from leadforge.schema.relationships import ALL_CONSTRAINTS, validate_fk + from leadforge.schemes.lead_scoring.render.relational import to_dataframes dfs = to_dataframes(result, population) for constraint in ALL_CONSTRAINTS: @@ -126,7 +126,7 @@ def test_fk_integrity(self, sim_outputs): def test_deterministic_under_same_seed(self): """Same seed → identical relational DataFrames.""" - from leadforge.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.relational import to_dataframes def _run(seed): cfg = _make_config(seed=seed) @@ -150,14 +150,14 @@ def _run(seed): class TestBuildSnapshot: def test_row_count_equals_lead_count(self, sim_outputs): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population, horizon_days=config.horizon_days) assert len(snap) == config.n_leads def test_all_snapshot_columns_present(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) for col in _SNAPSHOT_COLUMNS: @@ -165,21 +165,21 @@ def test_all_snapshot_columns_present(self, sim_outputs): def test_no_extra_columns(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) assert set(snap.columns) == set(_SNAPSHOT_COLUMNS) def test_target_column_is_boolean(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) assert snap["converted_within_90_days"].dtype.name == "boolean" def test_touch_counts_non_negative(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) assert (snap["touch_count"].dropna() >= 0).all() @@ -189,7 +189,7 @@ def test_touch_counts_non_negative(self, sim_outputs): def test_inbound_plus_outbound_equals_total(self, sim_outputs): """inbound + outbound must equal touch_count exactly (only two directions in v1).""" _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) valid = snap[["touch_count", "inbound_touch_count", "outbound_touch_count"]].dropna() @@ -198,7 +198,7 @@ def test_inbound_plus_outbound_equals_total(self, sim_outputs): def test_days_since_last_touch_finite_when_touches_exist(self, sim_outputs): _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) has_touch = snap["touch_count"] > 0 @@ -208,7 +208,7 @@ def test_days_since_last_touch_finite_when_touches_exist(self, sim_outputs): def test_no_post_anchor_columns_in_snapshot(self, sim_outputs): """Columns that represent post-anchor truth must not appear in the snapshot.""" _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) # These exist in LeadRow / OpportunityRow but must be excluded (leakage rule). @@ -218,7 +218,7 @@ def test_no_post_anchor_columns_in_snapshot(self, sim_outputs): def test_target_matches_simulation_result(self, sim_outputs): """converted_within_90_days in snapshot must match SimulationResult's flag.""" _, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot snap = build_snapshot(result, population) lead_flags = {row.lead_id: row.converted_within_90_days for row in result.leads} @@ -228,7 +228,7 @@ def test_target_matches_simulation_result(self, sim_outputs): def test_deterministic_under_same_seed(self): """Same seed → identical snapshots.""" - from leadforge.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot def _snap(seed): cfg = _make_config(seed=seed) @@ -251,8 +251,8 @@ def _snap(seed): class TestWriteTaskSplits: def test_three_files_written(self, sim_outputs, tmp_path): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) write_task_splits(snap, tmp_path, seed=config.seed) @@ -263,8 +263,8 @@ def test_three_files_written(self, sim_outputs, tmp_path): def test_task_manifest_written(self, sim_outputs, tmp_path): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) write_task_splits(snap, tmp_path, seed=config.seed) @@ -276,8 +276,8 @@ def test_task_manifest_written(self, sim_outputs, tmp_path): def test_row_counts_sum_to_total(self, sim_outputs, tmp_path): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) counts = write_task_splits(snap, tmp_path, seed=config.seed) @@ -287,8 +287,8 @@ def test_row_counts_sum_to_total(self, sim_outputs, tmp_path): def test_split_ratios_approx(self, sim_outputs, tmp_path): """Train ≈ 70%, valid ≈ 15%, test ≈ 15% (±5% tolerance for small samples).""" config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) counts = write_task_splits(snap, tmp_path, seed=config.seed) @@ -299,8 +299,8 @@ def test_split_ratios_approx(self, sim_outputs, tmp_path): def test_splits_are_disjoint(self, sim_outputs, tmp_path): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) write_task_splits(snap, tmp_path, seed=config.seed) @@ -314,8 +314,8 @@ def test_splits_are_disjoint(self, sim_outputs, tmp_path): def test_deterministic_under_same_seed(self, sim_outputs, tmp_path): config, population, result, _ = sim_outputs - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits snap = build_snapshot(result, population, horizon_days=config.horizon_days) @@ -341,10 +341,10 @@ class TestBuildManifest: def _make_manifest(self, sim_outputs, tmp_path): config, population, result, world_graph = sim_outputs from leadforge.render.manifests import build_manifest - from leadforge.render.relational import to_dataframes - from leadforge.render.snapshots import build_snapshot - from leadforge.render.tasks import write_task_splits from leadforge.schema.tables import write_parquet + from leadforge.schemes.lead_scoring.render.relational import to_dataframes + from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot + from leadforge.schemes.lead_scoring.render.tasks import write_task_splits tables_dir = tmp_path / "tables" tables_dir.mkdir() diff --git a/tests/render/test_snapshot_windowed.py b/tests/render/test_snapshot_windowed.py index 8d36d33..94efc6e 100644 --- a/tests/render/test_snapshot_windowed.py +++ b/tests/render/test_snapshot_windowed.py @@ -11,7 +11,7 @@ from leadforge.core.models import GenerationConfig from leadforge.core.rng import RNGRoot -from leadforge.render.snapshots import build_snapshot +from leadforge.schemes.lead_scoring.render.snapshots import build_snapshot from leadforge.schemes.lead_scoring.simulation.engine import simulate_world from leadforge.schemes.lead_scoring.simulation.population import build_population from leadforge.schemes.lead_scoring.structure.sampler import sample_hidden_graph diff --git a/tests/schemes/test_module_layout.py b/tests/schemes/test_module_layout.py index c6e75ab..98984c5 100644 --- a/tests/schemes/test_module_layout.py +++ b/tests/schemes/test_module_layout.py @@ -10,14 +10,23 @@ import pytest -# (old flat path, new scheme-owned path) for the modules moved in LTV-Pf.1. +# (old flat path, new scheme-owned path) for modules moved in LTV-Pf.1 (compute +# core) and LTV-Pf.2 (lead-scoring render). _MOVED = [ + # LTV-Pf.1 — compute core ("leadforge.simulation.engine", "leadforge.schemes.lead_scoring.simulation.engine"), ("leadforge.simulation.population", "leadforge.schemes.lead_scoring.simulation.population"), ("leadforge.simulation.state", "leadforge.schemes.lead_scoring.simulation.state"), ("leadforge.mechanisms.policies", "leadforge.schemes.lead_scoring.mechanisms.policies"), ("leadforge.structure.sampler", "leadforge.schemes.lead_scoring.structure.sampler"), ("leadforge.structure.graph", "leadforge.schemes.lead_scoring.structure.graph"), + # LTV-Pf.2 — lead-scoring render + ("leadforge.render.snapshots", "leadforge.schemes.lead_scoring.render.snapshots"), + ( + "leadforge.render.relational_snapshot_safe", + "leadforge.schemes.lead_scoring.render.relational_snapshot_safe", + ), + ("leadforge.render.tasks", "leadforge.schemes.lead_scoring.render.tasks"), ] @@ -39,6 +48,23 @@ def test_old_top_level_package_is_gone(pkg: str) -> None: importlib.import_module(f"leadforge.{pkg}") +def test_render_envelope_package_stays() -> None: + # LTV-Pf.2 moved the lead-scoring render modules, but `leadforge.render` + # remains the shared envelope (manifests + the relational-table writer). + import leadforge.render.manifests # noqa: F401 + import leadforge.render.relational as shared_relational + + assert hasattr(shared_relational, "write_relational_tables") + + +def test_relational_split_to_dataframes_moved_to_scheme() -> None: + # The 9-table assembler moved to the scheme; the shared writer did not. + import leadforge.render.relational as shared_relational + from leadforge.schemes.lead_scoring.render.relational import to_dataframes # noqa: F401 + + assert not hasattr(shared_relational, "to_dataframes") + + def test_public_api_unchanged_by_the_move() -> None: # The documented public surface must keep importing from its stable home. from leadforge.api import Generator, list_recipes # noqa: F401 diff --git a/tests/test_difficulty_modulation.py b/tests/test_difficulty_modulation.py index 006bc1c..95478a1 100644 --- a/tests/test_difficulty_modulation.py +++ b/tests/test_difficulty_modulation.py @@ -172,7 +172,7 @@ def test_distortions_change_values(self) -> None: import pandas as pd from leadforge.core.models import DifficultyParams - from leadforge.render.snapshots import ( + from leadforge.schemes.lead_scoring.render.snapshots import ( _FLOAT_DISTORTION_COLS, _apply_difficulty_distortions, ) diff --git a/tests/validation/test_leakage_probes.py b/tests/validation/test_leakage_probes.py index 216da69..6905ab5 100644 --- a/tests/validation/test_leakage_probes.py +++ b/tests/validation/test_leakage_probes.py @@ -15,7 +15,7 @@ For the structural probes each is exercised against two configurations: * a *clean* bundle, produced by running the same source frames through - :func:`leadforge.render.relational_snapshot_safe.to_dataframes_snapshot_safe`, + :func:`leadforge.schemes.lead_scoring.render.relational_snapshot_safe.to_dataframes_snapshot_safe`, on which every probe must produce zero findings; * a *tampered* bundle, in which one leakage channel at a time is re-introduced, on which the matching probe must fire with a finding @@ -30,7 +30,9 @@ import pandas as pd import pytest -from leadforge.render.relational_snapshot_safe import to_dataframes_snapshot_safe +from leadforge.schemes.lead_scoring.render.relational_snapshot_safe import ( + to_dataframes_snapshot_safe, +) from leadforge.validation import leakage_probes from leadforge.validation.leakage_probes import ( CHANNEL_BANNED_COLUMN, From e7817035b7e75d5ccdc740e5169ebbf6dc906ae5 Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Wed, 10 Jun 2026 22:58:56 +0300 Subject: [PATCH 2/3] docs(ltv): record LTV-Pf.2 (#110) in roadmap + agent-plan [LTV-Pf.2] Link PR #110; advance status. Next: LTV-Pg (scaffold schemes/lifecycle/). Co-Authored-By: Claude Opus 4.8 --- .agent-plan.md | 2 +- docs/ltv/roadmap.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.agent-plan.md b/.agent-plan.md index 1120869..87b81c2 100644 --- a/.agent-plan.md +++ b/.agent-plan.md @@ -45,7 +45,7 @@ and `LTV-Pe` (#108) merged (scheme protocol + render seam). `LTV-Pf` (physical move, **hard break / no shims** per D12): Pf.1 (compute core — simulation/mechanisms/structure) merged (#109); Pf.2 (lead-scoring render — snapshots/relational_snapshot_safe/tasks moved + relational.py split so the -shared write_relational_tables stays in the envelope) opened as **#NNN**. Both +shared write_relational_tables stays in the envelope) opened as **#110**. Both byte-identical. Sibling `leadforge-datasets-private` consumes bundle files, not internals — no lockstep update needed (heads-up issue #8 filed). Next: `LTV-Pg` (scaffold `schemes/lifecycle/` + relocate the lead-scoring schema specs). diff --git a/docs/ltv/roadmap.md b/docs/ltv/roadmap.md index 0f4ef95..d6e0193 100644 --- a/docs/ltv/roadmap.md +++ b/docs/ltv/roadmap.md @@ -42,7 +42,7 @@ protocol + registry, with the package physically reorganized into |-----------|------------|-----|------------| | `LTV-M0` | Planning + design lock | `LTV-Pa` | #102, #103 (+ scheme reframe) | | `LTV-M1` | Lifecycle schema foundation | `LTV-Pb`, `LTV-Pc` | #104 (Pb) | -| `LTV-M2` | Generation-scheme architecture + physical reorg | `LTV-Pd`, `LTV-Pe`, `LTV-Pf`, `LTV-Pg` | #107 (Pd), #108 (Pe), #109 (Pf.1) | +| `LTV-M2` | Generation-scheme architecture + physical reorg | `LTV-Pd`, `LTV-Pe`, `LTV-Pf`, `LTV-Pg` | #107 (Pd), #108 (Pe), #109 (Pf.1), #110 (Pf.2) | | `LTV-M3` | Customer population + lifecycle world | `LTV-Ph`, `LTV-Pi` | | | `LTV-M4` | Lifecycle simulation engine | `LTV-Pj`, `LTV-Pk` | | | `LTV-M5` | Customer snapshots + pLTV targets (both regimes) | `LTV-Pl`, `LTV-Pm` | | @@ -128,7 +128,7 @@ Total: ~19 PRs across 9 milestones. - [x] **`LTV-Pf.2`** — render: relocated `render/{snapshots,relational_snapshot_safe,tasks}` under `schemes/lead_scoring/render/`, and split `render/relational.py` so the shared `write_relational_tables` stays in the envelope while the 9-table - `to_dataframes` moved. Verified byte-identical; full suite green. (**PR #NNN**) + `to_dataframes` moved. Verified byte-identical; full suite green. (**PR #110**) (The lead-scoring `schema` specs split lands with `LTV-Pg`.) - Tests: full suite + hash-determinism green; public API imports unchanged. - Labels: `type: refactor`, `layer: schema`, `layer: simulation`, `layer: render` From 5ea70a0a674dfcc777fc5f765f171526a363296c Mon Sep 17 00:00:00 2001 From: Shay Palachy Date: Wed, 10 Jun 2026 23:04:17 +0300 Subject: [PATCH 3/3] refactor(render): rename shared writer to relational_io; preserve to_dataframes history (self-review) [LTV-Pf.2] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-review caught that the relational.py split lost git history and left two files named relational.py. Both files shared the basename, so git's same-path heuristic recorded the scheme file (which inherits to_dataframes + _TABLE_SOURCES, the bulk of the original) as a brand-new ADD and the gutted shared remnant as a MODIFY — breaking `git blame`/`log --follow` on to_dataframes, and contradicting the history-preservation standard Pf.1 set. Fix: rename the shared writer module leadforge/render/relational.py → leadforge/render/relational_io.py. With the basename collision gone, git now records the scheme file as R067 (rename of the original — to_dataframes history preserved back to M8 #13) and relational_io.py as a clean ADD. Bonus: no more ambiguous duplicate `relational.py` (the shared writer is scheme-agnostic and isn't "relational"-specific anyway). - Importers of write_relational_tables updated (scheme __init__, test). - test_module_layout.py: assert the shared writer is at render.relational_io and the flat `leadforge.render.relational` module is gone. - CHANGELOG "Moved" table + CLAUDE.md (both layout sections) updated. Verified byte-identical (14/14); full suite 1532 passed / 51 skipped; ruff + mypy clean. Co-Authored-By: Claude Opus 4.8 --- CHANGELOG.md | 12 ++++++++---- CLAUDE.md | 4 ++-- .../render/{relational.py => relational_io.py} | 0 leadforge/schemes/lead_scoring/__init__.py | 2 +- tests/render/test_write_relational_tables.py | 2 +- tests/schemes/test_module_layout.py | 15 ++++++++++----- 6 files changed, 22 insertions(+), 13 deletions(-) rename leadforge/render/{relational.py => relational_io.py} (100%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b9feb0..a8b6ce1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,10 +25,14 @@ back-compat shims, by design): | `leadforge.render.relational_snapshot_safe` | `leadforge.schemes.lead_scoring.render.relational_snapshot_safe` | | `leadforge.render.tasks` | `leadforge.schemes.lead_scoring.render.tasks` | | `leadforge.render.relational:to_dataframes` | `leadforge.schemes.lead_scoring.render.relational:to_dataframes` | - -`leadforge.render` remains the shared bundle-output envelope: -`leadforge.render.relational.write_relational_tables` (the scheme-agnostic table -writer) and `leadforge.render.manifests` stay put. The lead-scoring `schema` +| `leadforge.render.relational:write_relational_tables` | `leadforge.render.relational_io:write_relational_tables` | + +The flat `leadforge.render.relational` module is **removed**: its 9-table +assembler (`to_dataframes`) moved to the scheme, and the scheme-agnostic writer +(`write_relational_tables`) moved to the new `leadforge.render.relational_io` +(renamed to avoid a basename clash with the scheme's `relational.py`). +`leadforge.render` remains the shared bundle-output envelope +(`relational_io` + `manifests`). The lead-scoring `schema` specs relocate in a follow-up PR (LTV-Pg). Consumers importing internals (e.g. the `leadforge-datasets-private` build scripts) must update to the new paths; the package stays on the `1.x` line (the public contract did not change). diff --git a/CLAUDE.md b/CLAUDE.md index 34e6ef5..973d116 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -162,7 +162,7 @@ leadforge/ write_bundle) + simulation/, mechanisms/, structure/, render/ (moved in LTV-Pf.1/Pf.2). Lead-scoring schema specs migrate here in LTV-Pg. See docs/ltv/design.md §2.5. - render/ relational.py (write_relational_tables — shared writer), manifests.py + render/ relational_io.py (write_relational_tables — shared writer), manifests.py # shared bundle-output envelope exposure/ modes.py, filters.py, redaction.py validation/ invariants.py, artifact_checks.py, realism.py, difficulty.py, drift.py @@ -254,7 +254,7 @@ leadforge/ # Python package root │ # NOTE (LTV-M2 reorg in progress): lead-scoring schema specs split in LTV-Pg. │ # See docs/ltv/design.md §2.5 for the target layout. ├── render/ # Shared bundle-output envelope -│ ├── relational.py # write_relational_tables() — shared table writer +│ ├── relational_io.py # write_relational_tables() — shared table writer │ └── manifests.py # build_manifest(), write_manifest() ├── exposure/ # Truth filtering │ ├── modes.py # apply_exposure() dispatch diff --git a/leadforge/render/relational.py b/leadforge/render/relational_io.py similarity index 100% rename from leadforge/render/relational.py rename to leadforge/render/relational_io.py diff --git a/leadforge/schemes/lead_scoring/__init__.py b/leadforge/schemes/lead_scoring/__init__.py index c127da0..e8a7dec 100644 --- a/leadforge/schemes/lead_scoring/__init__.py +++ b/leadforge/schemes/lead_scoring/__init__.py @@ -162,7 +162,7 @@ def write_bundle( from leadforge.exposure.modes import apply_exposure from leadforge.narrative.dataset_card import render_dataset_card from leadforge.render.manifests import build_manifest, write_manifest - from leadforge.render.relational import write_relational_tables + from leadforge.render.relational_io import write_relational_tables from leadforge.schema.dictionaries import write_feature_dictionary from leadforge.schema.features import LEAD_SNAPSHOT_FEATURES, redacted_columns_for from leadforge.schema.tasks import task_manifest_for_config diff --git a/tests/render/test_write_relational_tables.py b/tests/render/test_write_relational_tables.py index 3e48a93..d67f750 100644 --- a/tests/render/test_write_relational_tables.py +++ b/tests/render/test_write_relational_tables.py @@ -4,7 +4,7 @@ import pandas as pd -from leadforge.render.relational import write_relational_tables +from leadforge.render.relational_io import write_relational_tables from leadforge.schema.tables import read_parquet diff --git a/tests/schemes/test_module_layout.py b/tests/schemes/test_module_layout.py index 98984c5..3ba22d4 100644 --- a/tests/schemes/test_module_layout.py +++ b/tests/schemes/test_module_layout.py @@ -50,19 +50,24 @@ def test_old_top_level_package_is_gone(pkg: str) -> None: def test_render_envelope_package_stays() -> None: # LTV-Pf.2 moved the lead-scoring render modules, but `leadforge.render` - # remains the shared envelope (manifests + the relational-table writer). + # remains the shared envelope: manifests + the relational-table writer + # (renamed to relational_io to avoid a basename clash with the scheme's + # relational.py assembler). import leadforge.render.manifests # noqa: F401 - import leadforge.render.relational as shared_relational + import leadforge.render.relational_io as shared_writer - assert hasattr(shared_relational, "write_relational_tables") + assert hasattr(shared_writer, "write_relational_tables") def test_relational_split_to_dataframes_moved_to_scheme() -> None: # The 9-table assembler moved to the scheme; the shared writer did not. - import leadforge.render.relational as shared_relational + import leadforge.render.relational_io as shared_writer from leadforge.schemes.lead_scoring.render.relational import to_dataframes # noqa: F401 - assert not hasattr(shared_relational, "to_dataframes") + assert not hasattr(shared_writer, "to_dataframes") + # The ambiguous flat `leadforge.render.relational` module is gone. + with pytest.raises(ModuleNotFoundError): + importlib.import_module("leadforge.render.relational") def test_public_api_unchanged_by_the_move() -> None: