leadforge-dev · shaypal5 · Jun 10, 2026 · Jun 10, 2026 · Jun 10, 2026
diff --git a/.agent-plan.md b/.agent-plan.md
@@ -32,8 +32,9 @@ churn kept as a secondary task; weekly simulation steps; independent generation
 bundle schema version 5 → 6. (Framing follows Google `lifetime_value`/ZILN and
 Voyantis pLTV; the earlier churn-classification framing was corrected.)
 
-Status: `LTV-M0` (planning) — design + roadmap landed (#102) and reframed to
-pLTV regression. `LTV-M1` (schema foundation) is next.
+Status: `LTV-M0` planning landed (#102) + reframed to pLTV regression (#103).
+`LTV-M1`: `LTV-Pb` (lifecycle entity rows + registries) opened as **#104**
+(awaiting review). `LTV-Pc` (pLTV feature spec + regression task specs) next.
 
 ---
 

diff --git a/docs/ltv/design.md b/docs/ltv/design.md
@@ -192,18 +192,34 @@ hard case: only a few weeks of health signal exist at the cutoff.
 | `amount_usd` | Int64 | the unit of pLTV value (§3) |
 | `payment_status` | string | `paid` / `failed` / `recovered` / `written_off` |
 
-### 4.2 Extended existing entity rows
+### 4.2 Richer customer / subscription rows (lifecycle-only)
 
 The current `CustomerRow` (4 fields) and `SubscriptionRow` (5 fields,
-`subscription_status` hardcoded `"active"`) are shells. The lifecycle recipe
-fills them out with **nullable** fields so the procurement recipe's output is
-unchanged.
-
-`CustomerRow` gains: `initial_mrr`, `initial_plan`, `contract_term_months`,
-`csm_rep_id`.
-
-`SubscriptionRow` gains: `current_mrr`, `subscription_end_at`, `churn_at`,
-`churn_reason`, `renewal_count`, `expansion_count`.
+`subscription_status` hardcoded `"active"`) are thin shells that only record
+conversion in the procurement world. The lifecycle bundle needs much richer
+versions.
+
+**Implementation note (decided in LTV-Pb):** these are added as *dedicated*
+classes — `CustomerLifecycleRow` and `SubscriptionLifecycleRow` (both reusing
+the logical table names `customers` / `subscriptions`) — kept in a separate
+`LIFECYCLE_ROW_TYPES` registry, **not** by extending the existing classes in
+place. The reason: `EntityRow.to_dict()` emits *every* dataclass field, so
+adding fields to `CustomerRow`/`SubscriptionRow` would silently change the
+lead-scoring instructor bundle's parquet schema. Dedicated classes keep the
+lead-scoring catalog (`ALL_ROW_TYPES`, `TABLE_NAMES`, `ALL_CONSTRAINTS`) and
+its output byte-for-byte unchanged. The two shapes never co-occur in one
+bundle.
+
+`CustomerLifecycleRow` carries: `customer_id`, `account_id`,
+`customer_start_at`, `initial_plan`, `initial_mrr`, `contract_term_months`,
+`csm_rep_id`, and a nullable `opportunity_id` (always `None` under independent
+generation; reserved for future chaining).
+
+`SubscriptionLifecycleRow` carries: `subscription_id`, `customer_id`,
+`plan_name`, `subscription_status`, `subscription_start_at`, `current_mrr`,
+`contract_term_months`, `renewal_count`, `expansion_count`, and the
+terminal fields `subscription_end_at`, `churn_at`, `churn_reason`
+(nullable; redacted from `student_public` bundles).
 
 ### 4.3 Public lifecycle table inventory
 
@@ -402,7 +418,7 @@ bands are fit on the regression metrics, not AUC.
 
 | file | change |
 |------|--------|
-| `leadforge/schema/entities.py` | add 3 rows; extend `CustomerRow`/`SubscriptionRow` |
+| `leadforge/schema/entities.py` | add 5 lifecycle rows + `LIFECYCLE_ROW_TYPES` registry (3 event tables + dedicated `CustomerLifecycleRow`/`SubscriptionLifecycleRow`); lead-scoring catalog untouched |
 | `leadforge/schema/features.py` | add `CUSTOMER_SNAPSHOT_FEATURES` (3 regression targets + secondary churn) |
 | `leadforge/schema/tasks.py` | add `LTV_REVENUE_{90,365,730}D` regression task specs + `CHURN_WITHIN_180D` |
 | `leadforge/schema/relationships.py` | FK constraints for new tables |

diff --git a/docs/ltv/roadmap.md b/docs/ltv/roadmap.md
@@ -29,7 +29,7 @@ it. Default labels per PR: a `type:` label, relevant `layer:` labels, and
 | Milestone | Capability | PRs | GitHub PRs |
 |-----------|------------|-----|------------|
 | `LTV-M0` | Planning + design lock | `LTV-Pa` | #102 (+ pLTV reframe) |
-| `LTV-M1` | Schema foundation | `LTV-Pb`, `LTV-Pc` | |
+| `LTV-M1` | Schema foundation | `LTV-Pb`, `LTV-Pc` | #104 (Pb) |
 | `LTV-M2` | Customer population + lifecycle world | `LTV-Pd`, `LTV-Pe` | |
 | `LTV-M3` | Lifecycle simulation engine | `LTV-Pf`, `LTV-Pg` | |
 | `LTV-M4` | Customer snapshots + pLTV targets (both regimes) | `LTV-Ph`, `LTV-Pi` | |
@@ -55,7 +55,7 @@ Total: ~15 PRs across 8 milestones (LTV-M0 = planning).
 
 ## `LTV-M1` — Schema foundation
 
-- [ ] **`LTV-Pb`** — `feat(schema): lifecycle entity rows`. Add
+- [x] **`LTV-Pb`** — `feat(schema): lifecycle entity rows` (**PR #104**). Add
   `SubscriptionEventRow`, `HealthSignalRow`, `InvoiceRow` to `entities.py`;
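-  extend `CustomerRow` / `SubscriptionRow` with nullable lifecycle fields
-  (lead-scoring output unchanged). Register in `ALL_ROW_TYPES`. Add FK
+  add dedicated `CustomerLifecycleRow` / `SubscriptionLifecycleRow` classes
+  (lead-scoring output unchanged). Register in `LIFECYCLE_ROW_TYPES`. Add FK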

diff --git a/leadforge/core/ids.py b/leadforge/core/ids.py
@@ -23,6 +23,18 @@
 The ``rep_`` prefix is an internal-only namespace used for sales-rep entities
 that participate in simulation mechanics but do **not** have a corresponding
 standalone relational table in the v1 output bundle.
+
+Lifecycle prefixes
+------------------
+The following prefixes are used by the lifecycle (``b2b_saas_ltv_v1``) bundle
+for entity types that exist only in the post-conversion world:
+
+subev_  — SubscriptionEvent
+hsig_   — HealthSignal
+inv_    — Invoice
+
+Customers and subscriptions in the lifecycle bundle reuse the existing
+``cust_`` / ``sub_`` prefixes.
 """
 
 from __future__ import annotations
@@ -40,6 +52,10 @@
     "customer": "cust",
     "subscription": "sub",
     "rep": "rep",
+    # Lifecycle (b2b_saas_ltv_v1) entity types.
+    "subscription_event": "subev",
+    "health_signal": "hsig",
+    "invoice": "inv",
 }
 
 _PAD_WIDTH = 6  # e.g. acct_000001

diff --git a/leadforge/schema/entities.py b/leadforge/schema/entities.py
@@ -385,6 +385,205 @@ def empty_dataframe(cls) -> pd.DataFrame:
         return _empty_df(cls.DTYPE_MAP)
 
 
+# ===========================================================================
+# Lifecycle entity rows (b2b_saas_ltv_v1 — see docs/ltv/design.md)
+# ---------------------------------------------------------------------------
+# These rows belong to the *lifecycle* bundle shape only.  They are kept in a
+# separate registry (:data:`LIFECYCLE_ROW_TYPES`) and are NOT added to
+# :data:`ALL_ROW_TYPES`, so the lead-scoring bundle's table inventory and
+# column schemas are completely unchanged.
+#
+# The lifecycle bundle's ``customers`` and ``subscriptions`` tables are richer
+# than the thin lead-scoring :class:`CustomerRow` / :class:`SubscriptionRow`
+# (which exist only to record conversion in the procurement world).  Rather
+# than extend those classes in place — which would change the lead-scoring
+# instructor bundle's parquet schema, since ``to_dict()`` emits every field —
+# the lifecycle bundle uses the dedicated :class:`CustomerLifecycleRow` /
+# :class:`SubscriptionLifecycleRow` classes below.  Both deliberately reuse the
+# logical table names ``customers`` / ``subscriptions``; the two shapes never
+# co-occur in one bundle, and no registry holds both (only AccountRow is shared).
+# ===========================================================================
+
+
+@dataclass
+class CustomerLifecycleRow:
+    """One row in the lifecycle ``customers`` table.
+
+    Static, set-at-acquisition attributes of a customer.  ``opportunity_id`` is
+    nullable because the lifecycle recipe generates customers **independently**
+    (no upstream opportunities table); it is reserved for future chained
+    generation from a lead-scoring bundle's converted leads.
+    """
+
+    TABLE_NAME: ClassVar[str] = "customers"
+    # Column order matches the dataclass field order below; ``opportunity_id``
+    # carries a default (nullable) so it must come last in both.
+    DTYPE_MAP: ClassVar[dict[str, str]] = {
+        "customer_id": "string",
+        "account_id": "string",
+        "customer_start_at": "string",
+        "initial_plan": "string",
+        "initial_mrr": "Int64",
+        "contract_term_months": "Int64",
+        "csm_rep_id": "string",
+        "opportunity_id": "string",
+    }
+
+    customer_id: str
+    account_id: str
+    customer_start_at: str
+    initial_plan: str
+    initial_mrr: int
+    contract_term_months: int
+    csm_rep_id: str
+    opportunity_id: str | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        return {f.name: getattr(self, f.name) for f in fields(self)}
+
+    @classmethod
+    def empty_dataframe(cls) -> pd.DataFrame:
+        return _empty_df(cls.DTYPE_MAP)
+
+
+@dataclass
+class SubscriptionLifecycleRow:
+    """One row in the lifecycle ``subscriptions`` table.
+
+    Carries the subscription's terminal/dynamic state as of the end of the
+    simulation.  Terminal fields (``subscription_end_at``, ``churn_at``,
+    ``churn_reason``) are redacted from ``student_public`` bundles per the
+    lifecycle snapshot-safety contract (see ``docs/ltv/design.md`` §5).
+    """
+
+    TABLE_NAME: ClassVar[str] = "subscriptions"
+    DTYPE_MAP: ClassVar[dict[str, str]] = {
+        "subscription_id": "string",
+        "customer_id": "string",
+        "plan_name": "string",
+        "subscription_status": "string",
+        "subscription_start_at": "string",
+        "current_mrr": "Int64",
+        "contract_term_months": "Int64",
+        "renewal_count": "Int64",
+        "expansion_count": "Int64",
+        "subscription_end_at": "string",
+        "churn_at": "string",
+        "churn_reason": "string",
+    }
+
+    subscription_id: str
+    customer_id: str
+    plan_name: str
+    subscription_status: str
+    subscription_start_at: str
+    current_mrr: int
+    contract_term_months: int
+    renewal_count: int
+    expansion_count: int
+    subscription_end_at: str | None = None
+    churn_at: str | None = None
+    churn_reason: str | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        return {f.name: getattr(self, f.name) for f in fields(self)}
+
+    @classmethod
+    def empty_dataframe(cls) -> pd.DataFrame:
+        return _empty_df(cls.DTYPE_MAP)
+
+
+@dataclass
+class SubscriptionEventRow:
+    """One row in the ``subscription_events`` table — a lifecycle state change."""
+
+    TABLE_NAME: ClassVar[str] = "subscription_events"
+    DTYPE_MAP: ClassVar[dict[str, str]] = {
+        "event_id": "string",
+        "subscription_id": "string",
+        "customer_id": "string",
+        "event_timestamp": "string",
+        "event_type": "string",
+        "mrr_before": "Int64",
+        "mrr_after": "Int64",
+        "contract_term_months_new": "Int64",
+    }
+
+    event_id: str
+    subscription_id: str
+    customer_id: str
+    event_timestamp: str
+    event_type: str
+    mrr_before: int
+    mrr_after: int
+    contract_term_months_new: int | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        return {f.name: getattr(self, f.name) for f in fields(self)}
+
+    @classmethod
+    def empty_dataframe(cls) -> pd.DataFrame:
+        return _empty_df(cls.DTYPE_MAP)
+
+
+@dataclass
+class HealthSignalRow:
+    """One row in the ``health_signals`` table — weekly product-usage telemetry."""
+
+    TABLE_NAME: ClassVar[str] = "health_signals"
+    DTYPE_MAP: ClassVar[dict[str, str]] = {
+        "signal_id": "string",
+        "customer_id": "string",
+        "period_start": "string",
+        "active_users": "Int64",
+        "feature_depth_score": "Float64",
+        "support_tickets": "Int64",
+        "nps_score": "Int64",
+    }
+
+    signal_id: str
+    customer_id: str
+    period_start: str
+    active_users: int
+    feature_depth_score: float
+    support_tickets: int
+    nps_score: int | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        return {f.name: getattr(self, f.name) for f in fields(self)}
+
+    @classmethod
+    def empty_dataframe(cls) -> pd.DataFrame:
+        return _empty_df(cls.DTYPE_MAP)
+
+
+@dataclass
+class InvoiceRow:
+    """One row in the ``invoices`` table — monthly billing; the unit of pLTV value."""
+
+    TABLE_NAME: ClassVar[str] = "invoices"
+    DTYPE_MAP: ClassVar[dict[str, str]] = {
+        "invoice_id": "string",
+        "customer_id": "string",
+        "invoice_date": "string",
+        "amount_usd": "Int64",
+        "payment_status": "string",
+    }
+
+    invoice_id: str
+    customer_id: str
+    invoice_date: str
+    amount_usd: int
+    payment_status: str
+
+    def to_dict(self) -> dict[str, Any]:
+        return {f.name: getattr(self, f.name) for f in fields(self)}
+
+    @classmethod
+    def empty_dataframe(cls) -> pd.DataFrame:
+        return _empty_df(cls.DTYPE_MAP)
+
+
 # ---------------------------------------------------------------------------
 # Registry
 # ---------------------------------------------------------------------------
@@ -402,3 +601,17 @@ def empty_dataframe(cls) -> pd.DataFrame:
 )
 
 TABLE_NAMES: tuple[str, ...] = tuple(cls.TABLE_NAME for cls in ALL_ROW_TYPES)
+
+# Lifecycle (b2b_saas_ltv_v1) bundle table inventory.  Kept separate from
+# ALL_ROW_TYPES so the lead-scoring bundle is unaffected.  AccountRow is shared
+# (reused unchanged); customers/subscriptions use the richer lifecycle classes.
+LIFECYCLE_ROW_TYPES: tuple[type[EntityRowProtocol], ...] = (
+    AccountRow,
+    CustomerLifecycleRow,
+    SubscriptionLifecycleRow,
+    SubscriptionEventRow,
+    HealthSignalRow,
+    InvoiceRow,
+)
+
+LIFECYCLE_TABLE_NAMES: tuple[str, ...] = tuple(cls.TABLE_NAME for cls in LIFECYCLE_ROW_TYPES)
diff --git a/leadforge/schema/relationships.py b/leadforge/schema/relationships.py
@@ -41,6 +41,22 @@ class FKConstraint:
 )
 
 
+# Lifecycle (b2b_saas_ltv_v1) FK constraints — see docs/ltv/design.md.
+# Kept separate from ALL_CONSTRAINTS so the lead-scoring model is unchanged.
+# The lifecycle ``customers`` table links only to ``accounts`` (independent
+# generation, no ``opportunities`` table), so there is no customer→opportunity
+# FK here despite the nullable ``opportunity_id`` column being reserved for
+# future chained generation.
+LIFECYCLE_CONSTRAINTS: tuple[FKConstraint, ...] = (
+    FKConstraint("customers", "account_id", "accounts", "account_id"),
+    FKConstraint("subscriptions", "customer_id", "customers", "customer_id"),
+    FKConstraint("subscription_events", "subscription_id", "subscriptions", "subscription_id"),
+    FKConstraint("subscription_events", "customer_id", "customers", "customer_id"),
+    FKConstraint("health_signals", "customer_id", "customers", "customer_id"),
+    FKConstraint("invoices", "customer_id", "customers", "customer_id"),
+)
+
+
 def validate_fk(
     child_values: list[str],
     parent_values: set[str],

diff --git a/tests/schema/test_ids.py b/tests/schema/test_ids.py
@@ -53,6 +53,9 @@ def test_id_prefixes_covers_all_entities() -> None:
         "customer",
         "subscription",
         "rep",
+        "subscription_event",
+        "health_signal",
+        "invoice",
     }
     assert set(ID_PREFIXES.keys()) == expected