From 90a6875d4135e535cb8e53b2878910c4aed3b4da Mon Sep 17 00:00:00 2001
From: Shay Palachy <shaypal5@users.noreply.github.com>
Date: Mon, 27 Apr 2026 16:03:57 +0300
Subject: [PATCH 1/2] =?UTF-8?q?feat:=20Milestone=205=20=E2=80=94=20populat?=
 =?UTF-8?q?ion=20generation=20and=20latent=20state=20initialisation?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements build_population() in leadforge/simulation/population.py:
- AccountRow generation: industry, region, employee/revenue/maturity bands,
  account created_at spread 30-730 days before world base date
- ContactRow generation: persona-driven title/role/buyer_role, conditional
  account FK, contact created_at anchored to parent account
- LeadRow generation: GTM-weighted lead_source, rep assignment from internal
  pool, lead_created_at within 30-day base window; initial stage = mql
- LatentState: 8 hidden traits across 3 entity types, all in [0,1], sampled
  from clipped Gaussians with motif-family-aware mean biases
- FK invariant: lead.account_id always equals contact.account_id
- All randomness via RNGRoot named substreams — fully deterministic

26 tests: counts, determinism, FK integrity, latent range/completeness,
motif bias properties (fit_dominant vs buying_committee_friction), and
observable field validity.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .agent-plan.md                      |  33 ++-
 leadforge/simulation/population.py  | 388 ++++++++++++++++++++++++++++
 tests/simulation/__init__.py        |   0
 tests/simulation/test_population.py | 351 +++++++++++++++++++++++++
 4 files changed, 761 insertions(+), 11 deletions(-)
 create mode 100644 leadforge/simulation/population.py
 create mode 100644 tests/simulation/__init__.py
 create mode 100644 tests/simulation/test_population.py

diff --git a/.agent-plan.md b/.agent-plan.md
index 0cc6073..317a687 100644
--- a/.agent-plan.md
+++ b/.agent-plan.md
@@ -6,33 +6,44 @@
 
 ## Current System State
 
-**v0.3.0 in progress — Milestone 4 complete (PR open).** Hidden world graph fully implemented:
-typed node system, DAG-validated WorldGraph, all 5 v1 motif families, stochastic rewiring, and
-graph sampler. 327 tests passing.
+**v0.3.0 in progress — Milestone 5 complete (PR open).** Population generation fully
+implemented: accounts, contacts, leads with all observable fields, full latent state
+(8 traits across 3 entity types), motif-family-aware bias, and FK integrity guaranteed.
+358 tests passing.
 
 ---
 
-## Active Task Breakdown — Milestone 5: Population Generation (v0.3.0)
+## Active Task Breakdown — Milestone 6: Mechanism Layer v1 (v0.3.0)
 
-Goal: Generate the base commercial world population before dynamic events begin.
+Goal: Implement the static and dynamic mechanisms that drive simulation behavior.
 
-- [ ] **1. Account generation** (`simulation/population.py`) — accounts with latent traits
-- [ ] **2. Contact generation** — contacts conditional on account properties
-- [ ] **3. Lead creation** — leads anchored to contacts/accounts
-- [ ] **4. Latent-state initialisation** — sample core latent traits tied to graph/motif
+- [ ] **1. Base mechanism interface** (`mechanisms/base.py`)
+- [ ] **2. Static mechanisms** — categorical, ordinal, bounded-numeric draws (`mechanisms/static.py`)
+- [ ] **3. Transition mechanisms** — lead-stage advancement logic (`mechanisms/transitions.py`)
+- [ ] **4. Score/hazard mechanisms** — latent-to-observable scoring, conversion hazard (`mechanisms/scores.py`, `mechanisms/hazards.py`)
+- [ ] **5. Measurement mechanisms** — noisy proxy observation of latent traits (`mechanisms/measurement.py`)
 
 ---
 
 ## Context Pointers
 
-- Milestone 5 scope: `docs/leadforge_implementation_plan.md` §8 "Milestone 5"
-- Structure spec: `docs/leadforge_architecture_spec.md` §11
+- Milestone 6 scope: `docs/leadforge_implementation_plan.md` §9 "Milestone 6"
+- Mechanism types: `docs/leadforge_architecture_spec.md` §10 "Mechanism layer"
 - Latent variables: `docs/leadforge_architecture_spec.md` §9
 
 ---
 
 ## Completed Phases
 
+### Milestone 5 — Population Generation ✓ (v0.3.0 in PR)
+- `leadforge/simulation/population.py`: `build_population()` — accounts (3 latent traits),
+  contacts (4 latent traits, conditional on account), leads (1 latent trait, FK-consistent),
+  `LatentState` and `PopulationResult` output types
+- Motif-family latent biases (`_MOTIF_LATENT_BIAS`) linking world structure to population
+- `tests/simulation/test_population.py`: 26 tests covering counts, determinism, FK integrity,
+  latent value ranges, trait completeness, motif bias properties, and observable field validity
+- Total: 358 tests passing
+
 ### Milestone 4 — World Structure Layer ✓ (v0.3.0 in PR)
 - `leadforge/structure/node_types.py`: `NodeType` enum (9 categories); `ROOT_ELIGIBLE`,
   `REQUIRES_PARENT`, `LEAF_ONLY` constraint sets
diff --git a/leadforge/simulation/population.py b/leadforge/simulation/population.py
new file mode 100644
index 0000000..3b19d18
--- /dev/null
+++ b/leadforge/simulation/population.py
@@ -0,0 +1,388 @@
+"""Population generation — accounts, contacts, leads, and latent states.
+
+:func:`build_population` is the single entry point consumed by the
+simulation layer.  All randomness derives from named :class:`~leadforge.core.rng.RNGRoot`
+substreams so the full population is deterministic given ``config.seed``.
+
+Latent state
+------------
+Each entity carries hidden ground-truth traits that drive simulation
+mechanics but are **never** directly exposed in ``student_public`` mode:
+
+- **account** — ``latent_account_fit``, ``latent_budget_readiness``,
+  ``latent_process_maturity``
+- **contact** — ``latent_problem_awareness``, ``latent_contact_authority``,
+  ``latent_responsiveness``, ``latent_engagement_propensity``
+- **lead** — ``latent_sales_friction``
+
+All values are floats in [0, 1] sampled from a clipped Gaussian.  The
+active motif family shifts the mean of selected traits to create a
+structurally coherent world (e.g. ``fit_dominant`` raises the mean of
+``latent_account_fit``).
+"""
+
+from __future__ import annotations
+
+import random
+from dataclasses import dataclass, field
+from datetime import date, timedelta
+from typing import TYPE_CHECKING
+
+from leadforge.core.ids import ID_PREFIXES, make_id
+from leadforge.core.models import GenerationConfig
+from leadforge.core.rng import RNGRoot
+from leadforge.schema.entities import AccountRow, ContactRow, LeadRow
+
+if TYPE_CHECKING:
+    from leadforge.narrative.spec import NarrativeSpec
+    from leadforge.structure.graph import WorldGraph
+
+
+# ---------------------------------------------------------------------------
+# Output types
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class LatentState:
+    """Hidden ground-truth latent variables for all entities in one world.
+
+    Each mapping is ``entity_id → {trait_name: float_in_[0,1]}``.
+    """
+
+    account_latents: dict[str, dict[str, float]] = field(default_factory=dict)
+    contact_latents: dict[str, dict[str, float]] = field(default_factory=dict)
+    lead_latents: dict[str, dict[str, float]] = field(default_factory=dict)
+
+
+@dataclass
+class PopulationResult:
+    """Output of one :func:`build_population` call."""
+
+    accounts: list[AccountRow]
+    contacts: list[ContactRow]
+    leads: list[LeadRow]
+    latent_state: LatentState
+
+
+# ---------------------------------------------------------------------------
+# Internal constants
+# ---------------------------------------------------------------------------
+
+_EMPLOYEE_BANDS = ("200-499", "500-999", "1000-1999", "2000+")
+_EMPLOYEE_BAND_WEIGHTS = (0.35, 0.35, 0.20, 0.10)
+
+_REVENUE_BANDS = ("$1M-$10M", "$10M-$50M", "$50M-$200M", "$200M+")
+_REVENUE_BAND_WEIGHTS = (0.25, 0.40, 0.25, 0.10)
+
+_PROCESS_MATURITY_BANDS = ("low", "medium", "high")
+_PROCESS_MATURITY_BAND_WEIGHTS = (0.30, 0.45, 0.25)
+_PROCESS_MATURITY_MEANS = {"low": 0.25, "medium": 0.50, "high": 0.75}
+
+_SENIORITY_LEVELS = ("individual_contributor", "manager", "director", "vp", "c_suite")
+_SENIORITY_WEIGHTS = (0.25, 0.30, 0.25, 0.15, 0.05)
+
+_EMAIL_DOMAIN_TYPES = ("corporate", "personal", "unknown")
+_EMAIL_DOMAIN_WEIGHTS = (0.80, 0.12, 0.08)
+
+# Base reference date: all leads are created within a 30-day window starting here.
+_WORLD_BASE_DATE = date(2024, 1, 1)
+
+# Number of internal sales-rep entities used for lead assignment.
+_N_REPS = 10
+
+# Motif-family-specific additive bias on the default 0.50 latent mean.
+# Only traits explicitly listed are shifted; all others stay at 0.50.
+_MOTIF_LATENT_BIAS: dict[str, dict[str, float]] = {
+    "fit_dominant": {
+        "latent_account_fit": 0.10,
+        "latent_budget_readiness": 0.05,
+    },
+    "intent_dominant": {
+        "latent_engagement_propensity": 0.12,
+        "latent_problem_awareness": 0.10,
+    },
+    "sales_execution_sensitive": {
+        "latent_sales_friction": 0.12,
+        "latent_responsiveness": -0.08,
+    },
+    "demo_trial_mediated": {
+        "latent_engagement_propensity": 0.08,
+        "latent_problem_awareness": 0.06,
+    },
+    "buying_committee_friction": {
+        "latent_contact_authority": -0.10,
+        "latent_sales_friction": 0.15,
+    },
+}
+
+# Map GTM channel names → GtmMotionSpec attribute names.
+_CHANNEL_TO_SHARE_ATTR: dict[str, str] = {
+    "inbound_marketing": "inbound_share",
+    "sdr_outbound": "outbound_share",
+    "partner_referral": "partner_share",
+}
+
+
+# ---------------------------------------------------------------------------
+# Public entry point
+# ---------------------------------------------------------------------------
+
+
+def build_population(
+    config: GenerationConfig,
+    narrative: NarrativeSpec,
+    world_graph: WorldGraph,
+) -> PopulationResult:
+    """Generate accounts, contacts, leads, and their latent states.
+
+    All randomness is derived from named substreams of ``RNGRoot(config.seed)``
+    so the result is fully deterministic for a given ``(config, world_graph)``.
+
+    Args:
+        config: Fully resolved generation configuration (counts, seed, etc.).
+        narrative: Parsed narrative spec providing ICP industries, geographies,
+            personas, and GTM channel mix.
+        world_graph: The sampled hidden world graph; its ``motif_family`` is used
+            to apply latent-trait mean biases that make the world structurally
+            coherent.
+
+    Returns:
+        A :class:`PopulationResult` containing the three entity lists and the
+        full :class:`LatentState`.
+    """
+    root = RNGRoot(config.seed)
+    bias = _MOTIF_LATENT_BIAS.get(world_graph.motif_family, {})
+
+    accounts, acct_latents = _generate_accounts(
+        n=config.n_accounts,
+        narrative=narrative,
+        bias=bias,
+        rng=root.child("population_accounts"),
+    )
+
+    contacts, cont_latents = _generate_contacts(
+        n=config.n_contacts,
+        accounts=accounts,
+        narrative=narrative,
+        bias=bias,
+        rng=root.child("population_contacts"),
+    )
+
+    leads, lead_latents = _generate_leads(
+        n=config.n_leads,
+        contacts=contacts,
+        narrative=narrative,
+        bias=bias,
+        rng=root.child("population_leads"),
+    )
+
+    return PopulationResult(
+        accounts=accounts,
+        contacts=contacts,
+        leads=leads,
+        latent_state=LatentState(
+            account_latents=acct_latents,
+            contact_latents=cont_latents,
+            lead_latents=lead_latents,
+        ),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Account generation
+# ---------------------------------------------------------------------------
+
+
+def _generate_accounts(
+    n: int,
+    narrative: NarrativeSpec,
+    bias: dict[str, float],
+    rng: random.Random,
+) -> tuple[list[AccountRow], dict[str, dict[str, float]]]:
+    industries = list(narrative.market.icp_industries)
+    geographies = list(narrative.market.geographies)
+
+    rows: list[AccountRow] = []
+    latents: dict[str, dict[str, float]] = {}
+
+    for i in range(1, n + 1):
+        acct_id = make_id(ID_PREFIXES["account"], i)
+
+        industry = rng.choice(industries)
+        region = rng.choice(geographies)
+        employee_band = rng.choices(_EMPLOYEE_BANDS, weights=_EMPLOYEE_BAND_WEIGHTS, k=1)[0]
+        revenue_band = rng.choices(_REVENUE_BANDS, weights=_REVENUE_BAND_WEIGHTS, k=1)[0]
+        maturity_band = rng.choices(
+            _PROCESS_MATURITY_BANDS, weights=_PROCESS_MATURITY_BAND_WEIGHTS, k=1
+        )[0]
+
+        days_before = rng.randint(30, 730)
+        created_at = (_WORLD_BASE_DATE - timedelta(days=days_before)).isoformat()
+
+        rows.append(
+            AccountRow(
+                account_id=acct_id,
+                company_name=f"Company {acct_id}",
+                industry=industry,
+                region=region,
+                employee_band=employee_band,
+                estimated_revenue_band=revenue_band,
+                process_maturity_band=maturity_band,
+                created_at=created_at,
+            )
+        )
+        latents[acct_id] = {
+            "latent_account_fit": _sample_latent(rng, 0.50 + bias.get("latent_account_fit", 0.0)),
+            "latent_budget_readiness": _sample_latent(
+                rng, 0.50 + bias.get("latent_budget_readiness", 0.0)
+            ),
+            # Correlated with observable band; not directly biased by motif.
+            "latent_process_maturity": _sample_latent(
+                rng, _PROCESS_MATURITY_MEANS[maturity_band], std=0.15
+            ),
+        }
+
+    return rows, latents
+
+
+# ---------------------------------------------------------------------------
+# Contact generation
+# ---------------------------------------------------------------------------
+
+
+def _generate_contacts(
+    n: int,
+    accounts: list[AccountRow],
+    narrative: NarrativeSpec,
+    bias: dict[str, float],
+    rng: random.Random,
+) -> tuple[list[ContactRow], dict[str, dict[str, float]]]:
+    personas = list(narrative.personas)
+
+    rows: list[ContactRow] = []
+    latents: dict[str, dict[str, float]] = {}
+
+    for i in range(1, n + 1):
+        cnt_id = make_id(ID_PREFIXES["contact"], i)
+        account = rng.choice(accounts)
+
+        persona = rng.choice(personas)
+        job_title = rng.choice(list(persona.title_variants))
+        role_function = persona.role
+        buyer_role = persona.decision_authority
+        seniority = rng.choices(_SENIORITY_LEVELS, weights=_SENIORITY_WEIGHTS, k=1)[0]
+        email_domain = rng.choices(_EMAIL_DOMAIN_TYPES, weights=_EMAIL_DOMAIN_WEIGHTS, k=1)[0]
+
+        # Contacts are created at or shortly after their account.
+        acct_date = date.fromisoformat(account.created_at)
+        days_after = rng.randint(0, 30)
+        created_at = (acct_date + timedelta(days=days_after)).isoformat()
+
+        rows.append(
+            ContactRow(
+                contact_id=cnt_id,
+                account_id=account.account_id,
+                job_title=job_title,
+                role_function=role_function,
+                seniority=seniority,
+                buyer_role=buyer_role,
+                email_domain_type=email_domain,
+                created_at=created_at,
+            )
+        )
+        latents[cnt_id] = {
+            "latent_problem_awareness": _sample_latent(
+                rng, 0.50 + bias.get("latent_problem_awareness", 0.0)
+            ),
+            "latent_contact_authority": _sample_latent(
+                rng, 0.50 + bias.get("latent_contact_authority", 0.0)
+            ),
+            "latent_responsiveness": _sample_latent(
+                rng, 0.50 + bias.get("latent_responsiveness", 0.0)
+            ),
+            "latent_engagement_propensity": _sample_latent(
+                rng, 0.50 + bias.get("latent_engagement_propensity", 0.0)
+            ),
+        }
+
+    return rows, latents
+
+
+# ---------------------------------------------------------------------------
+# Lead generation
+# ---------------------------------------------------------------------------
+
+
+def _generate_leads(
+    n: int,
+    contacts: list[ContactRow],
+    narrative: NarrativeSpec,
+    bias: dict[str, float],
+    rng: random.Random,
+) -> tuple[list[LeadRow], dict[str, dict[str, float]]]:
+    channels, channel_weights = _channel_weights(narrative)
+    rep_ids = [make_id(ID_PREFIXES["rep"], i) for i in range(1, _N_REPS + 1)]
+
+    rows: list[LeadRow] = []
+    latents: dict[str, dict[str, float]] = {}
+
+    for i in range(1, n + 1):
+        lead_id = make_id(ID_PREFIXES["lead"], i)
+        contact = rng.choice(contacts)
+
+        lead_source = rng.choices(channels, weights=channel_weights, k=1)[0]
+        days_offset = rng.randint(0, 29)
+        lead_created_at = (_WORLD_BASE_DATE + timedelta(days=days_offset)).isoformat()
+        owner_rep_id = rng.choice(rep_ids)
+
+        rows.append(
+            LeadRow(
+                lead_id=lead_id,
+                contact_id=contact.contact_id,
+                account_id=contact.account_id,
+                lead_created_at=lead_created_at,
+                lead_source=lead_source,
+                first_touch_channel=lead_source,
+                current_stage="mql",
+                owner_rep_id=owner_rep_id,
+                is_mql=True,
+                is_sql=False,
+                converted_within_90_days=False,
+                conversion_timestamp=None,
+            )
+        )
+        latents[lead_id] = {
+            "latent_sales_friction": _sample_latent(
+                rng, 0.50 + bias.get("latent_sales_friction", 0.0)
+            ),
+        }
+
+    return rows, latents
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _sample_latent(rng: random.Random, mean: float = 0.50, std: float = 0.20) -> float:
+    """Draw a latent trait value in [0, 1] from a clipped Gaussian."""
+    mean = max(0.10, min(0.90, mean))
+    return max(0.0, min(1.0, rng.gauss(mean, std)))
+
+
+def _channel_weights(narrative: NarrativeSpec) -> tuple[list[str], list[float]]:
+    """Return (channels, weights) lists ordered as in the GTM spec."""
+    gtm = narrative.gtm_motion
+    channels: list[str] = []
+    weights: list[float] = []
+    for ch in gtm.channels:
+        attr = _CHANNEL_TO_SHARE_ATTR.get(ch)
+        channels.append(ch)
+        weights.append(float(getattr(gtm, attr)) if attr else 1.0 / len(gtm.channels))
+    # Normalise in case shares don't sum to exactly 1.0
+    total = sum(weights)
+    if total > 0:
+        weights = [w / total for w in weights]
+    return channels, weights
diff --git a/tests/simulation/__init__.py b/tests/simulation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/simulation/test_population.py b/tests/simulation/test_population.py
new file mode 100644
index 0000000..61308d5
--- /dev/null
+++ b/tests/simulation/test_population.py
@@ -0,0 +1,351 @@
+"""Tests for leadforge.simulation.population — build_population."""
+
+from __future__ import annotations
+
+from leadforge.api.generator import Generator
+from leadforge.core.ids import ID_PREFIXES, make_id
+from leadforge.core.models import GenerationConfig
+from leadforge.simulation.population import (
+    _N_REPS,
+    PopulationResult,
+    build_population,
+)
+from leadforge.structure.sampler import sample_hidden_graph
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+_SEED = 42
+_N_ACCOUNTS = 50
+_N_CONTACTS = 120
+_N_LEADS = 200
+
+
+def _make_result(seed: int = _SEED, motif: str | None = None) -> PopulationResult:
+    config = GenerationConfig(
+        seed=seed,
+        n_accounts=_N_ACCOUNTS,
+        n_contacts=_N_CONTACTS,
+        n_leads=_N_LEADS,
+    )
+    gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=seed)
+    narrative = gen.world_spec.narrative
+    assert narrative is not None
+    graph = sample_hidden_graph(seed=seed, motif_family_name=motif)
+    return build_population(config, narrative, graph)
+
+
+# ---------------------------------------------------------------------------
+# Counts
+# ---------------------------------------------------------------------------
+
+
+def test_population_counts() -> None:
+    result = _make_result()
+    assert len(result.accounts) == _N_ACCOUNTS
+    assert len(result.contacts) == _N_CONTACTS
+    assert len(result.leads) == _N_LEADS
+
+
+def test_latent_state_counts() -> None:
+    result = _make_result()
+    assert len(result.latent_state.account_latents) == _N_ACCOUNTS
+    assert len(result.latent_state.contact_latents) == _N_CONTACTS
+    assert len(result.latent_state.lead_latents) == _N_LEADS
+
+
+# ---------------------------------------------------------------------------
+# Determinism
+# ---------------------------------------------------------------------------
+
+
+def test_build_population_is_deterministic() -> None:
+    r1 = _make_result(seed=7)
+    r2 = _make_result(seed=7)
+    assert [a.account_id for a in r1.accounts] == [a.account_id for a in r2.accounts]
+    assert [c.contact_id for c in r1.contacts] == [c.contact_id for c in r2.contacts]
+    assert [lead.lead_id for lead in r1.leads] == [lead.lead_id for lead in r2.leads]
+    assert r1.latent_state.account_latents == r2.latent_state.account_latents
+    assert r1.latent_state.contact_latents == r2.latent_state.contact_latents
+    assert r1.latent_state.lead_latents == r2.latent_state.lead_latents
+
+
+def test_different_seeds_give_different_results() -> None:
+    r1 = _make_result(seed=1)
+    r2 = _make_result(seed=2)
+    assert r1.latent_state.account_latents != r2.latent_state.account_latents
+
+
+# ---------------------------------------------------------------------------
+# Entity IDs
+# ---------------------------------------------------------------------------
+
+
+def test_account_ids_are_sequential_and_unique() -> None:
+    result = _make_result()
+    ids = [a.account_id for a in result.accounts]
+    expected = [make_id(ID_PREFIXES["account"], i) for i in range(1, _N_ACCOUNTS + 1)]
+    assert ids == expected
+
+
+def test_contact_ids_are_sequential_and_unique() -> None:
+    result = _make_result()
+    ids = [c.contact_id for c in result.contacts]
+    expected = [make_id(ID_PREFIXES["contact"], i) for i in range(1, _N_CONTACTS + 1)]
+    assert ids == expected
+
+
+def test_lead_ids_are_sequential_and_unique() -> None:
+    result = _make_result()
+    ids = [lead.lead_id for lead in result.leads]
+    expected = [make_id(ID_PREFIXES["lead"], i) for i in range(1, _N_LEADS + 1)]
+    assert ids == expected
+
+
+# ---------------------------------------------------------------------------
+# FK integrity
+# ---------------------------------------------------------------------------
+
+
+def test_contact_account_ids_are_valid() -> None:
+    result = _make_result()
+    valid_acct_ids = {a.account_id for a in result.accounts}
+    for c in result.contacts:
+        assert c.account_id in valid_acct_ids, f"contact {c.contact_id} → unknown account"
+
+
+def test_lead_contact_ids_are_valid() -> None:
+    result = _make_result()
+    valid_cnt_ids = {c.contact_id for c in result.contacts}
+    for lead in result.leads:
+        assert lead.contact_id in valid_cnt_ids, f"lead {lead.lead_id} → unknown contact"
+
+
+def test_lead_account_ids_are_valid() -> None:
+    result = _make_result()
+    valid_acct_ids = {a.account_id for a in result.accounts}
+    for lead in result.leads:
+        assert lead.account_id in valid_acct_ids, f"lead {lead.lead_id} → unknown account"
+
+
+def test_lead_contact_account_consistency() -> None:
+    """lead.account_id must match the account_id of lead.contact_id."""
+    result = _make_result()
+    contact_to_account = {c.contact_id: c.account_id for c in result.contacts}
+    for lead in result.leads:
+        assert lead.account_id == contact_to_account[lead.contact_id]
+
+
+# ---------------------------------------------------------------------------
+# Latent value ranges and completeness
+# ---------------------------------------------------------------------------
+
+_EXPECTED_ACCOUNT_TRAITS = {
+    "latent_account_fit",
+    "latent_budget_readiness",
+    "latent_process_maturity",
+}
+_EXPECTED_CONTACT_TRAITS = {
+    "latent_problem_awareness",
+    "latent_contact_authority",
+    "latent_responsiveness",
+    "latent_engagement_propensity",
+}
+_EXPECTED_LEAD_TRAITS = {"latent_sales_friction"}
+
+
+def test_account_latent_traits_present() -> None:
+    result = _make_result()
+    for acct_id, traits in result.latent_state.account_latents.items():
+        assert traits.keys() == _EXPECTED_ACCOUNT_TRAITS, f"account {acct_id}"
+
+
+def test_contact_latent_traits_present() -> None:
+    result = _make_result()
+    for cnt_id, traits in result.latent_state.contact_latents.items():
+        assert traits.keys() == _EXPECTED_CONTACT_TRAITS, f"contact {cnt_id}"
+
+
+def test_lead_latent_traits_present() -> None:
+    result = _make_result()
+    for lead_id, traits in result.latent_state.lead_latents.items():
+        assert traits.keys() == _EXPECTED_LEAD_TRAITS, f"lead {lead_id}"
+
+
+def test_all_latent_values_in_unit_interval() -> None:
+    result = _make_result()
+    for store in (
+        result.latent_state.account_latents,
+        result.latent_state.contact_latents,
+        result.latent_state.lead_latents,
+    ):
+        for entity_id, traits in store.items():
+            for trait, val in traits.items():
+                assert 0.0 <= val <= 1.0, f"{entity_id}.{trait} = {val}"
+
+
+# ---------------------------------------------------------------------------
+# Lead observable fields
+# ---------------------------------------------------------------------------
+
+
+def test_lead_initial_stage_is_mql() -> None:
+    result = _make_result()
+    for lead in result.leads:
+        assert lead.current_stage == "mql"
+        assert lead.is_mql is True
+        assert lead.is_sql is False
+        assert lead.converted_within_90_days is False
+        assert lead.conversion_timestamp is None
+
+
+def test_lead_source_is_valid_channel() -> None:
+    from leadforge.api.generator import Generator
+
+    gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=_SEED)
+    narrative = gen.world_spec.narrative
+    assert narrative is not None
+    valid_channels = set(narrative.gtm_motion.channels)
+    result = _make_result()
+    for lead in result.leads:
+        assert lead.lead_source in valid_channels
+        assert lead.first_touch_channel == lead.lead_source
+
+
+def test_lead_owner_rep_id_is_valid() -> None:
+    result = _make_result()
+    valid_rep_ids = {make_id(ID_PREFIXES["rep"], i) for i in range(1, _N_REPS + 1)}
+    for lead in result.leads:
+        assert lead.owner_rep_id in valid_rep_ids
+
+
+def test_lead_created_at_within_base_window() -> None:
+    from datetime import date
+
+    result = _make_result()
+    base = date(2024, 1, 1)
+    end = date(2024, 1, 30)
+    for lead in result.leads:
+        d = date.fromisoformat(lead.lead_created_at)
+        assert base <= d <= end, f"lead {lead.lead_id} created_at {d} out of window"
+
+
+# ---------------------------------------------------------------------------
+# Account observable fields
+# ---------------------------------------------------------------------------
+
+
+def test_account_industry_is_valid() -> None:
+    from leadforge.api.generator import Generator
+
+    gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=_SEED)
+    narrative = gen.world_spec.narrative
+    assert narrative is not None
+    valid = set(narrative.market.icp_industries)
+    result = _make_result()
+    for a in result.accounts:
+        assert a.industry in valid
+
+
+def test_account_region_is_valid() -> None:
+    from leadforge.api.generator import Generator
+
+    gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=_SEED)
+    narrative = gen.world_spec.narrative
+    assert narrative is not None
+    valid = set(narrative.market.geographies)
+    result = _make_result()
+    for a in result.accounts:
+        assert a.region in valid
+
+
+# ---------------------------------------------------------------------------
+# Motif latent bias (property test across seeds)
+# ---------------------------------------------------------------------------
+
+
+def test_fit_dominant_raises_account_fit_mean() -> None:
+    """fit_dominant worlds should have higher mean latent_account_fit than
+    buying_committee_friction worlds across a range of seeds."""
+    fit_means = []
+    friction_means = []
+    for seed in range(15):
+        config = GenerationConfig(seed=seed, n_accounts=200, n_contacts=400, n_leads=600)
+        gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=seed)
+        narrative = gen.world_spec.narrative
+        assert narrative is not None
+
+        g_fit = sample_hidden_graph(seed=seed, motif_family_name="fit_dominant")
+        r_fit = build_population(config, narrative, g_fit)
+        fit_means.append(
+            sum(t["latent_account_fit"] for t in r_fit.latent_state.account_latents.values())
+            / config.n_accounts
+        )
+
+        g_fric = sample_hidden_graph(seed=seed, motif_family_name="buying_committee_friction")
+        r_fric = build_population(config, narrative, g_fric)
+        friction_means.append(
+            sum(t["latent_account_fit"] for t in r_fric.latent_state.account_latents.values())
+            / config.n_accounts
+        )
+
+    avg_fit = sum(fit_means) / len(fit_means)
+    avg_fric = sum(friction_means) / len(friction_means)
+    assert avg_fit > avg_fric, (
+        f"Expected fit_dominant mean ({avg_fit:.3f}) > buying_committee_friction ({avg_fric:.3f})"
+    )
+
+
+def test_buying_committee_friction_lowers_contact_authority() -> None:
+    """buying_committee_friction worlds should have lower mean latent_contact_authority."""
+    bc_means = []
+    fd_means = []
+    for seed in range(15):
+        config = GenerationConfig(seed=seed, n_accounts=100, n_contacts=300, n_leads=400)
+        gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=seed)
+        narrative = gen.world_spec.narrative
+        assert narrative is not None
+
+        g_bc = sample_hidden_graph(seed=seed, motif_family_name="buying_committee_friction")
+        r_bc = build_population(config, narrative, g_bc)
+        bc_means.append(
+            sum(t["latent_contact_authority"] for t in r_bc.latent_state.contact_latents.values())
+            / config.n_contacts
+        )
+
+        g_fd = sample_hidden_graph(seed=seed, motif_family_name="fit_dominant")
+        r_fd = build_population(config, narrative, g_fd)
+        fd_means.append(
+            sum(t["latent_contact_authority"] for t in r_fd.latent_state.contact_latents.values())
+            / config.n_contacts
+        )
+
+    avg_bc = sum(bc_means) / len(bc_means)
+    avg_fd = sum(fd_means) / len(fd_means)
+    assert avg_bc < avg_fd, (
+        f"Expected buying_committee_friction mean ({avg_bc:.3f}) < fit_dominant ({avg_fd:.3f})"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Latent state entity-ID alignment
+# ---------------------------------------------------------------------------
+
+
+def test_latent_state_account_ids_match_rows() -> None:
+    result = _make_result()
+    row_ids = {a.account_id for a in result.accounts}
+    assert set(result.latent_state.account_latents.keys()) == row_ids
+
+
+def test_latent_state_contact_ids_match_rows() -> None:
+    result = _make_result()
+    row_ids = {c.contact_id for c in result.contacts}
+    assert set(result.latent_state.contact_latents.keys()) == row_ids
+
+
+def test_latent_state_lead_ids_match_rows() -> None:
+    result = _make_result()
+    row_ids = {lead.lead_id for lead in result.leads}
+    assert set(result.latent_state.lead_latents.keys()) == row_ids

From 0559a83d15f375cdfa53f9d4abb3b38ed15e91e4 Mon Sep 17 00:00:00 2001
From: Shay Palachy <shaypal5@users.noreply.github.com>
Date: Tue, 28 Apr 2026 06:20:08 +0300
Subject: [PATCH 2/2] fix: address Copilot round-1 review on PR #10

- Docstring: correct determinism contract to include narrative and
  world_graph.motif_family (COPILOT-1)
- build_population: add _validate_narrative() up-front guard that raises
  InvalidConfigError for empty industries, geographies, personas, or
  channels (COPILOT-2)
- _channel_weights: fall back to uniform distribution when all GTM shares
  sum to zero, preventing random.choices ValueError (COPILOT-3)
- 5 new tests covering all three fixes (363 total passing)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 leadforge/simulation/population.py  | 40 ++++++++++++--
 tests/simulation/test_population.py | 83 +++++++++++++++++++++++++++++
 2 files changed, 118 insertions(+), 5 deletions(-)

diff --git a/leadforge/simulation/population.py b/leadforge/simulation/population.py
index 3b19d18..9f84e0b 100644
--- a/leadforge/simulation/population.py
+++ b/leadforge/simulation/population.py
@@ -28,6 +28,7 @@
 from datetime import date, timedelta
 from typing import TYPE_CHECKING
 
+from leadforge.core.exceptions import InvalidConfigError
 from leadforge.core.ids import ID_PREFIXES, make_id
 from leadforge.core.models import GenerationConfig
 from leadforge.core.rng import RNGRoot
@@ -137,12 +138,15 @@ def build_population(
     """Generate accounts, contacts, leads, and their latent states.
 
     All randomness is derived from named substreams of ``RNGRoot(config.seed)``
-    so the result is fully deterministic for a given ``(config, world_graph)``.
+    so the result is fully deterministic for a given
+    ``(config, narrative, world_graph.motif_family)``.
 
     Args:
         config: Fully resolved generation configuration (counts, seed, etc.).
         narrative: Parsed narrative spec providing ICP industries, geographies,
-            personas, and GTM channel mix.
+            personas, and GTM channel mix.  Must have non-empty
+            ``market.icp_industries``, ``market.geographies``, ``personas``,
+            and ``gtm_motion.channels``.
         world_graph: The sampled hidden world graph; its ``motif_family`` is used
             to apply latent-trait mean biases that make the world structurally
             coherent.
@@ -150,7 +154,11 @@ def build_population(
     Returns:
         A :class:`PopulationResult` containing the three entity lists and the
         full :class:`LatentState`.
+
+    Raises:
+        InvalidConfigError: If any required narrative collection is empty.
     """
+    _validate_narrative(narrative)
     root = RNGRoot(config.seed)
     bias = _MOTIF_LATENT_BIAS.get(world_graph.motif_family, {})
 
@@ -366,6 +374,20 @@ def _generate_leads(
 # ---------------------------------------------------------------------------
 
 
+def _validate_narrative(narrative: NarrativeSpec) -> None:
+    """Raise :exc:`InvalidConfigError` if any collection required by population
+    generation is empty."""
+    checks: list[tuple[object, str]] = [
+        (narrative.market.icp_industries, "narrative.market.icp_industries"),
+        (narrative.market.geographies, "narrative.market.geographies"),
+        (narrative.personas, "narrative.personas"),
+        (narrative.gtm_motion.channels, "narrative.gtm_motion.channels"),
+    ]
+    for collection, name in checks:
+        if not collection:
+            raise InvalidConfigError(f"{name} must not be empty")
+
+
 def _sample_latent(rng: random.Random, mean: float = 0.50, std: float = 0.20) -> float:
     """Draw a latent trait value in [0, 1] from a clipped Gaussian."""
     mean = max(0.10, min(0.90, mean))
@@ -373,16 +395,24 @@ def _sample_latent(rng: random.Random, mean: float = 0.50, std: float = 0.20) ->
 
 
 def _channel_weights(narrative: NarrativeSpec) -> tuple[list[str], list[float]]:
-    """Return (channels, weights) lists ordered as in the GTM spec."""
+    """Return (channels, weights) lists ordered as in the GTM spec.
+
+    If the per-channel share attributes sum to zero (all shares are 0),
+    falls back to a uniform distribution so ``random.choices`` never
+    receives an all-zero weight list.
+    """
     gtm = narrative.gtm_motion
     channels: list[str] = []
     weights: list[float] = []
     for ch in gtm.channels:
         attr = _CHANNEL_TO_SHARE_ATTR.get(ch)
         channels.append(ch)
-        weights.append(float(getattr(gtm, attr)) if attr else 1.0 / len(gtm.channels))
-    # Normalise in case shares don't sum to exactly 1.0
+        weights.append(float(getattr(gtm, attr)) if attr else 0.0)
     total = sum(weights)
     if total > 0:
         weights = [w / total for w in weights]
+    else:
+        # All shares are zero — fall back to uniform.
+        uniform = 1.0 / len(channels)
+        weights = [uniform] * len(channels)
     return channels, weights
diff --git a/tests/simulation/test_population.py b/tests/simulation/test_population.py
index 61308d5..c41d657 100644
--- a/tests/simulation/test_population.py
+++ b/tests/simulation/test_population.py
@@ -2,12 +2,17 @@
 
 from __future__ import annotations
 
+import pytest
+
 from leadforge.api.generator import Generator
+from leadforge.core.exceptions import InvalidConfigError
 from leadforge.core.ids import ID_PREFIXES, make_id
 from leadforge.core.models import GenerationConfig
+from leadforge.narrative.spec import NarrativeSpec
 from leadforge.simulation.population import (
     _N_REPS,
     PopulationResult,
+    _channel_weights,
     build_population,
 )
 from leadforge.structure.sampler import sample_hidden_graph
@@ -349,3 +354,81 @@ def test_latent_state_lead_ids_match_rows() -> None:
     result = _make_result()
     row_ids = {lead.lead_id for lead in result.leads}
     assert set(result.latent_state.lead_latents.keys()) == row_ids
+
+
+# ---------------------------------------------------------------------------
+# Narrative validation (COPILOT-2 / COPILOT-3)
+# ---------------------------------------------------------------------------
+
+
+def _base_narrative() -> NarrativeSpec:
+    """Return a minimal valid NarrativeSpec for mutation tests."""
+    gen = Generator.from_recipe("b2b_saas_procurement_v1", seed=0)
+    narrative = gen.world_spec.narrative
+    assert narrative is not None
+    return narrative
+
+
+def _build_with_narrative(narrative: NarrativeSpec) -> PopulationResult:
+    config = GenerationConfig(seed=0, n_accounts=10, n_contacts=20, n_leads=30)
+    graph = sample_hidden_graph(seed=0)
+    return build_population(config, narrative, graph)
+
+
+def test_empty_industries_raises() -> None:
+    import dataclasses
+
+    narrative = _base_narrative()
+    bad_market = dataclasses.replace(narrative.market, icp_industries=())
+    bad_narrative = dataclasses.replace(narrative, market=bad_market)
+    with pytest.raises(InvalidConfigError, match="icp_industries"):
+        _build_with_narrative(bad_narrative)
+
+
+def test_empty_geographies_raises() -> None:
+    import dataclasses
+
+    narrative = _base_narrative()
+    bad_market = dataclasses.replace(narrative.market, geographies=())
+    bad_narrative = dataclasses.replace(narrative, market=bad_market)
+    with pytest.raises(InvalidConfigError, match="geographies"):
+        _build_with_narrative(bad_narrative)
+
+
+def test_empty_personas_raises() -> None:
+    import dataclasses
+
+    narrative = _base_narrative()
+    bad_narrative = dataclasses.replace(narrative, personas=())
+    with pytest.raises(InvalidConfigError, match="personas"):
+        _build_with_narrative(bad_narrative)
+
+
+def test_empty_channels_raises() -> None:
+    import dataclasses
+
+    narrative = _base_narrative()
+    bad_gtm = dataclasses.replace(narrative.gtm_motion, channels=())
+    bad_narrative = dataclasses.replace(narrative, gtm_motion=bad_gtm)
+    with pytest.raises(InvalidConfigError, match="channels"):
+        _build_with_narrative(bad_narrative)
+
+
+def test_channel_weights_zero_shares_falls_back_to_uniform() -> None:
+    """If all GTM shares are 0, _channel_weights should return uniform weights."""
+    narrative = _base_narrative()
+    import dataclasses
+
+    bad_gtm = dataclasses.replace(
+        narrative.gtm_motion,
+        inbound_share=0.0,
+        outbound_share=0.0,
+        partner_share=0.0,
+    )
+    bad_narrative = dataclasses.replace(narrative, gtm_motion=bad_gtm)
+    channels, weights = _channel_weights(bad_narrative)
+    assert len(channels) == len(weights)
+    assert all(w > 0 for w in weights)
+    assert abs(sum(weights) - 1.0) < 1e-9
+    expected = 1.0 / len(channels)
+    assert all(abs(w - expected) < 1e-9 for w in weights)