Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion .agent-plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,15 @@ Documentation + CI:
- [x] `leadforge/render/snapshots.py` — added comment documenting the feature-label temporal mismatch when `label_window_days < horizon_days` (features aggregate over full horizon; label uses shorter window)
- [x] 7 new tests: default-90 unchanged, shorter window fewer conversions, 1-day window zero conversions, late conversions excluded (with day-offset verification), conversion_timestamp still set outside window, event counts unchanged by window, bundle round-trip integration; total 788 passing

### Fix: direct conversion bypass for pre-SQL leads (PR #45, closes #44)

- [x] `leadforge/simulation/engine.py` — added `_DIRECT_CONVERSION_STAGES` and `_DIRECT_CONVERSION_DISCOUNT` (0.01) constants; pre-SQL leads (`mql`, `sal`) now have a small daily probability of converting directly, bypassing the full funnel
- [x] `leadforge/simulation/engine.py` — fixed `is_sql` post-sim computation to rely solely on `state.sql_day is not None`; the extra `current_stage in _SQL_OR_DEEPER` check was dropped because it incorrectly flagged direct-converted leads as SQL (`closed_won` is in that set)
- [x] `leadforge/simulation/engine.py` — extended opportunity/customer/subscription creation to handle direct-converted non-SQL leads
- [x] 5 new tests: non-SQL leads convert, non-SQL rate much lower than SQL rate (>5x), determinism, opportunity for direct-converted leads, customer+subscription for direct-converted leads
- [x] 2 existing tests updated: `test_most_converted_leads_are_sql` (was `test_converted_leads_also_sql`), `test_non_sql_non_converted_leads_no_opportunity` (was `test_non_sql_leads_no_opportunity`)
- [x] All 793 tests pass; lint + format clean

### Generalize dataset card prose (PR #42, closes #38)

- [x] `leadforge/schema/tasks.py` — `TaskManifest.description` rewritten for dataset-card use; `task_manifest_for_config()` produces task-specific prose (conversion-specific for default task, generic for others)
Expand Down Expand Up @@ -208,7 +217,7 @@ Documentation + CI:
- Plugin architecture
- External-API enrichment
- Web UI or dashboard
- Engine fix: `is_sql=False` → never converts (deterministic invariant)
- ~~Engine fix: `is_sql=False` → never converts (deterministic invariant)~~ — **Fixed** (PR #45, closes #44): added rare direct conversion path for pre-SQL leads

---

Expand Down
50 changes: 45 additions & 5 deletions leadforge/simulation/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
- Stage advancement is driven by :class:`~leadforge.mechanisms.transitions.HazardTransition`
(mql → … → negotiation); final conversion is driven by
:class:`~leadforge.mechanisms.hazards.ConversionHazard` (negotiation → closed_won).
- A rare **direct conversion** path allows pre-SQL leads (``mql``, ``sal``) to
convert with a heavily discounted daily probability
(``_DIRECT_CONVERSION_DISCOUNT`` × normal hazard rate), breaking the
deterministic ``is_sql=False → never converts`` invariant.
- A small daily churn probability independently moves any non-terminal lead to
``closed_lost``.

Expand Down Expand Up @@ -49,6 +53,7 @@
from leadforge.core.models import GenerationConfig
from leadforge.core.rng import RNGRoot
from leadforge.mechanisms.base import MechanismContext
from leadforge.mechanisms.hazards import ConversionHazard
from leadforge.mechanisms.policies import assign_mechanisms
from leadforge.mechanisms.transitions import StageSequence
from leadforge.schema.entities import (
Expand All @@ -71,6 +76,16 @@
# Daily churn probability from any active stage.
_DAILY_CHURN_RATE = 0.004

# Pre-SQL stages eligible for a rare direct conversion (bypassing the full
# funnel). The daily probability is the standard ConversionHazard rate
# multiplied by ``_DIRECT_CONVERSION_DISCOUNT``.
_DIRECT_CONVERSION_STAGES = frozenset({"mql", "sal"})

# Discount factor applied to the ConversionHazard daily probability for
# leads at pre-SQL stages. Keeps the full funnel relevant while breaking
# the deterministic ``is_sql=False → never converts`` invariant.
_DIRECT_CONVERSION_DISCOUNT = 0.01

# Funnel stages that imply meaningful sales engagement → opportunity creation.
_SQL_OR_DEEPER = frozenset(
{
Expand Down Expand Up @@ -184,6 +199,11 @@ def simulate_world(
mechanisms = assign_mechanisms(
world_graph.motif_family, mech_rng, latent_touch_intensity=latent_touch_intensity
)
# Narrow type for direct conversion path (daily_probability is on
# ConversionHazard, not the Mechanism ABC).
if not isinstance(mechanisms.conversion_hazard, ConversionHazard):
raise TypeError("conversion_hazard must be a ConversionHazard instance")
conversion_hazard: ConversionHazard = mechanisms.conversion_hazard
stage_seq = StageSequence()

# Build lookup indexes.
Expand Down Expand Up @@ -251,7 +271,7 @@ def simulate_world(
# -- 2. Stage advance or conversion check (transition stream) -
if state.current_stage == "negotiation":
# Final close: ConversionHazard decides closed_won.
if mechanisms.conversion_hazard.sample(ctx, transition_rng):
if conversion_hazard.sample(ctx, transition_rng):
state.mark_converted(t)
# Fall through to emit events on conversion day.
else:
Expand All @@ -261,6 +281,17 @@ def simulate_world(
if next_s is not None:
state.advance_stage(next_s, t)

# -- 2b. Direct conversion for pre-SQL leads (rare bypass) -----
# Breaks the ``is_sql=False → never converts`` deterministic
# invariant by giving pre-SQL leads a small, latent-modulated
# daily probability of converting without reaching negotiation.
if not state.is_terminal and state.current_stage in _DIRECT_CONVERSION_STAGES:
direct_p = (
conversion_hazard.daily_probability(ctx.latents) * _DIRECT_CONVERSION_DISCOUNT
)
if transition_rng.random() < direct_p:
state.mark_converted(t)

# -- 3. Touches (event stream) --------------------------------
event_date = (lead_dates[lead.lead_id] + timedelta(days=t)).isoformat()

Expand Down Expand Up @@ -334,7 +365,13 @@ def simulate_world(
state = states[lead.lead_id]
lead_date = lead_dates[lead.lead_id]

is_sql = state.sql_day is not None or state.current_stage in _SQL_OR_DEEPER
# A lead "is SQL" if it reached the SQL qualification gate (or a deeper
# stage) via the regular funnel, or started there. Direct-converted
# leads (mql/sal → closed_won) skip SQL and should NOT be flagged as SQL.
# Note: sql_day is set by advance_stage() when the lead enters any
# stage in _SQL_OR_DEEPER, and also at initialisation for leads that
# start at sql+ stages. It is NOT set by mark_converted().
is_sql = state.sql_day is not None
conv_ts: str | None = None
if state.converted and state.conversion_day is not None:
conv_ts = (lead_date + timedelta(days=state.conversion_day)).isoformat()
Expand Down Expand Up @@ -366,11 +403,14 @@ def simulate_world(
)
)

# Opportunity: created when lead first reached sql or deeper.
if is_sql:
# Opportunity: created when lead first reached sql or deeper,
# or when a lead converted directly from a pre-SQL stage.
if is_sql or state.converted:
opp_ctr += 1
opp_id = make_id(ID_PREFIXES["opportunity"], opp_ctr)
opp_day = state.sql_day if state.sql_day is not None else 0
# For direct-converted leads (no SQL stage), anchor the
# opportunity at the conversion day instead.
opp_day = state.sql_day if state.sql_day is not None else (state.conversion_day or 0)
opp_created_at = (lead_date + timedelta(days=opp_day)).isoformat()

close_outcome: str | None = None
Expand Down
90 changes: 83 additions & 7 deletions tests/simulation/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,11 +218,14 @@ def test_sql_leads_are_flagged(self) -> None:
}:
assert lead.is_sql

def test_converted_leads_also_sql(self) -> None:
result = _run_sim(n_leads=100)
for lead in result.leads:
if lead.converted_within_90_days:
assert lead.is_sql
def test_most_converted_leads_are_sql(self) -> None:
    """Most converted leads should have reached SQL.

    The direct-conversion path lets some non-SQL leads convert too, so the
    check is a proportion (> 80%) rather than a strict invariant.
    """
    result = _run_sim(n_leads=500, seed=42)
    converted = [lead for lead in result.leads if lead.converted_within_90_days]
    # Fail with a readable message instead of a ZeroDivisionError if the
    # simulation produced no conversions at all.
    assert converted, "Expected at least one converted lead in 500-lead sim"
    sql_converted = [lead for lead in converted if lead.is_sql]
    # The vast majority of conversions should still go through SQL.
    assert len(sql_converted) / len(converted) > 0.8


# ---------------------------------------------------------------------------
Expand All @@ -238,11 +241,13 @@ def test_sql_leads_have_opportunity(self) -> None:
if lead.is_sql:
assert lead.lead_id in opp_lead_ids

def test_non_sql_leads_no_opportunity(self) -> None:
def test_non_sql_non_converted_leads_no_opportunity(self) -> None:
"""Non-SQL leads that did NOT convert should have no opportunity.
Direct-converted non-SQL leads get an opportunity at conversion time."""
result = _run_sim(n_leads=100)
opp_lead_ids = {o.lead_id for o in result.opportunities}
for lead in result.leads:
if not lead.is_sql:
if not lead.is_sql and not lead.converted_within_90_days:
assert lead.lead_id not in opp_lead_ids

def test_opportunity_acv_positive(self) -> None:
Expand Down Expand Up @@ -393,6 +398,77 @@ def test_enterprise(self) -> None:
assert _plan_from_acv(200_000) == "enterprise"


# ---------------------------------------------------------------------------
# Direct conversion (pre-SQL bypass)
# ---------------------------------------------------------------------------


class TestDirectConversion:
    """Verify the rare direct-conversion path for pre-SQL leads."""

    @staticmethod
    def _non_sql_converted(result):
        """Return the leads in *result* that converted without being flagged SQL."""
        return [
            lead for lead in result.leads if lead.converted_within_90_days and not lead.is_sql
        ]

    def test_some_non_sql_leads_convert(self) -> None:
        """With enough leads, at least one non-SQL lead should convert."""
        result = _run_sim(seed=42, n_leads=2000)
        non_sql_converted = self._non_sql_converted(result)
        assert len(non_sql_converted) > 0, (
            "Expected at least one non-SQL conversion in 2000-lead sim"
        )

    def test_non_sql_conversion_rate_much_lower(self) -> None:
        """Non-SQL conversion rate should be significantly lower than SQL."""
        result = _run_sim(seed=42, n_leads=2000)
        sql_leads = [lead for lead in result.leads if lead.is_sql]
        non_sql_leads = [lead for lead in result.leads if not lead.is_sql]
        # Guard both denominators before computing rates.
        assert len(sql_leads) > 0
        assert len(non_sql_leads) > 0

        sql_rate = sum(lead.converted_within_90_days for lead in sql_leads) / len(sql_leads)
        non_sql_rate = sum(lead.converted_within_90_days for lead in non_sql_leads) / len(
            non_sql_leads
        )
        # Non-SQL rate should be more than 5x lower than SQL rate.
        assert non_sql_rate < sql_rate / 5, (
            f"Non-SQL rate {non_sql_rate:.4f} not much lower than SQL rate {sql_rate:.4f}"
        )

    def test_direct_conversion_deterministic(self) -> None:
        """Direct conversion path preserves full determinism."""
        r1 = _run_sim(seed=77, n_leads=500)
        r2 = _run_sim(seed=77, n_leads=500)
        # Compare the SQL flag alongside the label: the direct path affects
        # both, so both must be reproducible for a fixed seed.
        labels1 = [(row.converted_within_90_days, row.is_sql) for row in r1.leads]
        labels2 = [(row.converted_within_90_days, row.is_sql) for row in r2.leads]
        assert labels1 == labels2

    def test_direct_converted_lead_has_opportunity(self) -> None:
        """A direct-converted non-SQL lead should still get an opportunity row."""
        result = _run_sim(seed=42, n_leads=2000)
        opp_lead_ids = {o.lead_id for o in result.opportunities}
        non_sql_converted = self._non_sql_converted(result)
        # Without this the loop below would pass vacuously on an empty list.
        assert non_sql_converted, "Expected at least one non-SQL conversion in 2000-lead sim"
        for lead in non_sql_converted:
            assert lead.lead_id in opp_lead_ids

    def test_direct_converted_lead_has_customer_and_subscription(self) -> None:
        """A direct-converted non-SQL lead should get customer + subscription rows."""
        result = _run_sim(seed=42, n_leads=2000)
        cust_opp_ids = {c.opportunity_id for c in result.customers}
        sub_cust_ids = {s.customer_id for s in result.subscriptions}
        opp_by_lead = {o.lead_id: o for o in result.opportunities}
        cust_by_opp = {c.opportunity_id: c for c in result.customers}
        non_sql_converted = self._non_sql_converted(result)
        # Without this the loop below would pass vacuously on an empty list.
        assert non_sql_converted, "Expected at least one non-SQL conversion in 2000-lead sim"
        for lead in non_sql_converted:
            opp = opp_by_lead.get(lead.lead_id)
            assert opp is not None, f"No opportunity for direct-converted lead {lead.lead_id}"
            assert opp.opportunity_id in cust_opp_ids
            cust = cust_by_opp[opp.opportunity_id]
            assert cust.customer_id in sub_cust_ids


# ---------------------------------------------------------------------------
# label_window_days affects label derivation
# ---------------------------------------------------------------------------
Expand Down
Loading