Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""add model_selection decision + promotion columns

Revision ID: e4f5a6b7c8d9
Revises: d3e4f5a6b7c8
Create Date: 2026-06-01 11:00:00.000000

Slice C of the Forecast Champion Selector (issue #362). Adds the decision +
operationalization columns to ``model_selection_run`` — all ADDITIVE:

- ``trained_model_type`` / ``is_override`` / ``override_reason`` — which model
the final bundle holds and whether it was a non-recommended override;
- ``champion_run_id`` / ``promoted_alias`` / ``promotion_decision`` — the
approval-gated registry handoff (registry ``model_run.run_id``, alias name,
and the audited decision record);
- ``feature_frame_version`` — M1, the request's V (1 or 2) persisted at
run-creation so train/promote carry the REAL version end-to-end. The
server_default ``'1'`` backfills legacy rows ONLY (not a code hardcode).

No CheckConstraint change. ``downgrade`` drops all seven columns.
"""

from collections.abc import Sequence

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

from alembic import op

# Revision identifiers, used by Alembic.
# ``revision`` is this migration's own ID and ``down_revision`` is the parent
# migration it revises; both match the "Revision ID" / "Revises" lines in the
# module docstring. No branch label or cross-migration dependency is declared.
revision: str = "e4f5a6b7c8d9"
down_revision: str | None = "d3e4f5a6b7c8"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None


def upgrade() -> None:
    """Add the seven Slice C columns to ``model_selection_run``.

    Every change is additive. The two NOT NULL columns (``is_override`` and
    ``feature_frame_version``) are declared with a server default; the other
    five columns are nullable.
    """
    table_name = "model_selection_run"
    # Listed in the order the columns are added to the table.
    slice_c_columns = (
        sa.Column("trained_model_type", sa.String(length=40), nullable=True),
        sa.Column(
            "is_override",
            sa.Boolean(),
            nullable=False,
            server_default=sa.text("false"),
        ),
        sa.Column("override_reason", sa.String(length=2000), nullable=True),
        sa.Column("champion_run_id", sa.String(length=32), nullable=True),
        sa.Column("promoted_alias", sa.String(length=100), nullable=True),
        sa.Column(
            "promotion_decision",
            postgresql.JSONB(astext_type=sa.Text()),
            nullable=True,
        ),
        sa.Column(
            "feature_frame_version",
            sa.Integer(),
            nullable=False,
            server_default=sa.text("1"),
        ),
    )
    for new_column in slice_c_columns:
        op.add_column(table_name, new_column)


def downgrade() -> None:
    """Drop the seven Slice C columns from ``model_selection_run``.

    Columns are removed in the reverse of the order ``upgrade`` adds them.
    """
    columns_to_drop = (
        "feature_frame_version",
        "promotion_decision",
        "promoted_alias",
        "champion_run_id",
        "override_reason",
        "is_override",
        "trained_model_type",
    )
    for column_name in columns_to_drop:
        op.drop_column("model_selection_run", column_name)
137 changes: 137 additions & 0 deletions app/features/model_selection/decision.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""Deterministic forecast-decision layer for the champion selector (Slice C).

Pure functions — NO LLM, NO DB, NO I/O (mirror ``explanations.py``). Translate a
horizon forecast into an inventory-decision heuristic a planner can act on:
peak/low demand day, a CLEARLY-LABELED safety-stock heuristic, and bias-risk
wording.

The safety-stock formula is the demand-variability-only form (King 2011,
constant lead time):

safety_stock = z(service_level) * sigma_daily * sqrt(lead_time_days)
expected_demand_over_lead_time = average_demand * lead_time_days
reorder_point = expected_demand_over_lead_time + safety_stock

``z`` comes from a fixed one-sided service-level lookup (NO scipy); an
in-between service level falls back to the nearest table key. Every field is
labeled ``method="heuristic"`` and carries a caveat — this output NEVER feeds
ranking (LOCKED #3).
"""

from __future__ import annotations

import statistics
from datetime import date
from typing import Any

from app.features.model_selection.schemas import ForecastDecision

# LOCKED #4 — the canonical bias sentence, kept byte-identical to the frontend
# ``BIAS_EXPLANATION`` constant (``components/champion-selector/copy.ts``) so the
# wording never drifts between the two surfaces.
# It is used as the leading sentence of ``bias_risk_text`` in
# ``compute_forecast_decision``.
BIAS_EXPLANATION = (
    "Positive bias means the model under-forecasts (risk of stockouts); "
    "negative bias means it over-forecasts (risk of overstock)."
)

# One-sided service-level z values (NO scipy dependency). Source: King 2011
# safety-stock z-from-service-level table.
# Keys are service levels and values are the matching z; ``z_for_service_level``
# reads this table and snaps any level that is not a key to the nearest key.
_Z_TABLE: dict[float, float] = {0.90: 1.2816, 0.95: 1.6449, 0.975: 1.9600, 0.99: 2.3263}

# Caveat sentences attached to every decision. ``compute_forecast_decision``
# passes ``list(_CAVEATS)`` (a fresh copy), not this list object itself.
_CAVEATS: list[str] = [
    "Safety stock is a deterministic heuristic (demand variability only; constant lead time).",
    "Not a substitute for a full inventory-optimisation model.",
]


def z_for_service_level(service_level: float) -> float:
    """Look up the one-sided z for ``service_level``.

    A level that is itself a key of ``_Z_TABLE`` yields that key's z. Any other
    level is snapped to the table key with the smallest absolute distance
    (documented heuristic — the table is coarse on purpose); when two keys are
    equally close, the one listed first in the table wins.
    """
    try:
        return _Z_TABLE[service_level]
    except KeyError:
        # Not an exact key: fall back to the closest tabulated service level.
        closest_level = min(_Z_TABLE, key=lambda level: abs(level - service_level))
        return _Z_TABLE[closest_level]


def _coerce_date(value: object) -> date | None:
"""Coerce a point's ``date`` (ISO string in JSON-mode dumps, or a date)."""
if isinstance(value, date):
return value
if isinstance(value, str):
try:
return date.fromisoformat(value)
except ValueError:
return None
return None


def forecast_peak_low(
points: list[dict[str, Any]],
) -> tuple[date | None, float | None, date | None, float | None]:
"""Return ``(peak_date, peak_demand, low_date, low_demand)`` over points.

Picks the max/min ``forecast`` value; ``(None, None, None, None)`` on an
empty forecast. Ties resolve to the first occurrence (deterministic).
"""
if not points:
return (None, None, None, None)
peak = max(points, key=lambda p: float(p["forecast"]))
low = min(points, key=lambda p: float(p["forecast"]))
return (
_coerce_date(peak.get("date")),
float(peak["forecast"]),
_coerce_date(low.get("date")),
float(low["forecast"]),
)


def _bias_direction(winner_bias: float | None) -> str:
"""Plain-English direction phrase for a winner's bias sign."""
if winner_bias is None:
return "has no recorded bias measurement"
if winner_bias > 0:
return "under-forecasts (risk of stockouts)"
if winner_bias < 0:
return "over-forecasts (risk of overstock)"
return "is roughly unbiased"


def compute_forecast_decision(
    points: list[dict[str, Any]],
    average_demand: float,
    lead_time_days: int,
    service_level: float,
    winner_bias: float | None,
) -> ForecastDecision:
    """Build the deterministic, labeled inventory-decision heuristic.

    ``sigma_daily`` is the POPULATION stdev of the forecast values; an empty,
    flat or single-point forecast yields ``sigma=0`` → ``safety_stock=0``
    (honest, not an error).

    Args:
        points: Forecast points; each must have a ``forecast`` entry that
            ``float`` can convert.
        average_demand: Average demand, multiplied by ``lead_time_days`` to
            get the expected demand over the lead time.
        lead_time_days: Lead time in days; must be non-negative.
        service_level: Target service level, mapped to ``z`` through
            ``z_for_service_level``.
        winner_bias: The winner's bias, or ``None`` when none was recorded.

    Returns:
        A ``ForecastDecision`` with the inputs echoed back, the computed
        ``z``, sigma, expected lead-time demand, safety stock and reorder
        point, the bias-risk sentence, and a fresh copy of the caveats.

    Raises:
        ValueError: If ``lead_time_days`` is negative.
    """
    if lead_time_days < 0:
        # A negative base raised to 0.5 evaluates to a complex number in
        # Python, which would silently flow into safety_stock/reorder_point.
        raise ValueError(f"lead_time_days must be non-negative, got {lead_time_days}")

    values = [float(p["forecast"]) for p in points]
    # pstdev is only called with at least two values; fewer than two
    # (including an empty forecast) is treated as zero variability.
    sigma_daily = statistics.pstdev(values) if len(values) > 1 else 0.0
    z = z_for_service_level(service_level)
    safety_stock = z * sigma_daily * (lead_time_days**0.5)
    expected_lt = average_demand * lead_time_days
    bias_dir = _bias_direction(winner_bias)
    if winner_bias is None:
        bias_text = f"{BIAS_EXPLANATION} For this winner, bias {bias_dir}."
    else:
        bias_text = (
            f"{BIAS_EXPLANATION} For this winner, bias {winner_bias:.2f} indicates it {bias_dir}."
        )
    return ForecastDecision(
        lead_time_days=lead_time_days,
        service_level=service_level,
        z_value=z,
        sigma_daily_demand=sigma_daily,
        expected_demand_over_lead_time=expected_lt,
        safety_stock=safety_stock,
        reorder_point=expected_lt + safety_stock,
        bias_risk_text=bias_text,
        # Copy so the returned object never shares the module-level list.
        caveats=list(_CAVEATS),
    )
29 changes: 28 additions & 1 deletion app/features/model_selection/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,16 @@
from enum import Enum
from typing import Any

from sqlalchemy import CheckConstraint, Date, DateTime, ForeignKey, Index, Integer, String
from sqlalchemy import (
Boolean,
CheckConstraint,
Date,
DateTime,
ForeignKey,
Index,
Integer,
String,
)
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column

Expand Down Expand Up @@ -110,6 +119,24 @@ class ModelSelectionRun(TimestampMixin, Base):
completed_at: Mapped[_dt.datetime | None] = mapped_column(
DateTime(timezone=True), nullable=True
)
# Slice C (forecast decision + operationalization) — all additive.
# ``trained_model_type`` records which model the final bundle holds (the
# ranked winner, or a user override); ``is_override`` flags a non-recommended
# choice; the promotion columns capture the approval-gated registry handoff.
trained_model_type: Mapped[str | None] = mapped_column(String(40), nullable=True)
is_override: Mapped[bool] = mapped_column(
Boolean, default=False, server_default="false", nullable=False
)
override_reason: Mapped[str | None] = mapped_column(String(2000), nullable=True)
champion_run_id: Mapped[str | None] = mapped_column(String(32), nullable=True)
promoted_alias: Mapped[str | None] = mapped_column(String(100), nullable=True)
promotion_decision: Mapped[dict[str, Any] | None] = mapped_column(JSONB, nullable=True)
# M1 — V2 promotion support: the request's feature_frame_version persisted at
# run-creation so train/promote carry the REAL version end-to-end. The
# server_default '1' backfills legacy rows ONLY (it is not a code hardcode).
feature_frame_version: Mapped[int] = mapped_column(
Integer, default=1, server_default="1", nullable=False
)

__table_args__ = (
CheckConstraint(
Expand Down
Loading