From 8b35bdcff1597495ea89d82baba1534af871cb63 Mon Sep 17 00:00:00 2001
From: Rolando Santamaria Maso <kyberneees@gmail.com>
Date: Thu, 4 Jun 2026 20:59:53 +0200
Subject: [PATCH 01/11] feat(schedule): native in-process scheduler core (phase
 1)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce internal/schedule — the engine for odek's native cron capability,
replacing the Docker + supercronic approach. Running in-process means the host
already has resolved config (API key, model, bot token, default chat) in
memory, so a scheduled task sees exactly what an interactive one does — no
environment-inheritance games, no external cron daemon, no container-only
behaviour.

This phase is the standalone core only — no CLI or bot wiring yet.

- types.go: Job / Delivery / RunState. Definitions and runtime state persist
  to separate files so a hand-edit never races a state write.
- cronexpr.go: stdlib-only 5-field cron parser (ranges, steps, lists, names,
  @macros) with correct Vixie dom/dow union semantics, timezone-aware Next()
  via coarse unit-stepping, and a horizon that clears the leap-century gap.
- store.go: atomic (temp+rename, 0600) CRUD for schedules.json and
  schedule-state.json, mirroring session.Store; validates jobs on write.
- scheduler.go: firing engine decoupled from the agent/telegram via Runner and
  Deliverer interfaces. Earliest-fire timer (no per-minute polling), bounded
  concurrency, per-job overlap guard, missed-run skip/catchup policy, mtime
  hot-reload, and graceful drain on context cancellation.

Tests: 39 cases, 87.9% coverage, green under -race. Parser table tests
(ranges/steps/lists/names/macros/dom-dow union/leap day/timezone/errors);
engine tests drive reconcile/fireDue directly with explicit clocks plus one
real-clock lifecycle test — deterministic, no flaky sleeps.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 internal/schedule/cronexpr.go       | 305 +++++++++++++++++++++++
 internal/schedule/cronexpr_test.go  | 278 +++++++++++++++++++++
 internal/schedule/scheduler.go      | 336 +++++++++++++++++++++++++
 internal/schedule/scheduler_test.go | 369 ++++++++++++++++++++++++++++
 internal/schedule/store.go          | 362 +++++++++++++++++++++++++++
 internal/schedule/store_test.go     | 293 ++++++++++++++++++++++
 internal/schedule/types.go          |  74 ++++++
 7 files changed, 2017 insertions(+)
 create mode 100644 internal/schedule/cronexpr.go
 create mode 100644 internal/schedule/cronexpr_test.go
 create mode 100644 internal/schedule/scheduler.go
 create mode 100644 internal/schedule/scheduler_test.go
 create mode 100644 internal/schedule/store.go
 create mode 100644 internal/schedule/store_test.go
 create mode 100644 internal/schedule/types.go

diff --git a/internal/schedule/cronexpr.go b/internal/schedule/cronexpr.go
new file mode 100644
index 0000000..79598d5
--- /dev/null
+++ b/internal/schedule/cronexpr.go
@@ -0,0 +1,305 @@
+package schedule
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// Schedule is a parsed cron expression bound to a timezone. It answers one
+// question — "given an instant, when does this next fire?" — via Next.
+//
+// Supported syntax is standard 5-field Vixie cron:
+//
+//	┌ minute        0-59
+//	│ ┌ hour        0-23
+//	│ │ ┌ dom        1-31
+//	│ │ │ ┌ month    1-12 or JAN-DEC
+//	│ │ │ │ ┌ dow    0-6 or SUN-SAT (0 and 7 both mean Sunday)
+//	* * * * *
+//
+// Each field accepts: a wildcard "*", a single value, a range "a-b", a step
+// "*/n" / "a-b/n" / "a/n" (from a to the field max), and comma-separated lists
+// of any of those. Month and day-of-week also accept three-letter names.
+//
+// Macros: @yearly (@annually), @monthly, @weekly, @daily (@midnight), @hourly.
+//
+// Day-of-month / day-of-week coupling follows Vixie semantics: when BOTH
+// fields are restricted (neither is "*"), a day matches if EITHER field
+// matches (union). When at least one is a wildcard, the usual intersection
+// applies. This is why "0 0 13 * 5" fires on the 13th OR any Friday, not only
+// Friday-the-13th.
+type Schedule struct {
+	minute uint64 // bitset over 0..59
+	hour   uint64 // bitset over 0..23
+	dom    uint64 // bitset over 1..31
+	month  uint64 // bitset over 1..12
+	dow    uint64 // bitset over 0..6 (Sunday=0)
+
+	domStar bool // dom field was a wildcard ("*" or "*/n")
+	dowStar bool // dow field was a wildcard
+
+	loc  *time.Location
+	expr string // original expression, for String()
+}
+
+// matchHorizon bounds Next's search. The rarest legitimate expression is
+// Feb 29, whose gap can reach 8 years across a non-leap century boundary
+// (e.g. 2096 → 2104, since 2100 is not a leap year); 9 years clears it with
+// margin. Stepping past the horizon means the expression matches nothing
+// reachable (e.g. "0 0 30 2 *", Feb 30) and Next returns the zero time. Even
+// then the coarse unit-stepping gives up in a few thousand iterations.
+const matchHorizon = 9 * 366 * 24 * time.Hour
+
+var monthNames = map[string]int{
+	"jan": 1, "feb": 2, "mar": 3, "apr": 4, "may": 5, "jun": 6,
+	"jul": 7, "aug": 8, "sep": 9, "oct": 10, "nov": 11, "dec": 12,
+}
+
+var dowNames = map[string]int{
+	"sun": 0, "mon": 1, "tue": 2, "wed": 3, "thu": 4, "fri": 5, "sat": 6,
+}
+
+// Parse compiles a cron expression in UTC.
+func Parse(expr string) (*Schedule, error) {
+	return ParseInLocation(expr, time.UTC)
+}
+
+// ParseInLocation compiles a cron expression bound to loc. A nil loc defaults
+// to UTC. The location only affects Next/Matches, not parsing.
+func ParseInLocation(expr string, loc *time.Location) (*Schedule, error) {
+	if loc == nil {
+		loc = time.UTC
+	}
+	raw := strings.TrimSpace(expr)
+	if raw == "" {
+		return nil, fmt.Errorf("cron: empty expression")
+	}
+
+	if strings.HasPrefix(raw, "@") {
+		expanded, err := expandMacro(raw)
+		if err != nil {
+			return nil, err
+		}
+		raw = expanded
+	}
+
+	fields := strings.Fields(raw)
+	if len(fields) != 5 {
+		return nil, fmt.Errorf("cron: expected 5 fields, got %d in %q", len(fields), expr)
+	}
+
+	s := &Schedule{loc: loc, expr: strings.TrimSpace(expr)}
+	var err error
+
+	if s.minute, _, err = parseField(fields[0], 0, 59, nil); err != nil {
+		return nil, fmt.Errorf("cron: minute: %w", err)
+	}
+	if s.hour, _, err = parseField(fields[1], 0, 23, nil); err != nil {
+		return nil, fmt.Errorf("cron: hour: %w", err)
+	}
+	if s.dom, s.domStar, err = parseField(fields[2], 1, 31, nil); err != nil {
+		return nil, fmt.Errorf("cron: day-of-month: %w", err)
+	}
+	if s.month, _, err = parseField(fields[3], 1, 12, monthNames); err != nil {
+		return nil, fmt.Errorf("cron: month: %w", err)
+	}
+	// Day-of-week accepts 0..7; 7 is a second spelling of Sunday (0).
+	dowMask, dowStar, err := parseField(fields[4], 0, 7, dowNames)
+	if err != nil {
+		return nil, fmt.Errorf("cron: day-of-week: %w", err)
+	}
+	if dowMask&(1<<7) != 0 {
+		dowMask |= 1 << 0 // fold 7 → 0
+		dowMask &^= 1 << 7
+	}
+	s.dow, s.dowStar = dowMask, dowStar
+
+	return s, nil
+}
+
+// expandMacro rewrites a @macro into its 5-field equivalent.
+func expandMacro(m string) (string, error) {
+	switch strings.ToLower(m) {
+	case "@yearly", "@annually":
+		return "0 0 1 1 *", nil
+	case "@monthly":
+		return "0 0 1 * *", nil
+	case "@weekly":
+		return "0 0 * * 0", nil
+	case "@daily", "@midnight":
+		return "0 0 * * *", nil
+	case "@hourly":
+		return "0 * * * *", nil
+	case "@reboot":
+		// @reboot has no meaning for a persistent scheduler — the catchup
+		// flag covers "run if a fire was missed while down". Reject it
+		// explicitly rather than silently never firing.
+		return "", fmt.Errorf("cron: @reboot is not supported (use the job's catchup option)")
+	default:
+		return "", fmt.Errorf("cron: unknown macro %q", m)
+	}
+}
+
+// parseField parses one cron field into a bitset over [min,max]. star reports
+// whether the field began with "*" (a wildcard), which the caller needs for
+// the dom/dow union rule. names, if non-nil, maps lowercased symbolic names
+// (e.g. "mon") to values.
+func parseField(field string, min, max int, names map[string]int) (mask uint64, star bool, err error) {
+	if field == "" {
+		return 0, false, fmt.Errorf("empty field")
+	}
+	star = strings.HasPrefix(field, "*")
+	for item := range strings.SplitSeq(field, ",") {
+		m, err := parseItem(item, min, max, names)
+		if err != nil {
+			return 0, false, err
+		}
+		mask |= m
+	}
+	return mask, star, nil
+}
+
+// parseItem parses a single comma-separated element: "*", "*/n", "a", "a-b",
+// "a-b/n", or "a/n" (from a to max; this holds for any written step, even 1).
+func parseItem(item string, min, max int, names map[string]int) (uint64, error) {
+	// hasStep records that a "/n" suffix was written, even when n is 1.
+	rng, stepStr, hasStep := strings.Cut(item, "/")
+	step := 1
+	if hasStep {
+		n, err := strconv.Atoi(stepStr)
+		if err != nil || n <= 0 {
+			return 0, fmt.Errorf("invalid step %q in %q", stepStr, item)
+		}
+		step = n
+	}
+
+	var lo, hi int
+	switch {
+	case rng == "*":
+		lo, hi = min, max
+	case strings.ContainsRune(rng, '-'):
+		parts := strings.SplitN(rng, "-", 2)
+		var err error
+		if lo, err = parseValue(parts[0], names); err != nil {
+			return 0, err
+		}
+		if hi, err = parseValue(parts[1], names); err != nil {
+			return 0, err
+		}
+	default:
+		v, err := parseValue(rng, names)
+		if err != nil {
+			return 0, err
+		}
+		lo = v
+		// "a/n" means "from a to the maximum, stepping n"; a bare "a" is just a.
+		if hasStep {
+			hi = max
+		} else {
+			hi = v
+		}
+	}
+
+	if lo < min || hi > max || lo > hi {
+		return 0, fmt.Errorf("value out of range [%d,%d] in %q", min, max, item)
+	}
+
+	var mask uint64
+	for v := lo; v <= hi; v += step {
+		mask |= 1 << uint(v)
+	}
+	return mask, nil
+}
+
+// parseValue resolves a single token to an int, accepting symbolic names when
+// names is non-nil.
+func parseValue(tok string, names map[string]int) (int, error) {
+	tok = strings.TrimSpace(tok)
+	if tok == "" {
+		return 0, fmt.Errorf("empty value")
+	}
+	if names != nil {
+		if v, ok := names[strings.ToLower(tok)]; ok {
+			return v, nil
+		}
+	}
+	v, err := strconv.Atoi(tok)
+	if err != nil {
+		return 0, fmt.Errorf("invalid value %q", tok)
+	}
+	return v, nil
+}
+
+// Matches reports whether t (in the schedule's location, to the minute) is a
+// firing time.
+func (s *Schedule) Matches(t time.Time) bool {
+	t = t.In(s.loc)
+	if s.minute&(1<<uint(t.Minute())) == 0 {
+		return false
+	}
+	if s.hour&(1<<uint(t.Hour())) == 0 {
+		return false
+	}
+	if s.month&(1<<uint(int(t.Month()))) == 0 {
+		return false
+	}
+	return s.dayMatches(t)
+}
+
+// dayMatches applies the Vixie dom/dow coupling rule.
+func (s *Schedule) dayMatches(t time.Time) bool {
+	domMatch := s.dom&(1<<uint(t.Day())) != 0
+	dowMatch := s.dow&(1<<uint(int(t.Weekday()))) != 0
+	if s.domStar || s.dowStar {
+		return domMatch && dowMatch
+	}
+	return domMatch || dowMatch
+}
+
+// Next returns the first firing time strictly after the given instant, or the
+// zero time if none occurs within the search horizon. The result is in the
+// schedule's location and has zero seconds/nanoseconds.
+//
+// It advances by the coarsest non-matching unit (month → day → hour → minute).
+// Every jump is forced strictly forward: a wall-clock rebuild can land on or
+// before t around a DST transition, which would otherwise loop forever.
+func (s *Schedule) Next(after time.Time) time.Time {
+	after = after.In(s.loc)
+	limit := after.Add(matchHorizon)
+
+	// Start at the top of the next wall-clock minute by subtracting the elapsed
+	// seconds from the instant itself. Rebuilding via time.Date can select the
+	// earlier occurrence of a repeated DST hour (a result before `after`), and
+	// Truncate would misalign on non-whole-minute historical zone offsets.
+	past := time.Duration(after.Second())*time.Second + time.Duration(after.Nanosecond())
+	t := after.Add(time.Minute - past)
+
+	for t.Before(limit) {
+		jump := t.Add(time.Minute) // default: only the minute field mismatched
+		switch {
+		case s.month&(1<<uint(int(t.Month()))) == 0:
+			// First day of next month at 00:00 (month 13 normalizes to January).
+			jump = time.Date(t.Year(), t.Month()+1, 1, 0, 0, 0, 0, s.loc)
+		case !s.dayMatches(t):
+			// 00:00 of the next day (day overflow normalizes into next month).
+			jump = time.Date(t.Year(), t.Month(), t.Day()+1, 0, 0, 0, 0, s.loc)
+		case s.hour&(1<<uint(t.Hour())) == 0:
+			// Top of the next hour, measured in elapsed time so a repeated DST
+			// hour cannot send us back to its first occurrence.
+			jump = t.Add(time.Duration(60-t.Minute()) * time.Minute)
+		case s.minute&(1<<uint(t.Minute())) != 0:
+			return t
+		}
+		if !jump.After(t) {
+			// A nonexistent local midnight resolved backwards; creep forward.
+			jump = t.Add(time.Minute)
+		}
+		t = jump
+	}
+	return time.Time{}
+}
+
+// String returns the original expression the schedule was parsed from.
+func (s *Schedule) String() string { return s.expr }
diff --git a/internal/schedule/cronexpr_test.go b/internal/schedule/cronexpr_test.go
new file mode 100644
index 0000000..c541858
--- /dev/null
+++ b/internal/schedule/cronexpr_test.go
@@ -0,0 +1,278 @@
+package schedule
+
+import (
+	"testing"
+	"time"
+)
+
+// mustParse fails the test if expr does not parse.
+func mustParse(t *testing.T, expr string) *Schedule {
+	t.Helper()
+	s, err := Parse(expr)
+	if err != nil {
+		t.Fatalf("Parse(%q): unexpected error: %v", expr, err)
+	}
+	return s
+}
+
+// ── Next: basic stepping ────────────────────────────────────────────────
+
+func TestNext_EveryMinute(t *testing.T) {
+	s := mustParse(t, "* * * * *")
+	after := time.Date(2026, 6, 4, 10, 30, 15, 0, time.UTC)
+	got := s.Next(after)
+	want := time.Date(2026, 6, 4, 10, 31, 0, 0, time.UTC)
+	if !got.Equal(want) {
+		t.Errorf("Next = %v, want %v", got, want)
+	}
+}
+
+func TestNext_TopOfNextHour(t *testing.T) {
+	s := mustParse(t, "0 * * * *")
+	after := time.Date(2026, 6, 4, 10, 30, 0, 0, time.UTC)
+	got := s.Next(after)
+	want := time.Date(2026, 6, 4, 11, 0, 0, 0, time.UTC)
+	if !got.Equal(want) {
+		t.Errorf("Next = %v, want %v", got, want)
+	}
+}
+
+func TestNext_WeekdayNineAM(t *testing.T) {
+	s := mustParse(t, "0 9 * * 1-5")
+	// Friday 2026-06-05 10:00 → next is Monday 2026-06-08 09:00.
+	after := time.Date(2026, 6, 5, 10, 0, 0, 0, time.UTC)
+	got := s.Next(after)
+	want := time.Date(2026, 6, 8, 9, 0, 0, 0, time.UTC)
+	if !got.Equal(want) {
+		t.Errorf("Next = %v (weekday %s), want %v", got, got.Weekday(), want)
+	}
+}
+
+func TestNext_StepMinutes(t *testing.T) {
+	s := mustParse(t, "*/15 * * * *")
+	after := time.Date(2026, 6, 4, 10, 7, 0, 0, time.UTC)
+	want := []time.Time{
+		time.Date(2026, 6, 4, 10, 15, 0, 0, time.UTC),
+		time.Date(2026, 6, 4, 10, 30, 0, 0, time.UTC),
+		time.Date(2026, 6, 4, 10, 45, 0, 0, time.UTC),
+		time.Date(2026, 6, 4, 11, 0, 0, 0, time.UTC),
+	}
+	for i, w := range want {
+		after = s.Next(after)
+		if !after.Equal(w) {
+			t.Fatalf("step %d: Next = %v, want %v", i, after, w)
+		}
+	}
+}
+
+func TestNext_List(t *testing.T) {
+	s := mustParse(t, "0,30 * * * *")
+	after := time.Date(2026, 6, 4, 10, 10, 0, 0, time.UTC)
+	got := s.Next(after)
+	if want := time.Date(2026, 6, 4, 10, 30, 0, 0, time.UTC); !got.Equal(want) {
+		t.Errorf("Next = %v, want %v", got, want)
+	}
+	got = s.Next(got)
+	if want := time.Date(2026, 6, 4, 11, 0, 0, 0, time.UTC); !got.Equal(want) {
+		t.Errorf("Next = %v, want %v", got, want)
+	}
+}
+
+func TestNext_StrictlyAfter(t *testing.T) {
+	// When `after` is exactly a firing instant, Next must return the FOLLOWING
+	// one, never the same instant (prevents double-fire).
+	s := mustParse(t, "30 9 * * *")
+	at := time.Date(2026, 6, 4, 9, 30, 0, 0, time.UTC)
+	got := s.Next(at)
+	want := time.Date(2026, 6, 5, 9, 30, 0, 0, time.UTC)
+	if !got.Equal(want) {
+		t.Errorf("Next = %v, want %v (must be strictly after)", got, want)
+	}
+}
+
+// ── Vixie dom/dow coupling ──────────────────────────────────────────────
+
+func TestNext_DomDowUnion(t *testing.T) {
+	// Both fields restricted → union: 13th OR any Friday.
+	s := mustParse(t, "0 0 13 * 5")
+	// Start 2026-06-01 (Mon). June 5 is a Friday → first hit.
+	after := time.Date(2026, 6, 1, 0, 0, 0, 0, time.UTC)
+	got := s.Next(after)
+	want := time.Date(2026, 6, 5, 0, 0, 0, 0, time.UTC) // Friday, before the 13th
+	if !got.Equal(want) {
+		t.Errorf("Next = %v (%s), want %v", got, got.Weekday(), want)
+	}
+}
+
+func TestNext_DomRestrictedDowStar(t *testing.T) {
+	// dow is "*" → intersection: only the 1st and 15th.
+	s := mustParse(t, "0 0 1,15 * *")
+	after := time.Date(2026, 6, 4, 0, 0, 0, 0, time.UTC)
+	got := s.Next(after)
+	if want := time.Date(2026, 6, 15, 0, 0, 0, 0, time.UTC); !got.Equal(want) {
+		t.Errorf("Next = %v, want %v", got, want)
+	}
+	got = s.Next(got)
+	if want := time.Date(2026, 7, 1, 0, 0, 0, 0, time.UTC); !got.Equal(want) {
+		t.Errorf("Next = %v, want %v", got, want)
+	}
+}
+
+// ── Names and macros ────────────────────────────────────────────────────
+
+func TestParse_MonthAndDowNames(t *testing.T) {
+	s := mustParse(t, "0 0 * JAN MON")
+	// First Monday in Jan 2027 at 00:00. 2027-01-01 is a Friday; first Mon is 4th.
+	after := time.Date(2026, 12, 1, 0, 0, 0, 0, time.UTC)
+	got := s.Next(after)
+	want := time.Date(2027, 1, 4, 0, 0, 0, 0, time.UTC)
+	if !got.Equal(want) {
+		t.Errorf("Next = %v (%s), want %v", got, got.Weekday(), want)
+	}
+}
+
+func TestParse_Macros(t *testing.T) {
+	cases := map[string]string{
+		"@yearly":  "0 0 1 1 *",
+		"@monthly": "0 0 1 * *",
+		"@weekly":  "0 0 * * 0",
+		"@daily":   "0 0 * * *",
+		"@hourly":  "0 * * * *",
+	}
+	for macro, equiv := range cases {
+		ms := mustParse(t, macro)
+		es := mustParse(t, equiv)
+		after := time.Date(2026, 6, 4, 12, 34, 0, 0, time.UTC)
+		if !ms.Next(after).Equal(es.Next(after)) {
+			t.Errorf("%s and %q disagree: %v vs %v", macro, equiv,
+				ms.Next(after), es.Next(after))
+		}
+	}
+}
+
+func TestNext_DowSevenIsSunday(t *testing.T) {
+	s7 := mustParse(t, "0 0 * * 7")
+	s0 := mustParse(t, "0 0 * * 0")
+	after := time.Date(2026, 6, 4, 0, 0, 0, 0, time.UTC)
+	if !s7.Next(after).Equal(s0.Next(after)) {
+		t.Errorf("dow 7 != dow 0: %v vs %v", s7.Next(after), s0.Next(after))
+	}
+	if got := s7.Next(after); got.Weekday() != time.Sunday {
+		t.Errorf("Next weekday = %s, want Sunday", got.Weekday())
+	}
+}
+
+// ── Leap day ────────────────────────────────────────────────────────────
+
+func TestNext_LeapDay(t *testing.T) {
+	s := mustParse(t, "0 0 29 2 *")
+	// 2026 and 2027 are not leap years; next Feb 29 is 2028.
+	after := time.Date(2026, 3, 1, 0, 0, 0, 0, time.UTC)
+	got := s.Next(after)
+	want := time.Date(2028, 2, 29, 0, 0, 0, 0, time.UTC)
+	if !got.Equal(want) {
+		t.Errorf("Next = %v, want %v", got, want)
+	}
+}
+
+func TestNext_ImpossibleReturnsZero(t *testing.T) {
+	// Feb 30 never exists → Next exhausts the horizon and returns zero.
+	s := mustParse(t, "0 0 30 2 *")
+	got := s.Next(time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC))
+	if !got.IsZero() {
+		t.Errorf("Next = %v, want zero time", got)
+	}
+}
+
+// ── Timezone ────────────────────────────────────────────────────────────
+
+func TestNext_InLocation(t *testing.T) {
+	berlin, err := time.LoadLocation("Europe/Berlin")
+	if err != nil {
+		t.Skipf("tz data unavailable: %v", err)
+	}
+	s, err := ParseInLocation("0 9 * * *", berlin)
+	if err != nil {
+		t.Fatalf("ParseInLocation: %v", err)
+	}
+	// 08:00 UTC in summer (CEST = UTC+2) is 10:00 Berlin → already past 09:00,
+	// so next fire is 09:00 Berlin tomorrow = 07:00 UTC next day.
+	after := time.Date(2026, 6, 4, 8, 0, 0, 0, time.UTC)
+	got := s.Next(after)
+	wantBerlin := time.Date(2026, 6, 5, 9, 0, 0, 0, berlin)
+	if !got.Equal(wantBerlin) {
+		t.Errorf("Next = %v, want %v", got.In(berlin), wantBerlin)
+	}
+}
+
+// ── Matches ─────────────────────────────────────────────────────────────
+
+func TestMatches(t *testing.T) {
+	s := mustParse(t, "30 9 * * 1-5")
+	tests := []struct {
+		t    time.Time
+		want bool
+	}{
+		{time.Date(2026, 6, 4, 9, 30, 0, 0, time.UTC), true},   // Thu 09:30
+		{time.Date(2026, 6, 4, 9, 31, 0, 0, time.UTC), false},  // wrong minute
+		{time.Date(2026, 6, 4, 10, 30, 0, 0, time.UTC), false}, // wrong hour
+		{time.Date(2026, 6, 6, 9, 30, 0, 0, time.UTC), false},  // Saturday
+	}
+	for _, tc := range tests {
+		if got := s.Matches(tc.t); got != tc.want {
+			t.Errorf("Matches(%v) = %v, want %v", tc.t, got, tc.want)
+		}
+	}
+}
+
+// ── Parse errors ────────────────────────────────────────────────────────
+
+func TestParse_Errors(t *testing.T) {
+	bad := []string{
+		"",            // empty
+		"* * * *",     // 4 fields
+		"* * * * * *", // 6 fields
+		"60 * * * *",  // minute out of range
+		"* 24 * * *",  // hour out of range
+		"* * 0 * *",   // dom below range
+		"* * 32 * *",  // dom above range
+		"* * * 13 *",  // month out of range
+		"* * * * 8",   // dow above range (7 max)
+		"*/0 * * * *", // zero step
+		"5-1 * * * *", // inverted range
+		"a * * * *",   // non-numeric
+		"* * * FOO *", // bad month name
+		"@every 5m",   // unsupported macro
+		"@reboot",     // explicitly rejected
+		"@bogus",      // unknown macro
+	}
+	for _, expr := range bad {
+		if _, err := Parse(expr); err == nil {
+			t.Errorf("Parse(%q): expected error, got nil", expr)
+		}
+	}
+}
+
+func TestParse_StepFromValue(t *testing.T) {
+	// "a/n" means from a to max stepping n: 5,25,45 for minutes.
+	s := mustParse(t, "5/20 * * * *")
+	after := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	for _, w := range []int{5, 25, 45} {
+		after = s.Next(after)
+		if after.Minute() != w {
+			t.Fatalf("Next minute = %d, want %d", after.Minute(), w)
+		}
+	}
+	// After 45 the next is 05 of the following hour.
+	after = s.Next(after)
+	if after.Minute() != 5 || after.Hour() != 11 {
+		t.Errorf("Next = %02d:%02d, want 11:05", after.Hour(), after.Minute())
+	}
+}
+
+func TestString(t *testing.T) {
+	s := mustParse(t, "0 9 * * 1-5")
+	if s.String() != "0 9 * * 1-5" {
+		t.Errorf("String = %q", s.String())
+	}
+}
diff --git a/internal/schedule/scheduler.go b/internal/schedule/scheduler.go
new file mode 100644
index 0000000..ce7579a
--- /dev/null
+++ b/internal/schedule/scheduler.go
@@ -0,0 +1,336 @@
+package schedule
+
+import (
+	"context"
+	"sync"
+	"time"
+)
+
+// Runner executes one scheduled job's task and returns the agent's final text,
+// the tokens it consumed (for budgeting/telemetry; 0 if unknown), and any
+// error. Implementations live outside this package — the daemon and the
+// Telegram bot each build an agent-backed Runner — so the engine stays
+// decoupled from the agent and is trivially faked in tests.
+type Runner interface {
+	Run(ctx context.Context, job Job) (result string, tokens int64, err error)
+}
+
+// Deliverer routes a successful job result to its destination (Telegram chat,
+// stdout, a log file). It is called only when Run succeeded.
+type Deliverer interface {
+	Deliver(job Job, result string) error
+}
+
+// Logger is the minimal logging surface the engine needs, satisfied by the
+// Telegram file logger and by NopLogger. Key/value variadics mirror slog.
+type Logger interface {
+	Info(msg string, kv ...any)
+	Error(msg string, kv ...any)
+}
+
+// NopLogger discards all log output.
+type NopLogger struct{}
+
+func (NopLogger) Info(string, ...any)  {}
+func (NopLogger) Error(string, ...any) {}
+
+// Options configures a Scheduler. Zero values fall back to sensible defaults.
+type Options struct {
+	MaxConcurrent int              // max jobs running at once (default 2)
+	DefaultTZ     *time.Location   // timezone for jobs with no Timezone set (default UTC)
+	Catchup       bool             // global default: run a job once if a fire was missed while down
+	ReloadEvery   time.Duration    // how often to poll schedules.json mtime for changes (default 30s)
+	Logger        Logger           // defaults to NopLogger
+	Now           func() time.Time // injectable clock for decisions (default time.Now); tests override
+}
+
+const (
+	defaultMaxConcurrent = 2
+	defaultReloadEvery   = 30 * time.Second
+	maxSleep             = time.Hour // cap on a single idle sleep so the loop stays responsive
+	resultPreviewRunes   = 280       // how much of a result we persist as LastResult
+)
+
+// Scheduler fires jobs from a Store on their cron schedule, runs them through
+// a Runner, and routes results through a Deliverer. It is safe for a single
+// Run call; do not call Run concurrently on the same Scheduler.
+type Scheduler struct {
+	store     *Store
+	runner    Runner
+	deliverer Deliverer
+	opts      Options
+	log       Logger
+
+	mu       sync.Mutex
+	jobs     map[string]Job       // id → latest definition
+	compiled map[string]*Schedule // id → parsed cron
+	sig      map[string]string    // id → cron|tz signature, to detect changes on reload
+	next     map[string]time.Time // id → next fire time
+	runs     map[string]int       // id → total fires so far
+	running  map[string]bool      // id → currently executing (overlap guard)
+
+	sem chan struct{}  // bounds concurrent executions
+	wg  sync.WaitGroup // tracks in-flight executions for graceful drain
+}
+
+// New builds a Scheduler. The store, runner, and deliverer are required.
+func New(store *Store, runner Runner, deliverer Deliverer, opts Options) *Scheduler {
+	if opts.MaxConcurrent <= 0 {
+		opts.MaxConcurrent = defaultMaxConcurrent
+	}
+	if opts.DefaultTZ == nil {
+		opts.DefaultTZ = time.UTC
+	}
+	if opts.ReloadEvery <= 0 {
+		opts.ReloadEvery = defaultReloadEvery
+	}
+	if opts.Logger == nil {
+		opts.Logger = NopLogger{}
+	}
+	if opts.Now == nil {
+		opts.Now = time.Now
+	}
+	return &Scheduler{
+		store:     store,
+		runner:    runner,
+		deliverer: deliverer,
+		opts:      opts,
+		log:       opts.Logger,
+		jobs:      map[string]Job{},
+		compiled:  map[string]*Schedule{},
+		sig:       map[string]string{},
+		next:      map[string]time.Time{},
+		runs:      map[string]int{},
+		running:   map[string]bool{},
+		sem:       make(chan struct{}, opts.MaxConcurrent),
+	}
+}
+
+// Run drives the scheduler until ctx is cancelled. On cancellation it stops
+// scheduling new fires and waits for in-flight executions to finish before
+// returning ctx.Err().
+func (s *Scheduler) Run(ctx context.Context) error {
+	s.reconcile(s.opts.Now())
+	lastMod := s.store.ModTime()
+
+	reload := time.NewTicker(s.opts.ReloadEvery)
+	defer reload.Stop()
+
+	for {
+		now := s.opts.Now()
+		s.fireDue(ctx, now)
+
+		timer := time.NewTimer(s.timeToNext(now))
+		select {
+		case <-ctx.Done():
+			timer.Stop()
+			s.log.Info("scheduler: shutting down, draining in-flight jobs")
+			s.wg.Wait()
+			return ctx.Err()
+		case <-timer.C:
+			// Earliest fire is (about) due — loop and fireDue handles it.
+		case <-reload.C:
+			timer.Stop()
+			if m := s.store.ModTime(); m.After(lastMod) {
+				lastMod = m
+				s.log.Info("scheduler: schedules changed, reloading")
+				s.reconcile(s.opts.Now())
+			}
+		}
+	}
+}
+
+// Wait blocks until all in-flight executions complete. Intended for tests.
+func (s *Scheduler) Wait() { s.wg.Wait() }
+
+// reconcile loads job definitions, (re)compiles their schedules, and seeds the
+// next-fire time for any job not already tracked. Jobs that disappeared or
+// were disabled are dropped. It is called on startup and whenever the
+// schedules file changes.
+func (s *Scheduler) reconcile(now time.Time) {
+	jobs, err := s.store.List()
+	if err != nil {
+		s.log.Error("scheduler: list jobs failed", "error", err)
+		return
+	}
+	state, err := s.store.LoadState()
+	if err != nil {
+		s.log.Error("scheduler: load state failed", "error", err)
+		state = map[string]RunState{}
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	seen := make(map[string]bool, len(jobs))
+	for _, job := range jobs {
+		if !job.Enabled {
+			continue
+		}
+		sched, err := compile(job, s.opts.DefaultTZ)
+		if err != nil {
+			// A malformed job is skipped, not fatal — one bad entry must not
+			// stop every other schedule.
+			s.log.Error("scheduler: skipping job with invalid schedule", "id", job.ID, "name", job.Name, "error", err)
+			continue
+		}
+		seen[job.ID] = true
+		s.jobs[job.ID] = job
+		s.compiled[job.ID] = sched
+		s.runs[job.ID] = state[job.ID].Runs
+
+		newSig := job.Cron + "|" + job.Timezone
+		if _, tracked := s.next[job.ID]; tracked && s.sig[job.ID] == newSig {
+			// Unchanged and already scheduled — leave its next-fire intact so an
+			// unrelated file edit doesn't shift this job.
+			continue
+		}
+		s.sig[job.ID] = newSig
+
+		// Determine the first fire for a newly-seen or changed job, applying the
+		// missed-run policy against any persisted next-fire.
+		prevNext := state[job.ID].NextRun
+		catchup := job.Catchup || s.opts.Catchup
+		switch {
+		case !prevNext.IsZero() && prevNext.Before(now) && catchup:
+			// A fire was missed while we were down and catchup is on → run asap.
+			s.next[job.ID] = now
+		case !prevNext.IsZero() && prevNext.Before(now):
+			// Missed but no catchup → record the skip and move on.
+			s.next[job.ID] = sched.Next(now)
+			s.log.Info("scheduler: skipping missed fire", "id", job.ID, "name", job.Name)
+			_ = s.store.SaveState(RunState{
+				JobID: job.ID, LastStatus: StatusSkipped, LastRun: now,
+				NextRun: s.next[job.ID], Runs: s.runs[job.ID],
+			})
+		default:
+			s.next[job.ID] = sched.Next(now)
+		}
+	}
+
+	// Drop jobs that are gone or newly disabled.
+	for id := range s.next {
+		if !seen[id] {
+			delete(s.next, id)
+			delete(s.compiled, id)
+			delete(s.jobs, id)
+			delete(s.sig, id)
+			delete(s.runs, id)
+		}
+	}
+}
+
+// fireDue launches every job whose next-fire time is at or before now and
+// advances it to the following occurrence. A zero next-fire (the schedule has
+// no reachable occurrence) never fires. A job still executing is not fired
+// again (overlap guard); its schedule still advances so it doesn't pile up.
+func (s *Scheduler) fireDue(ctx context.Context, now time.Time) {
+	s.mu.Lock()
+	var toFire []Job
+	for id, nt := range s.next {
+		if nt.IsZero() || nt.After(now) {
+			continue
+		}
+		sched := s.compiled[id]
+		s.next[id] = sched.Next(now) // schedule the following fire regardless
+		if s.running[id] {
+			s.log.Info("scheduler: previous run still in flight, skipping this fire", "id", id)
+			continue
+		}
+		s.running[id] = true
+		s.runs[id]++
+		toFire = append(toFire, s.jobs[id])
+	}
+	s.mu.Unlock()
+
+	for _, job := range toFire {
+		s.sem <- struct{}{} // acquire (blocks if at MaxConcurrent)
+		s.wg.Add(1)
+		go func(job Job, firedAt time.Time) {
+			defer s.wg.Done()
+			defer func() { <-s.sem }()
+			s.execute(ctx, job, firedAt)
+		}(job, now)
+	}
+}
+
+// execute runs a single job, delivers its result, and persists run state.
+func (s *Scheduler) execute(ctx context.Context, job Job, firedAt time.Time) {
+	defer func() {
+		s.mu.Lock()
+		s.running[job.ID] = false
+		s.mu.Unlock()
+	}()
+
+	s.mu.Lock()
+	st := RunState{JobID: job.ID, LastRun: firedAt, Runs: s.runs[job.ID], NextRun: s.next[job.ID]}
+	s.mu.Unlock()
+
+	result, tokens, err := s.runner.Run(ctx, job)
+	switch {
+	case err != nil:
+		st.LastStatus = StatusError
+		st.LastError = err.Error()
+		s.log.Error("scheduler: job run failed", "id", job.ID, "name", job.Name, "error", err)
+	default:
+		if derr := s.deliverer.Deliver(job, result); derr != nil {
+			st.LastStatus = StatusError
+			st.LastError = "delivery: " + derr.Error()
+			s.log.Error("scheduler: delivery failed", "id", job.ID, "name", job.Name, "error", derr)
+		} else {
+			st.LastStatus = StatusOK
+			st.LastResult = preview(result)
+			s.log.Info("scheduler: job delivered", "id", job.ID, "name", job.Name, "tokens", tokens)
+		}
+	}
+	if serr := s.store.SaveState(st); serr != nil {
+		s.log.Error("scheduler: save state failed", "id", job.ID, "error", serr)
+	}
+}
+
+// timeToNext returns how long to sleep until the earliest pending fire,
+// clamped to [0, maxSleep]. With no jobs it returns maxSleep.
+func (s *Scheduler) timeToNext(now time.Time) time.Duration {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	earliest := time.Time{}
+	for _, nt := range s.next {
+		if earliest.IsZero() || nt.Before(earliest) {
+			earliest = nt
+		}
+	}
+	if earliest.IsZero() {
+		return maxSleep
+	}
+	d := earliest.Sub(now)
+	if d < 0 {
+		return 0
+	}
+	if d > maxSleep {
+		return maxSleep
+	}
+	return d
+}
+
+// compile parses a job's cron expression in its timezone (or the supplied
+// default when the job specifies none).
+func compile(job Job, defaultTZ *time.Location) (*Schedule, error) {
+	loc := defaultTZ
+	if job.Timezone != "" {
+		l, err := time.LoadLocation(job.Timezone)
+		if err != nil {
+			return nil, err
+		}
+		loc = l
+	}
+	return ParseInLocation(job.Cron, loc)
+}
+
+// preview truncates a result for storage in RunState.LastResult.
+func preview(s string) string {
+	r := []rune(s)
+	if len(r) <= resultPreviewRunes {
+		return s
+	}
+	return string(r[:resultPreviewRunes]) + "…"
+}
diff --git a/internal/schedule/scheduler_test.go b/internal/schedule/scheduler_test.go
new file mode 100644
index 0000000..8494e1a
--- /dev/null
+++ b/internal/schedule/scheduler_test.go
@@ -0,0 +1,369 @@
+package schedule
+
+import (
+	"context"
+	"strings"
+	"sync"
+	"testing"
+	"time"
+)
+
+// ── Test doubles ────────────────────────────────────────────────────────
+
+type fakeRunner struct { // scripted test double; its Run method records every call
+	mu      sync.Mutex    // guards calls
+	calls   []Job         // jobs passed to Run, in call order
+	result  string        // text returned by every Run
+	err     error         // error returned by every Run
+	block   chan struct{} // if non-nil, Run blocks on it (simulate a slow job)
+	started chan string   // if non-nil, receives job ID when Run begins
+}
+
+func (f *fakeRunner) Run(_ context.Context, job Job) (string, int64, error) { // the context is ignored
+	f.mu.Lock()
+	f.calls = append(f.calls, job) // record first, so callCount sees a run that is still blocked below
+	f.mu.Unlock()
+	if f.started != nil {
+		f.started <- job.ID // announce the run; this send waits if nobody can receive yet
+	}
+	if f.block != nil {
+		<-f.block // park until the test closes (or sends on) block
+	}
+	return f.result, 7, f.err // 7 is a fixed dummy for the int64 result
+}
+
+func (f *fakeRunner) callCount() int { // number of Run calls recorded so far
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	return len(f.calls)
+}
+
+type fakeDeliverer struct { // scripted test double; its Deliver method records results
+	mu        sync.Mutex // guards delivered inside Deliver
+	delivered []string   // results accepted by Deliver, in call order
+	err       error      // if non-nil, every Deliver returns it and records nothing
+}
+
+func (f *fakeDeliverer) Deliver(_ Job, result string) error {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+	if f.err == nil {
+		f.delivered = append(f.delivered, result)
+	}
+	// f.err doubles as the outcome: nil after recording, the scripted error otherwise.
+	return f.err
+}
+
+// peekNext returns s.next[id] under s.mu; the zero time means id is not tracked.
+func (s *Scheduler) peekNext(id string) time.Time {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	return s.next[id]
+}
+
+// addJob stores j via st.Add and returns the stored copy; an Add error is fatal.
+func addJob(t *testing.T, st *Store, j Job) Job {
+	t.Helper()
+	got, err := st.Add(j)
+	if err != nil {
+		t.Fatalf("Add: %v", err)
+	}
+	return got
+}
+
+// ── fireDue ─────────────────────────────────────────────────────────────
+
+func TestFireDue_RunsAndDelivers(t *testing.T) { // happy path: run, deliver, persist state, advance next
+	st := newTestStore(t)
+	job := addJob(t, st, Job{Name: "j", Cron: "* * * * *", Task: "do it",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+
+	runner := &fakeRunner{result: "hello"}
+	deliv := &fakeDeliverer{}
+	s := New(st, runner, deliv, Options{})
+
+	t0 := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC) // explicit clock: no wall-time dependence
+	s.reconcile(t0)
+
+	// Not due yet: the scheduled fire is expected to lie strictly after t0.
+	s.fireDue(context.Background(), t0)
+	s.Wait()
+	if runner.callCount() != 0 {
+		t.Fatalf("fired before due: %d calls", runner.callCount())
+	}
+
+	// Fire exactly at the scheduled instant.
+	due := s.peekNext(job.ID)
+	s.fireDue(context.Background(), due)
+	s.Wait()
+
+	if runner.callCount() != 1 {
+		t.Fatalf("expected 1 run, got %d", runner.callCount())
+	}
+	if len(deliv.delivered) != 1 || deliv.delivered[0] != "hello" {
+		t.Fatalf("delivered = %v", deliv.delivered)
+	}
+	state, _ := st.LoadState() // a load error would show up as zero-valued state below
+	if state[job.ID].LastStatus != StatusOK {
+		t.Errorf("status = %q, want ok", state[job.ID].LastStatus)
+	}
+	if state[job.ID].Runs != 1 {
+		t.Errorf("runs = %d, want 1", state[job.ID].Runs)
+	}
+	if state[job.ID].LastResult != "hello" {
+		t.Errorf("LastResult = %q", state[job.ID].LastResult)
+	}
+	// The next fire must have moved strictly past the instant that just fired.
+	if !s.peekNext(job.ID).After(due) {
+		t.Errorf("next did not advance after fire")
+	}
+}
+
+func TestFireDue_RunnerError(t *testing.T) { // a failing runner must skip delivery and persist an error state
+	st := newTestStore(t)
+	job := addJob(t, st, Job{Name: "j", Cron: "* * * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	runner := &fakeRunner{err: context.DeadlineExceeded} // stand-in error returned by every Run
+	deliv := &fakeDeliverer{}
+	s := New(st, runner, deliv, Options{})
+
+	t0 := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(t0)
+	s.fireDue(context.Background(), s.peekNext(job.ID)) // fire exactly at the scheduled instant
+	s.Wait()
+
+	if len(deliv.delivered) != 0 {
+		t.Error("delivery should not happen when runner errored")
+	}
+	state, _ := st.LoadState()
+	if state[job.ID].LastStatus != StatusError || state[job.ID].LastError == "" {
+		t.Errorf("expected error state, got %+v", state[job.ID])
+	}
+}
+
+func TestFireDue_DeliveryError(t *testing.T) { // run succeeds, delivery fails: error state with a "delivery:" prefix
+	st := newTestStore(t)
+	job := addJob(t, st, Job{Name: "j", Cron: "* * * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	runner := &fakeRunner{result: "ok"}
+	deliv := &fakeDeliverer{err: context.Canceled} // stand-in error returned by every Deliver
+	s := New(st, runner, deliv, Options{})
+
+	t0 := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(t0)
+	s.fireDue(context.Background(), s.peekNext(job.ID)) // fire exactly at the scheduled instant
+	s.Wait()
+
+	state, _ := st.LoadState()
+	if state[job.ID].LastStatus != StatusError {
+		t.Errorf("status = %q, want error", state[job.ID].LastStatus)
+	}
+	if !strings.HasPrefix(state[job.ID].LastError, "delivery:") {
+		t.Errorf("LastError = %q, want delivery: prefix", state[job.ID].LastError)
+	}
+}
+
+func TestFireDue_OverlapGuard(t *testing.T) { // a job still running must not be started a second time
+	st := newTestStore(t)
+	job := addJob(t, st, Job{Name: "slow", Cron: "* * * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	runner := &fakeRunner{result: "ok", block: make(chan struct{}), started: make(chan string, 1)}
+	s := New(st, runner, &fakeDeliverer{}, Options{})
+
+	t0 := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(t0)
+
+	// First fire starts and then blocks inside the runner on runner.block.
+	due1 := s.peekNext(job.ID)
+	s.fireDue(context.Background(), due1)
+	<-runner.started // synchronise: the first run is now in flight
+
+	// A second due fire while the first is still running must be skipped.
+	due2 := s.peekNext(job.ID)
+	s.fireDue(context.Background(), due2)
+	if c := runner.callCount(); c != 1 {
+		t.Fatalf("overlap guard failed: %d concurrent runs", c)
+	}
+
+	close(runner.block) // release the first run so it can finish
+	s.Wait()
+	if c := runner.callCount(); c != 1 {
+		t.Errorf("expected exactly 1 run total, got %d", c)
+	}
+}
+
+// ── Missed-run policy ───────────────────────────────────────────────────
+
+func TestReconcile_MissedSkip(t *testing.T) { // Catchup off: a missed fire is recorded as skipped, not run
+	st := newTestStore(t)
+	job := addJob(t, st, Job{Name: "j", Cron: "0 9 * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true, Catchup: false})
+	// Seed persisted state with a NextRun that lies a day before "now" below.
+	past := time.Date(2026, 6, 3, 9, 0, 0, 0, time.UTC)
+	_ = st.SaveState(RunState{JobID: job.ID, NextRun: past}) // NOTE(review): a failed save only surfaces via the status check below
+
+	runner := &fakeRunner{}
+	s := New(st, runner, &fakeDeliverer{}, Options{})
+	now := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(now)
+
+	// Next must be in the future (forward-scheduled), not the missed instant.
+	if !s.peekNext(job.ID).After(now) {
+		t.Errorf("missed fire not skipped forward: next=%v", s.peekNext(job.ID))
+	}
+	s.fireDue(context.Background(), now)
+	s.Wait()
+	if runner.callCount() != 0 {
+		t.Error("missed fire should not run when catchup is off")
+	}
+	state, _ := st.LoadState()
+	if state[job.ID].LastStatus != StatusSkipped {
+		t.Errorf("status = %q, want skipped", state[job.ID].LastStatus)
+	}
+}
+
+func TestReconcile_MissedCatchup(t *testing.T) { // Catchup on: a missed fire is run at the next opportunity
+	st := newTestStore(t)
+	job := addJob(t, st, Job{Name: "j", Cron: "0 9 * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true, Catchup: true})
+	past := time.Date(2026, 6, 3, 9, 0, 0, 0, time.UTC) // persisted NextRun, a day before "now"
+	_ = st.SaveState(RunState{JobID: job.ID, NextRun: past})
+
+	runner := &fakeRunner{result: "caught up"}
+	deliv := &fakeDeliverer{}
+	s := New(st, runner, deliv, Options{})
+	now := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(now)
+
+	// Catchup must leave the next fire at or before now, i.e. immediately due.
+	if s.peekNext(job.ID).After(now) {
+		t.Fatalf("catchup did not schedule an immediate fire: next=%v", s.peekNext(job.ID))
+	}
+	s.fireDue(context.Background(), now)
+	s.Wait()
+	if runner.callCount() != 1 {
+		t.Errorf("catchup did not run the missed job: %d calls", runner.callCount())
+	}
+}
+
+// ── Reconcile lifecycle ─────────────────────────────────────────────────
+
+func TestReconcile_DropsDisabled(t *testing.T) { // disabling a tracked job removes it on the next reconcile
+	st := newTestStore(t)
+	job := addJob(t, st, Job{Name: "j", Cron: "* * * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	s := New(st, &fakeRunner{}, &fakeDeliverer{}, Options{})
+	now := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(now)
+	if s.peekNext(job.ID).IsZero() { // zero time means the job is not tracked
+		t.Fatal("job not tracked after reconcile")
+	}
+	if err := st.SetEnabled(job.ID, false); err != nil {
+		t.Fatalf("SetEnabled: %v", err)
+	}
+	s.reconcile(now) // same clock: only the Enabled flag changed
+	if !s.peekNext(job.ID).IsZero() {
+		t.Error("disabled job not dropped from schedule")
+	}
+}
+
+func TestReconcile_UnchangedKeepsNextFire(t *testing.T) { // reconcile must not recompute fires for untouched jobs
+	st := newTestStore(t)
+	a := addJob(t, st, Job{Name: "a", Cron: "0 9 * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	s := New(st, &fakeRunner{}, &fakeDeliverer{}, Options{})
+	now := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(now)
+	firstNext := s.peekNext(a.ID) // baseline to compare against after the second reconcile
+
+	// Adding an unrelated job and reconciling a minute later must not shift a's fire.
+	addJob(t, st, Job{Name: "b", Cron: "0 10 * * *", Task: "y",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	s.reconcile(now.Add(time.Minute))
+	if !s.peekNext(a.ID).Equal(firstNext) {
+		t.Errorf("unrelated reconcile shifted job a: %v != %v", s.peekNext(a.ID), firstNext)
+	}
+}
+
+func TestReconcile_SkipsInvalidJobWrittenDirectly(t *testing.T) {
+	// A malformed job that bypassed Validate (e.g. a hand-edited file) must be
+	// skipped without aborting the reconcile of healthy jobs.
+	st := newTestStore(t)
+	good := addJob(t, st, Job{Name: "good", Cron: "* * * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	// Write a bad job straight into the doc, sidestepping Add's validation.
+	doc, _ := st.loadDoc() // unexported IO helpers are called without st.mu; fine in this single-goroutine test
+	doc.Jobs = append(doc.Jobs, Job{ID: "jb-bad", Name: "bad", Cron: "not-a-cron",
+		Task: "x", Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	_ = st.saveDoc(doc)
+
+	s := New(st, &fakeRunner{}, &fakeDeliverer{}, Options{})
+	s.reconcile(time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC))
+	if s.peekNext(good.ID).IsZero() {
+		t.Error("good job dropped because a sibling was invalid")
+	}
+	if !s.peekNext("jb-bad").IsZero() {
+		t.Error("invalid job should not be scheduled")
+	}
+}
+
+func TestReconcile_JobTimezone(t *testing.T) { // a job's own Timezone must beat Options.DefaultTZ
+	berlin, err := time.LoadLocation("Europe/Berlin")
+	if err != nil {
+		t.Skipf("tz data unavailable: %v", err) // hosts without tz data skip instead of failing
+	}
+	st := newTestStore(t)
+	// Daily 09:00 Berlin; the engine's DefaultTZ is UTC, so the job's own
+	// Timezone must win when the cron expression is compiled.
+	job := addJob(t, st, Job{Name: "tz", Cron: "0 9 * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true, Timezone: "Europe/Berlin"})
+	s := New(st, &fakeRunner{}, &fakeDeliverer{}, Options{DefaultTZ: time.UTC})
+
+	// 06:00 UTC = 08:00 Berlin (CEST) → next fire is 09:00 Berlin today.
+	now := time.Date(2026, 6, 4, 6, 0, 0, 0, time.UTC)
+	s.reconcile(now)
+	got := s.peekNext(job.ID)
+	want := time.Date(2026, 6, 4, 9, 0, 0, 0, berlin)
+	if !got.Equal(want) { // Equal compares instants, so the zone attached to got does not matter
+		t.Errorf("next = %v, want %v (job timezone must override DefaultTZ)", got.In(berlin), want)
+	}
+}
+
+// ── Run lifecycle ───────────────────────────────────────────────────────
+
+func TestRun_FiresThenStopsCleanly(t *testing.T) { // the one real-clock test: Run fires, then exits on cancel
+	st := newTestStore(t)
+	// Far-future cron, but a missed catchup fire forces an immediate run on
+	// startup, so Run's real loop is exercised without waiting a wall minute.
+	job := addJob(t, st, Job{Name: "j", Cron: "0 0 1 1 *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true, Catchup: true})
+	_ = st.SaveState(RunState{JobID: job.ID, NextRun: time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)})
+
+	runner := &fakeRunner{result: "ok", started: make(chan string, 1)} // buffered: Run's send never blocks here
+	s := New(st, runner, &fakeDeliverer{}, Options{ReloadEvery: 20 * time.Millisecond})
+
+	ctx, cancel := context.WithCancel(context.Background())
+	done := make(chan error, 1) // buffered so the goroutine can exit even if the test has already failed
+	go func() { done <- s.Run(ctx) }()
+
+	select {
+	case <-runner.started:
+		// fired as expected
+	case <-time.After(2 * time.Second): // generous upper bound, not a sleep
+		cancel()
+		t.Fatal("scheduler did not fire the catchup job")
+	}
+
+	cancel()
+	select {
+	case err := <-done:
+		if err != context.Canceled {
+			t.Errorf("Run returned %v, want context.Canceled", err)
+		}
+	case <-time.After(2 * time.Second):
+		t.Fatal("Run did not return after cancel")
+	}
+	if runner.callCount() != 1 {
+		t.Errorf("expected 1 run, got %d", runner.callCount())
+	}
+}
diff --git a/internal/schedule/store.go b/internal/schedule/store.go
new file mode 100644
index 0000000..9f792c4
--- /dev/null
+++ b/internal/schedule/store.go
@@ -0,0 +1,362 @@
+package schedule
+
+import (
+	"crypto/rand"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sort"
+	"sync"
+	"time"
+)
+
+// Names of the two files a Store keeps inside its directory (normally ~/.odek).
+const (
+	schedulesFile = "schedules.json"      // job definitions
+	stateFile     = "schedule-state.json" // runtime state, keyed by job ID
+)
+
+// scheduleDoc is the on-disk shape of schedules.json. Wrapping the slice in a
+// versioned object leaves room to evolve the format without a breaking change.
+type scheduleDoc struct {
+	Version int   `json:"version"` // format version; loadDoc and saveDoc default it to 1
+	Jobs    []Job `json:"jobs"`    // every job definition, in file order
+}
+
+// stateDoc is the on-disk shape of schedule-state.json.
+type stateDoc struct {
+	Version int                 `json:"version"` // format version; loadState and saveState default it to 1
+	States  map[string]RunState `json:"states"`  // runtime state keyed by job ID
+}
+
+// Store persists schedule definitions and runtime state as two JSON files
+// under a directory (normally ~/.odek). It is a thin, mutex-guarded file
+// manager in the same spirit as session.Store: all Job fields are public, so
+// callers read a Job, mutate it, and write it back.
+type Store struct {
+	dir string     // directory that holds both JSON files
+	mu  sync.Mutex // serialises this Store's load/save methods (ModTime does not take it)
+}
+
+// NewStore returns a Store backed by the .odek directory in the current user's
+// home, delegating to NewStoreAt (which creates the directory when missing).
+func NewStore() (*Store, error) {
+	userHome, homeErr := os.UserHomeDir()
+	if homeErr != nil {
+		return nil, fmt.Errorf("schedule: home dir: %w", homeErr)
+	}
+	return NewStoreAt(filepath.Join(userHome, ".odek"))
+}
+
+// NewStoreAt opens the store rooted at dir, creating dir (mode 0755) if needed.
+func NewStoreAt(dir string) (*Store, error) {
+	store := &Store{dir: dir}
+	if err := os.MkdirAll(store.dir, 0755); err != nil {
+		return nil, fmt.Errorf("schedule: create dir: %w", err)
+	}
+	return store, nil
+}
+
+// ── Validation ──────────────────────────────────────────────────────────
+
+// Validate checks that a job is well-formed enough to persist and run: a
+// parseable cron expression, a known delivery kind, a non-empty task, and a
+// loadable timezone if one is set. It does not assign IDs or defaults.
+func (j Job) Validate() error {
+	if j.Task == "" {
+		return fmt.Errorf("schedule: job %q has an empty task", j.Name)
+	}
+	loc := time.UTC
+	if j.Timezone != "" {
+		l, err := time.LoadLocation(j.Timezone)
+		if err != nil {
+			return fmt.Errorf("schedule: job %q: invalid timezone %q: %w", j.Name, j.Timezone, err)
+		}
+		loc = l
+	}
+	if _, err := ParseInLocation(j.Cron, loc); err != nil {
+		return fmt.Errorf("schedule: job %q: %w", j.Name, err)
+	}
+	switch j.Deliver.Kind {
+	case DeliverTelegram, DeliverStdout, DeliverLog:
+	case "":
+		return fmt.Errorf("schedule: job %q has no delivery kind", j.Name)
+	default:
+		return fmt.Errorf("schedule: job %q has unknown delivery kind %q", j.Name, j.Deliver.Kind)
+	}
+	return nil
+}
+
+// ── Job CRUD ────────────────────────────────────────────────────────────
+
+// Add validates job and appends it to schedules.json. An empty job.ID is
+// replaced by a generated one and a zero job.CreatedAt by the current UTC
+// time; an ID that is already stored is rejected. The stored job is returned.
+func (s *Store) Add(job Job) (Job, error) {
+	if err := job.Validate(); err != nil { // validate before taking the lock or touching disk
+		return Job{}, err
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	doc, err := s.loadDoc()
+	if err != nil {
+		return Job{}, err
+	}
+	if job.ID == "" {
+		job.ID = newJobID()
+	}
+	for _, existing := range doc.Jobs { // also catches a generated ID that collides: it errors, it is not retried
+		if existing.ID == job.ID {
+			return Job{}, fmt.Errorf("schedule: job ID %q already exists", job.ID)
+		}
+	}
+	if job.CreatedAt.IsZero() {
+		job.CreatedAt = time.Now().UTC()
+	}
+	doc.Jobs = append(doc.Jobs, job)
+	if err := s.saveDoc(doc); err != nil {
+		return Job{}, err
+	}
+	return job, nil
+}
+
+// List returns all jobs ordered by CreatedAt, ties broken by ID, for stable output.
+func (s *Store) List() ([]Job, error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	doc, err := s.loadDoc()
+	if err != nil {
+		return nil, err
+	}
+	sort.Slice(doc.Jobs, func(i, j int) bool { // sorts the freshly loaded copy; the file is not rewritten
+		if doc.Jobs[i].CreatedAt.Equal(doc.Jobs[j].CreatedAt) {
+			return doc.Jobs[i].ID < doc.Jobs[j].ID
+		}
+		return doc.Jobs[i].CreatedAt.Before(doc.Jobs[j].CreatedAt)
+	})
+	return doc.Jobs, nil
+}
+
+// Get returns the job with the given ID; the bool is false (nil error) when absent.
+func (s *Store) Get(id string) (Job, bool, error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	doc, err := s.loadDoc()
+	if err != nil {
+		return Job{}, false, err
+	}
+	for _, j := range doc.Jobs { // linear scan; the first job with a matching ID wins
+		if j.ID == id {
+			return j, true, nil
+		}
+	}
+	return Job{}, false, nil
+}
+
+// Put upserts a validated job by ID: it replaces the stored job with the same
+// ID or appends it if absent. A zero CreatedAt is filled in rather than stored.
+func (s *Store) Put(job Job) error {
+	if err := job.Validate(); err != nil {
+		return err
+	}
+	if job.ID == "" { // unlike Add, Put never generates an ID
+		return fmt.Errorf("schedule: Put requires a job ID")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	doc, err := s.loadDoc()
+	if err != nil {
+		return err
+	}
+	for i := range doc.Jobs {
+		if doc.Jobs[i].ID == job.ID {
+			if job.CreatedAt.IsZero() { // caller did not carry CreatedAt: keep the stored one
+				job.CreatedAt = doc.Jobs[i].CreatedAt
+			}
+			doc.Jobs[i] = job
+			return s.saveDoc(doc)
+		}
+	}
+	if job.CreatedAt.IsZero() { // insert path: stamp like Add does
+		job.CreatedAt = time.Now().UTC()
+	}
+	doc.Jobs = append(doc.Jobs, job)
+	return s.saveDoc(doc)
+}
+
+// Remove deletes a job by ID and then drops its runtime state. An unknown ID
+// is an error so the CLI can report it; state cleanup failures are not.
+func (s *Store) Remove(id string) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	doc, err := s.loadDoc()
+	if err != nil {
+		return err
+	}
+	idx := -1 // position of the job in doc.Jobs, -1 while not found
+	for i := range doc.Jobs {
+		if doc.Jobs[i].ID == id {
+			idx = i
+			break
+		}
+	}
+	if idx < 0 {
+		return fmt.Errorf("schedule: no job with ID %q", id)
+	}
+	doc.Jobs = append(doc.Jobs[:idx], doc.Jobs[idx+1:]...) // splice the job out in place
+	if err := s.saveDoc(doc); err != nil {
+		return err
+	}
+	// Best-effort cleanup of orphaned runtime state: load/save errors are dropped.
+	sd, err := s.loadState()
+	if err == nil {
+		if _, ok := sd.States[id]; ok { // only rewrite the state file when there is something to delete
+			delete(sd.States, id)
+			_ = s.saveState(sd)
+		}
+	}
+	return nil
+}
+
+// SetEnabled sets a job's Enabled flag and saves; an unknown ID is an error.
+func (s *Store) SetEnabled(id string, enabled bool) error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	doc, err := s.loadDoc()
+	if err != nil {
+		return err
+	}
+	for i := range doc.Jobs {
+		if doc.Jobs[i].ID == id {
+			doc.Jobs[i].Enabled = enabled // no Validate here: only the flag changes
+			return s.saveDoc(doc)
+		}
+	}
+	return fmt.Errorf("schedule: no job with ID %q", id)
+}
+
+// ModTime returns the last-modified time of the schedules file, or the zero
+// time if it does not exist yet. The engine polls this for cheap hot-reload
+// detection without parsing the file.
+func (s *Store) ModTime() time.Time {
+	info, err := os.Stat(filepath.Join(s.dir, schedulesFile))
+	if err != nil {
+		return time.Time{}
+	}
+	return info.ModTime()
+}
+
+// ── Runtime state ───────────────────────────────────────────────────────
+
+// LoadState returns runtime state for all jobs, keyed by job ID. A missing
+// or empty state file yields an empty (non-nil) map; a read/parse error, nil.
+func (s *Store) LoadState() (map[string]RunState, error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	sd, err := s.loadState()
+	if err != nil {
+		return nil, err
+	}
+	return sd.States, nil // freshly decoded map: callers may keep or mutate it
+}
+
+// SaveState replaces the whole runtime-state entry for st.JobID (fields are
+// not merged with the previous entry). Other jobs' state is preserved.
+func (s *Store) SaveState(st RunState) error {
+	if st.JobID == "" {
+		return fmt.Errorf("schedule: SaveState requires a JobID")
+	}
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	sd, err := s.loadState() // read-modify-write of the full state file under s.mu
+	if err != nil {
+		return err
+	}
+	sd.States[st.JobID] = st
+	return s.saveState(sd)
+}
+
+// ── Internal IO (callers hold s.mu) ─────────────────────────────────────
+
+func (s *Store) loadDoc() (*scheduleDoc, error) { // reads schedules.json; a missing file gives an empty doc
+	doc := &scheduleDoc{Version: 1} // default used when the file is missing or empty
+	if err := readJSON(filepath.Join(s.dir, schedulesFile), doc); err != nil {
+		return nil, err
+	}
+	return doc, nil
+}
+
+func (s *Store) saveDoc(doc *scheduleDoc) error { // atomically rewrites schedules.json
+	if doc.Version == 0 { // e.g. a file that carried no version field
+		doc.Version = 1
+	}
+	return writeJSONAtomic(filepath.Join(s.dir, schedulesFile), doc)
+}
+
+func (s *Store) loadState() (*stateDoc, error) { // reads schedule-state.json; States is never nil on success
+	sd := &stateDoc{Version: 1, States: map[string]RunState{}}
+	if err := readJSON(filepath.Join(s.dir, stateFile), sd); err != nil {
+		return nil, err
+	}
+	if sd.States == nil { // decoding can leave the map nil (e.g. "states": null); callers write into it
+		sd.States = map[string]RunState{}
+	}
+	return sd, nil
+}
+
+func (s *Store) saveState(sd *stateDoc) error { // atomically rewrites schedule-state.json
+	if sd.Version == 0 { // e.g. a file that carried no version field
+		sd.Version = 1
+	}
+	return writeJSONAtomic(filepath.Join(s.dir, stateFile), sd)
+}
+
+// readJSON decodes the JSON file at path into v. A missing or zero-length file
+// is not an error: v is left exactly as the caller initialised it, so callers
+// start from an empty document. Read and parse failures are wrapped.
+func readJSON(path string, v any) error {
+	name := filepath.Base(path)
+	raw, err := os.ReadFile(path)
+	switch {
+	case err != nil && os.IsNotExist(err):
+		return nil
+	case err != nil:
+		return fmt.Errorf("schedule: read %s: %w", name, err)
+	case len(raw) == 0:
+		return nil
+	}
+	if err = json.Unmarshal(raw, v); err != nil {
+		return fmt.Errorf("schedule: parse %s: %w", name, err)
+	}
+	return nil
+}
+
+// writeJSONAtomic marshals v as indented JSON and installs it at path via a
+// uniquely named temp file in the same directory followed by a rename, so a
+// reader never observes a half-written file.
+//
+// The temp file comes from os.CreateTemp, which picks a fresh random name and
+// creates it exclusively with mode 0600 (tasks may reference secrets). Unlike
+// a fixed "<path>.tmp" name, this means two processes writing the same file
+// (a Store's mutex only serialises one process) never truncate each other's
+// temp file, and a symlink planted at the temp or final path is replaced
+// rather than followed. The ".tmp" suffix is kept so strays stay recognisable.
+//
+// It returns a wrapped error if marshalling, writing, or renaming fails; the
+// temp file is removed on every failure path.
+func writeJSONAtomic(path string, v any) error {
+	data, err := json.MarshalIndent(v, "", "  ")
+	if err != nil {
+		return fmt.Errorf("schedule: marshal %s: %w", filepath.Base(path), err)
+	}
+	f, err := os.CreateTemp(filepath.Dir(path), filepath.Base(path)+".*.tmp")
+	if err != nil {
+		return fmt.Errorf("schedule: write %s: %w", filepath.Base(path), err)
+	}
+	tmp := f.Name()
+	_, err = f.Write(data)
+	// Always close; if both fail, the write error is the one reported.
+	if cerr := f.Close(); err == nil {
+		err = cerr
+	}
+	if err != nil {
+		os.Remove(tmp)
+		return fmt.Errorf("schedule: write %s: %w", filepath.Base(path), err)
+	}
+	if err := os.Rename(tmp, path); err != nil {
+		os.Remove(tmp)
+		return fmt.Errorf("schedule: rename %s: %w", filepath.Base(path), err)
+	}
+	return nil
+}
+
+// newJobID returns a stable, collision-resistant job ID like "jb-1a2b3c4d".
+func newJobID() string {
+	buf := make([]byte, 4)
+	rand.Read(buf) //nolint:errcheck // crypto/rand.Read never returns an error on supported platforms
+	return "jb-" + hex.EncodeToString(buf)
+}
diff --git a/internal/schedule/store_test.go b/internal/schedule/store_test.go
new file mode 100644
index 0000000..31c60fc
--- /dev/null
+++ b/internal/schedule/store_test.go
@@ -0,0 +1,293 @@
+package schedule
+
+import (
+	"os"
+	"path/filepath"
+	"sync"
+	"testing"
+	"time"
+)
+
+func newTestStore(t *testing.T) *Store { // Store rooted in a fresh t.TempDir(); setup failure is fatal
+	t.Helper()
+	st, err := NewStoreAt(t.TempDir())
+	if err != nil {
+		t.Fatalf("NewStoreAt: %v", err)
+	}
+	return st
+}
+
+func sampleJob() Job {
+	// Baseline fixture: an enabled weekday 09:00 reminder delivered to stdout.
+	// ID and CreatedAt stay zero so tests can watch the store fill them in.
+	job := Job{Name: "standup", Enabled: true}
+	job.Cron = "0 9 * * 1-5"
+	job.Task = "Remind me about standup"
+	job.Deliver = Delivery{Kind: DeliverStdout}
+	return job
+}
+
+// ── Validation ──────────────────────────────────────────────────────────
+
+func TestValidate(t *testing.T) { // table test: each case mutates a fresh sampleJob and checks Validate's verdict
+	tests := []struct {
+		name    string
+		mutate  func(*Job) // applied to a copy of sampleJob before validating
+		wantErr bool
+	}{
+		{"valid", func(*Job) {}, false},
+		{"empty task", func(j *Job) { j.Task = "" }, true},
+		{"bad cron", func(j *Job) { j.Cron = "nope" }, true},
+		{"bad timezone", func(j *Job) { j.Timezone = "Mars/Phobos" }, true},
+		{"good timezone", func(j *Job) { j.Timezone = "Europe/Berlin" }, false},
+		{"empty deliver kind", func(j *Job) { j.Deliver.Kind = "" }, true},
+		{"unknown deliver kind", func(j *Job) { j.Deliver.Kind = "carrier-pigeon" }, true},
+		{"telegram deliver", func(j *Job) { j.Deliver = Delivery{Kind: DeliverTelegram, ChatID: 42} }, false},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			j := sampleJob()
+			tc.mutate(&j)
+			err := j.Validate()
+			if (err != nil) != tc.wantErr { // only presence of an error is checked, not its text
+				t.Errorf("Validate() err = %v, wantErr %v", err, tc.wantErr)
+			}
+		})
+	}
+}
+
+// ── CRUD ────────────────────────────────────────────────────────────────
+
+func TestAdd_AssignsIDAndCreatedAt(t *testing.T) { // Add must fill in the ID and CreatedAt that sampleJob leaves zero
+	st := newTestStore(t)
+	got, err := st.Add(sampleJob())
+	if err != nil {
+		t.Fatalf("Add: %v", err)
+	}
+	if got.ID == "" {
+		t.Error("Add did not assign an ID")
+	}
+	if got.CreatedAt.IsZero() {
+		t.Error("Add did not stamp CreatedAt")
+	}
+}
+
+func TestAdd_RejectsInvalid(t *testing.T) { // an invalid job must be refused and leave the store empty
+	st := newTestStore(t)
+	bad := sampleJob()
+	bad.Cron = "garbage"
+	if _, err := st.Add(bad); err == nil {
+		t.Error("Add accepted an invalid job")
+	}
+	// Nothing should have been persisted by the rejected Add.
+	jobs, _ := st.List()
+	if len(jobs) != 0 {
+		t.Errorf("expected 0 jobs after rejected Add, got %d", len(jobs))
+	}
+}
+
+func TestAdd_DuplicateID(t *testing.T) { // a second Add with the same explicit ID must fail
+	st := newTestStore(t)
+	j := sampleJob()
+	j.ID = "jb-fixed" // explicit ID, so Add does not generate one
+	if _, err := st.Add(j); err != nil {
+		t.Fatalf("first Add: %v", err)
+	}
+	if _, err := st.Add(j); err == nil {
+		t.Error("Add accepted a duplicate ID")
+	}
+}
+
+func TestGetAndList(t *testing.T) { // Get by ID (hit and miss) and List length after two Adds
+	st := newTestStore(t)
+	a, _ := st.Add(sampleJob())
+	b := sampleJob()
+	b.Name = "second"
+	bAdded, _ := st.Add(b)
+
+	got, ok, err := st.Get(a.ID)
+	if err != nil || !ok {
+		t.Fatalf("Get(%s) ok=%v err=%v", a.ID, ok, err)
+	}
+	if got.Name != "standup" {
+		t.Errorf("Get returned wrong job: %+v", got)
+	}
+
+	_, ok, _ = st.Get("jb-missing")
+	if ok {
+		t.Error("Get returned ok for a missing ID")
+	}
+
+	jobs, _ := st.List()
+	if len(jobs) != 2 {
+		t.Fatalf("List len = %d, want 2", len(jobs))
+	}
+	_ = bAdded // NOTE(review): never asserted on; only the List length covers the second job
+}
+
+func TestPut_UpsertAndPreservesCreatedAt(t *testing.T) { // Put updates in place, keeps CreatedAt, inserts unknown IDs
+	st := newTestStore(t)
+	a, _ := st.Add(sampleJob())
+	created := a.CreatedAt
+
+	a.Task = "updated task"
+	a.CreatedAt = time.Time{} // simulate a caller that did not carry CreatedAt along
+	if err := st.Put(a); err != nil {
+		t.Fatalf("Put: %v", err)
+	}
+	got, _, _ := st.Get(a.ID)
+	if got.Task != "updated task" {
+		t.Errorf("Put did not update task: %q", got.Task)
+	}
+	if !got.CreatedAt.Equal(created) {
+		t.Errorf("Put did not preserve CreatedAt: got %v want %v", got.CreatedAt, created)
+	}
+
+	// Put with an ID that is not stored yet inserts a new job.
+	n := sampleJob()
+	n.ID = "jb-new"
+	if err := st.Put(n); err != nil {
+		t.Fatalf("Put insert: %v", err)
+	}
+	if jobs, _ := st.List(); len(jobs) != 2 {
+		t.Errorf("expected 2 jobs after Put-insert, got %d", len(jobs))
+	}
+}
+
+func TestRemove(t *testing.T) { // Remove deletes the job and its state, and errors on an unknown ID
+	st := newTestStore(t)
+	a, _ := st.Add(sampleJob())
+	// Seed runtime state so the test can confirm Remove cleans it up.
+	if err := st.SaveState(RunState{JobID: a.ID, LastStatus: StatusOK}); err != nil {
+		t.Fatalf("SaveState: %v", err)
+	}
+	if err := st.Remove(a.ID); err != nil {
+		t.Fatalf("Remove: %v", err)
+	}
+	if jobs, _ := st.List(); len(jobs) != 0 {
+		t.Errorf("expected 0 jobs after Remove, got %d", len(jobs))
+	}
+	states, _ := st.LoadState()
+	if _, ok := states[a.ID]; ok {
+		t.Error("Remove did not clean up runtime state")
+	}
+	if err := st.Remove("jb-missing"); err == nil {
+		t.Error("Remove of missing ID should error")
+	}
+}
+
+func TestSetEnabled(t *testing.T) { // disabling persists; an unknown ID is an error
+	st := newTestStore(t)
+	a, _ := st.Add(sampleJob()) // sampleJob starts out enabled
+	if err := st.SetEnabled(a.ID, false); err != nil {
+		t.Fatalf("SetEnabled: %v", err)
+	}
+	got, _, _ := st.Get(a.ID)
+	if got.Enabled {
+		t.Error("SetEnabled(false) did not disable the job")
+	}
+	if err := st.SetEnabled("jb-missing", true); err == nil {
+		t.Error("SetEnabled on missing ID should error")
+	}
+}
+
+// ── Persistence / round-trip ────────────────────────────────────────────
+
+func TestPersistenceRoundTrip(t *testing.T) { // a job written by one Store is readable by another on the same dir
+	dir := t.TempDir()
+	st1, _ := NewStoreAt(dir)
+	a, _ := st1.Add(sampleJob())
+
+	// Re-open from the same dir with a second Store: the data must survive.
+	st2, _ := NewStoreAt(dir)
+	got, ok, err := st2.Get(a.ID)
+	if err != nil || !ok {
+		t.Fatalf("reopened Get ok=%v err=%v", ok, err)
+	}
+	if got.Cron != "0 9 * * 1-5" || got.Name != "standup" {
+		t.Errorf("round-trip mismatch: %+v", got)
+	}
+}
+
+func TestState_RoundTripAndIsolation(t *testing.T) { // per-job entries are independent and replaced wholesale
+	st := newTestStore(t)
+	now := time.Now().UTC().Truncate(time.Second) // whole seconds keep the round-trip comparison simple
+	if err := st.SaveState(RunState{JobID: "jb-1", LastStatus: StatusOK, LastRun: now}); err != nil {
+		t.Fatalf("SaveState 1: %v", err)
+	}
+	if err := st.SaveState(RunState{JobID: "jb-2", LastStatus: StatusError, LastError: "boom"}); err != nil {
+		t.Fatalf("SaveState 2: %v", err)
+	}
+	// Overwriting jb-2 must not disturb jb-1.
+	if err := st.SaveState(RunState{JobID: "jb-2", LastStatus: StatusOK}); err != nil {
+		t.Fatalf("SaveState 2b: %v", err)
+	}
+	states, _ := st.LoadState()
+	// jb-1 must be untouched by jb-2's writes.
+	if states["jb-1"].LastStatus != StatusOK || !states["jb-1"].LastRun.Equal(now) {
+		t.Errorf("jb-1 state corrupted: %+v", states["jb-1"])
+	}
+	// jb-2 was re-saved; a fresh write replaces the whole entry, so the prior
+	// LastError ("boom") must be gone.
+	if states["jb-2"].LastStatus != StatusOK {
+		t.Errorf("jb-2 status not updated: %+v", states["jb-2"])
+	}
+	if states["jb-2"].LastError != "" {
+		t.Errorf("jb-2 LastError should be cleared by full-entry replace, got %q", states["jb-2"].LastError)
+	}
+	if err := st.SaveState(RunState{}); err == nil { // an empty JobID must be rejected
+		t.Error("SaveState with empty JobID should error")
+	}
+}
+
+func TestModTime(t *testing.T) { // zero before schedules.json exists, non-zero after the first write
+	st := newTestStore(t)
+	if !st.ModTime().IsZero() {
+		t.Error("ModTime should be zero before any write")
+	}
+	if _, err := st.Add(sampleJob()); err != nil { // Add creates schedules.json
+		t.Fatalf("Add: %v", err)
+	}
+	if st.ModTime().IsZero() {
+		t.Error("ModTime should be set after a write")
+	}
+}
+
+// ── Atomicity / no temp leftovers ───────────────────────────────────────
+
+func TestAtomicWrite_NoTempLeftover(t *testing.T) { // a successful write must not leave any *.tmp file behind
+	dir := t.TempDir()
+	st, _ := NewStoreAt(dir)
+	if _, err := st.Add(sampleJob()); err != nil {
+		t.Fatalf("Add: %v", err)
+	}
+	entries, _ := os.ReadDir(dir)
+	for _, e := range entries {
+		if filepath.Ext(e.Name()) == ".tmp" { // matches any name that ends in ".tmp"
+			t.Errorf("temp file left behind: %s", e.Name())
+		}
+	}
+}
+
+// ── Concurrency (run with -race) ────────────────────────────────────────
+
+func TestConcurrentStateWrites(t *testing.T) { // 20 goroutines saving state must not lose or corrupt entries
+	st := newTestStore(t)
+	var wg sync.WaitGroup
+	for i := range 20 {
+		wg.Add(1)
+		go func(n int) {
+			defer wg.Done()
+			id := "jb-" + string(rune('a'+n%5)) // n%5 folds the writers onto five IDs: jb-a … jb-e
+			_ = st.SaveState(RunState{JobID: id, Runs: n, LastStatus: StatusOK})
+		}(i)
+	}
+	wg.Wait()
+	states, err := st.LoadState()
+	if err != nil {
+		t.Fatalf("LoadState: %v", err)
+	}
+	if len(states) != 5 { // which writer won each ID is unspecified; only the key set is checked
+		t.Errorf("expected 5 distinct job states, got %d", len(states))
+	}
+}
diff --git a/internal/schedule/types.go b/internal/schedule/types.go
new file mode 100644
index 0000000..50e9228
--- /dev/null
+++ b/internal/schedule/types.go
@@ -0,0 +1,74 @@
+// Package schedule provides a native, in-process task scheduler for odek.
+//
+// It runs agent tasks on a cron schedule from inside a long-lived process
+// (the Telegram bot, the `odek schedule daemon`, or `odek serve`) and
+// delivers each result somewhere (Telegram, stdout, a log file). Running
+// in-process is the whole point: the host process has already resolved its
+// configuration (API key, model, bot token, default chat) into memory, so a
+// scheduled task sees exactly what an interactive one does — no environment
+// inheritance games, no external cron daemon, no container-only behaviour.
+//
+// The package is deliberately decoupled from the agent and Telegram packages.
+// The firing engine (Scheduler) talks to the rest of odek through two small
+// interfaces, Runner and Deliverer, so it can be unit-tested against fakes
+// and reused by every host process unchanged.
+//
+// Layout on disk (mirrors the rest of ~/.odek):
+//
+//	~/.odek/schedules.json        job definitions (user-editable, 0600)
+//	~/.odek/schedule-state.json   runtime state: last/next run, status (0600)
+//
+// Definitions and runtime state are kept in separate files on purpose: the
+// definitions file is something a human edits or the CLI rewrites, while the
+// state file churns on every fire. Keeping them apart means a hand-edit never
+// races with a state write and the definitions file stays diff-clean.
+package schedule
+
+import "time"
+
+// Delivery kinds. A job's result is routed to exactly one destination.
+const (
+	DeliverTelegram = "telegram" // send via the bot to ChatID (0 = default_chat_id)
+	DeliverStdout   = "stdout"   // print to the daemon's stdout
+	DeliverLog      = "log"      // append to the schedule run log
+)
+
+// Run-status values recorded in RunState.LastStatus.
+const (
+	StatusOK      = "ok"      // task ran and delivered
+	StatusError   = "error"   // task or delivery failed (see LastError)
+	StatusSkipped = "skipped" // a due fire was intentionally not run (e.g. missed while down, catchup off)
+)
+
+// Delivery describes where a job's result is sent.
+type Delivery struct {
+	Kind   string `json:"kind"`              // one of the Deliver* constants
+	ChatID int64  `json:"chat_id,omitempty"` // telegram only; 0 = use the configured default_chat_id
+}
+
+// Job is a single scheduled agent task. Definitions live in schedules.json.
+// All fields are exported so the CLI layer can construct and mutate jobs
+// directly, matching the convention used by session.Session.
+type Job struct {
+	ID        string    `json:"id"`                 // stable short id, e.g. "jb-ab12cd"
+	Name      string    `json:"name"`               // human-readable label
+	Cron      string    `json:"cron"`               // 5-field expression or @macro (see cronexpr.go)
+	Task      string    `json:"task"`               // the prompt handed to the agent
+	Deliver   Delivery  `json:"deliver"`            // where the result goes
+	Enabled   bool      `json:"enabled"`            // disabled jobs are parsed but never fired
+	Catchup   bool      `json:"catchup,omitempty"`  // if a fire was missed while the process was down, run once on startup
+	Timezone  string    `json:"timezone,omitempty"` // IANA name (e.g. "Europe/Berlin"); "" = scheduler default
+	CreatedAt time.Time `json:"created_at"`         // when the job was added
+}
+
+// RunState is the mutable runtime state for one job, persisted in
+// schedule-state.json keyed by Job.ID. It is updated after every fire.
+type RunState struct {
+	JobID      string    `json:"job_id"`
+	LastRun    time.Time `json:"last_run,omitzero"`     // omitzero (not omitempty) — time.Time is a struct
+	LastStatus string    `json:"last_status,omitempty"` // one of the Status* constants
+	LastError  string    `json:"last_error,omitempty"`  // populated when LastStatus == StatusError
+	LastResult string    `json:"last_result,omitempty"` // truncated preview of the delivered text
+	NextRun    time.Time `json:"next_run,omitzero"`     // computed projected next fire
+	Runs       int       `json:"runs,omitempty"`        // total successful + failed fires
+}

From f6753e2261e7c82183ec064d4195b4c4ec06798d Mon Sep 17 00:00:00 2001
From: Rolando Santamaria Maso <kyberneees@gmail.com>
Date: Thu, 4 Jun 2026 21:10:15 +0200
Subject: [PATCH 02/11] feat(schedule): odek schedule CLI + headless
 runner/deliverers (phase 2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wire the scheduler core into the CLI and give it a way to actually run tasks.

- cmd/odek/schedule.go: `odek schedule <list|add|rm|enable|disable|run|next|daemon>`.
  * add: flag-parsed (--name/--cron/--deliver/--tz/--catchup/--disabled) with
    a trailing task; validates and shows the next fire.
  * list: tabular view with computed next-fire (local time) and last status.
  * next: previews upcoming fires for a job ID or a raw expression.
  * run: fires one job immediately and delivers (test a job).
  * daemon: foreground scheduler with a singleton pid lock (refuses a second
    instance rather than usurping a live one) and graceful SIGINT/SIGTERM drain.
- runTaskHeadless: builds a fresh agent with a silent (io.Discard) renderer,
  interaction off, and no approver — the resolved danger policy governs what an
  unattended task may do, mirroring non-interactive `odek run`.
- agentRunner / cliDeliverer implement the schedule.Runner / schedule.Deliverer
  interfaces; delivery routes to stdout, ~/.odek/schedule.log, or Telegram
  (honouring a per-job chat ID, falling back to default_chat_id).
- dispatch + printUsage wired for the new command.

Tests cover parseDeliver, deliverString, firstWords, jobSchedule, and the
deliverer branches (log append, telegram misconfig errors, unknown kind).
Smoke-tested end to end: add/list/next/enable/disable/rm, schedules.json at
0600, and daemon start → second-instance refused → clean SIGINT drain.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 cmd/odek/dispatch.go      |   2 +
 cmd/odek/main.go          |   5 +
 cmd/odek/schedule.go      | 532 ++++++++++++++++++++++++++++++++++++++
 cmd/odek/schedule_test.go | 135 ++++++++++
 4 files changed, 674 insertions(+)
 create mode 100644 cmd/odek/schedule.go
 create mode 100644 cmd/odek/schedule_test.go

diff --git a/cmd/odek/dispatch.go b/cmd/odek/dispatch.go
index 12be9cf..95c9db2 100644
--- a/cmd/odek/dispatch.go
+++ b/cmd/odek/dispatch.go
@@ -51,6 +51,8 @@ func dispatch(args []string) int {
 		return cliExit(mcpCmd(rest))
 	case "telegram":
 		return cliExit(telegramCmd(rest))
+	case "schedule":
+		return cliExit(scheduleCmd(rest))
 	default:
 		fmt.Fprintf(os.Stderr, "odek: unknown command %q\n", cmd)
 		printUsage()
diff --git a/cmd/odek/main.go b/cmd/odek/main.go
index 32155d8..d17f06e 100644
--- a/cmd/odek/main.go
+++ b/cmd/odek/main.go
@@ -476,6 +476,7 @@ func printUsage() {
   odek skill <list|view|save|delete|import|curate>
   odek mcp [--sandbox]
   odek telegram
+  odek schedule <list|add|rm|enable|disable|run|next|daemon>
   odek version
 
 Commands:
@@ -497,6 +498,10 @@ Commands:
   mcp                 Start MCP server (Model Context Protocol) over stdio
                         Exposes all built-in tools for Claude Code, Cursor, etc.
   telegram            Start Telegram bot (long-polling mode)
+  schedule            Manage native in-process scheduled tasks (cron)
+                       Subcommands: list, add, rm, enable, disable, run, next, daemon
+                       The daemon (or the Telegram bot) fires jobs and delivers
+                       results to stdout, a log, or a Telegram chat.
   init                Create a config file (default: ./odek.json)
   version             Print version and exit
 
diff --git a/cmd/odek/schedule.go b/cmd/odek/schedule.go
new file mode 100644
index 0000000..e10b87c
--- /dev/null
+++ b/cmd/odek/schedule.go
@@ -0,0 +1,532 @@
+package main
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"syscall"
+	"text/tabwriter"
+	"time"
+
+	"github.com/BackendStack21/odek"
+	"github.com/BackendStack21/odek/internal/config"
+	"github.com/BackendStack21/odek/internal/llm"
+	"github.com/BackendStack21/odek/internal/render"
+	"github.com/BackendStack21/odek/internal/schedule"
+	"github.com/BackendStack21/odek/internal/telegram"
+)
+
+// scheduleCmd is the entry point for "odek schedule" — management of native,
+// in-process scheduled agent tasks (see internal/schedule).
+func scheduleCmd(args []string) error {
+	if len(args) == 0 {
+		printScheduleUsage()
+		return nil
+	}
+
+	// daemon and run resolve their own config; the rest only touch the store.
+	switch args[0] {
+	case "daemon":
+		return scheduleDaemon(args[1:])
+	case "run":
+		return scheduleRunNow(args[1:])
+	}
+
+	st, err := schedule.NewStore()
+	if err != nil {
+		return err
+	}
+	switch args[0] {
+	case "list", "ls":
+		return scheduleList(st)
+	case "add":
+		return scheduleAdd(st, args[1:])
+	case "rm", "remove", "delete":
+		return scheduleRemove(st, args[1:])
+	case "enable":
+		return scheduleSetEnabled(st, args[1:], true)
+	case "disable":
+		return scheduleSetEnabled(st, args[1:], false)
+	case "next":
+		return scheduleNext(st, args[1:])
+	default:
+		return fmt.Errorf("unknown schedule command %q (use list, add, rm, enable, disable, run, next, daemon)", args[0])
+	}
+}
+
+func printScheduleUsage() {
+	fmt.Println(`Usage: odek schedule <command>
+
+Commands:
+  list                      List scheduled jobs (id, next fire, last status)
+  add --cron "<expr>" <task>  Add a job (see flags below)
+  rm <id>                   Remove a job
+  enable <id>               Enable a job
+  disable <id>              Disable a job (kept, but never fires)
+  run <id>                  Run a job once now and deliver (test it)
+  next <id|cron-expr>       Show the next few fire times
+  daemon                    Run the scheduler in the foreground
+
+Add flags:
+  --name <label>            Human label (defaults to the first words of the task)
+  --cron "<expr>"           5-field cron or @macro (@hourly @daily @weekly @monthly @yearly)
+  --deliver <dest>          stdout (default) | log | telegram | telegram:<chatID>
+  --tz <IANA>               Timezone, e.g. Europe/Berlin (default UTC)
+  --catchup                 Run once on startup if a fire was missed while down
+  --disabled                Add without enabling
+
+Examples:
+  odek schedule add --cron "0 9 * * 1-5" --deliver telegram "Summarize today's calendar"
+  odek schedule next "*/15 * * * *"
+  odek schedule daemon`)
+}
+
+// ── list ────────────────────────────────────────────────────────────────
+
+func scheduleList(st *schedule.Store) error {
+	jobs, err := st.List()
+	if err != nil {
+		return err
+	}
+	if len(jobs) == 0 {
+		fmt.Println(`No scheduled jobs. Add one with:
+  odek schedule add --cron "0 9 * * 1-5" --deliver telegram "your task"`)
+		return nil
+	}
+	state, _ := st.LoadState()
+	now := time.Now()
+
+	w := tabwriter.NewWriter(os.Stdout, 0, 2, 2, ' ', 0)
+	fmt.Fprintln(w, "ID\tON\tCRON\tNEXT (local)\tLAST\tNAME")
+	for _, j := range jobs {
+		next := "—"
+		if s, err := jobSchedule(j); err != nil {
+			next = "invalid"
+		} else if nt := s.Next(now); !nt.IsZero() {
+			next = nt.Local().Format("Mon 02 Jan 15:04")
+		}
+		last := "—"
+		if rs, ok := state[j.ID]; ok && rs.LastStatus != "" {
+			last = rs.LastStatus
+		}
+		on := "yes"
+		if !j.Enabled {
+			on = "no"
+		}
+		fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\n", j.ID, on, j.Cron, next, last, j.Name)
+	}
+	return w.Flush()
+}
+
+// ── add ─────────────────────────────────────────────────────────────────
+
+func scheduleAdd(st *schedule.Store, args []string) error {
+	fs := flag.NewFlagSet("schedule add", flag.ContinueOnError)
+	name := fs.String("name", "", "human label")
+	cron := fs.String("cron", "", "cron expression or @macro")
+	deliver := fs.String("deliver", "stdout", "stdout | log | telegram | telegram:<chatID>")
+	tz := fs.String("tz", "", "IANA timezone (default UTC)")
+	catchup := fs.Bool("catchup", false, "run once on startup if a fire was missed while down")
+	disabled := fs.Bool("disabled", false, "add without enabling")
+	if err := fs.Parse(args); err != nil {
+		return err
+	}
+	task := strings.TrimSpace(strings.Join(fs.Args(), " "))
+	if *cron == "" || task == "" {
+		return fmt.Errorf(`usage: odek schedule add --cron "<expr>" [flags] <task>`)
+	}
+	del, err := parseDeliver(*deliver)
+	if err != nil {
+		return err
+	}
+	job := schedule.Job{
+		Name:     *name,
+		Cron:     *cron,
+		Task:     task,
+		Deliver:  del,
+		Timezone: *tz,
+		Catchup:  *catchup,
+		Enabled:  !*disabled,
+	}
+	if job.Name == "" {
+		job.Name = firstWords(task, 6)
+	}
+	saved, err := st.Add(job)
+	if err != nil {
+		return err
+	}
+	next := "—"
+	if s, err := jobSchedule(saved); err == nil {
+		if nt := s.Next(time.Now()); !nt.IsZero() {
+			next = nt.Local().Format(time.RFC1123)
+		}
+	}
+	state := "enabled"
+	if !saved.Enabled {
+		state = "disabled"
+	}
+	fmt.Printf("Added %s (%s, %s)\n  cron:    %s\n  deliver: %s\n  next:    %s\n",
+		saved.ID, saved.Name, state, saved.Cron, deliverString(saved.Deliver), next)
+	return nil
+}
+
+// ── rm / enable / disable ───────────────────────────────────────────────
+
+func scheduleRemove(st *schedule.Store, args []string) error {
+	if len(args) < 1 {
+		return fmt.Errorf("usage: odek schedule rm <id>")
+	}
+	if err := st.Remove(args[0]); err != nil {
+		return err
+	}
+	fmt.Printf("Removed %s\n", args[0])
+	return nil
+}
+
+func scheduleSetEnabled(st *schedule.Store, args []string, enabled bool) error {
+	verb := "enable"
+	if !enabled {
+		verb = "disable"
+	}
+	if len(args) < 1 {
+		return fmt.Errorf("usage: odek schedule %s <id>", verb)
+	}
+	if err := st.SetEnabled(args[0], enabled); err != nil {
+		return err
+	}
+	fmt.Printf("%sd %s\n", verb, args[0])
+	return nil
+}
+
+// ── next ────────────────────────────────────────────────────────────────
+
+func scheduleNext(st *schedule.Store, args []string) error {
+	if len(args) < 1 {
+		return fmt.Errorf(`usage: odek schedule next <id|"cron-expr">`)
+	}
+	var s *schedule.Schedule
+	if job, ok, _ := st.Get(args[0]); ok && len(args) == 1 {
+		sc, err := jobSchedule(job)
+		if err != nil {
+			return err
+		}
+		s = sc
+		fmt.Printf("Job %s (%s): %s\n", job.ID, job.Name, job.Cron)
+	} else {
+		expr := strings.Join(args, " ")
+		sc, err := schedule.Parse(expr)
+		if err != nil {
+			return err
+		}
+		s = sc
+		fmt.Printf("Expression: %s (UTC)\n", expr)
+	}
+	t := time.Now()
+	for range 5 {
+		t = s.Next(t)
+		if t.IsZero() {
+			fmt.Println("  (no further fires within the search horizon)")
+			break
+		}
+		fmt.Printf("  %s\n", t.Local().Format(time.RFC1123))
+	}
+	return nil
+}
+
+// ── run (once, now) ─────────────────────────────────────────────────────
+
+func scheduleRunNow(args []string) error {
+	if len(args) < 1 {
+		return fmt.Errorf("usage: odek schedule run <id>")
+	}
+	st, err := schedule.NewStore()
+	if err != nil {
+		return err
+	}
+	job, ok, err := st.Get(args[0])
+	if err != nil {
+		return err
+	}
+	if !ok {
+		return fmt.Errorf("no job with ID %q", args[0])
+	}
+
+	resolved := config.LoadConfig(config.CLIFlags{})
+	system := buildSystemPrompt(resolved)
+	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+	defer cancel()
+
+	fmt.Fprintf(os.Stderr, "Running %s (%s)…\n", job.ID, job.Name)
+	result, _, err := runTaskHeadless(ctx, resolved, system, job.Task)
+	if err != nil {
+		return fmt.Errorf("run: %w", err)
+	}
+	if err := (cliDeliverer{resolved: resolved}).Deliver(job, result); err != nil {
+		return fmt.Errorf("deliver: %w", err)
+	}
+	return nil
+}
+
+// ── daemon ──────────────────────────────────────────────────────────────
+
+func scheduleDaemon(_ []string) error {
+	unlock, err := acquireScheduleLock()
+	if err != nil {
+		return err
+	}
+	defer unlock()
+
+	resolved := config.LoadConfig(config.CLIFlags{})
+	system := buildSystemPrompt(resolved)
+	st, err := schedule.NewStore()
+	if err != nil {
+		return err
+	}
+
+	sched := schedule.New(st,
+		agentRunner{resolved: resolved, system: system},
+		cliDeliverer{resolved: resolved},
+		schedule.Options{Logger: stderrLogger{}},
+	)
+
+	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
+	defer cancel()
+
+	jobs, _ := st.List()
+	enabled := 0
+	for _, j := range jobs {
+		if j.Enabled {
+			enabled++
+		}
+	}
+	fmt.Fprintf(os.Stderr, "odek schedule daemon ⏰  %d job(s) loaded (%d enabled). Ctrl-C to stop.\n", len(jobs), enabled)
+
+	if err := sched.Run(ctx); err != nil && err != context.Canceled {
+		return err
+	}
+	fmt.Fprintln(os.Stderr, "odek schedule daemon: stopped")
+	return nil
+}
+
+// ── Runner / Deliverer implementations ──────────────────────────────────
+
+// agentRunner runs each job's task through a fresh, headless odek agent using
+// the config resolved at daemon startup.
+type agentRunner struct {
+	resolved config.ResolvedConfig
+	system   string
+}
+
+func (r agentRunner) Run(ctx context.Context, job schedule.Job) (string, int64, error) {
+	return runTaskHeadless(ctx, r.resolved, r.system, job.Task)
+}
+
+// cliDeliverer routes results to stdout, a log file, or Telegram, honouring a
+// per-job chat ID (falling back to the configured default_chat_id).
+type cliDeliverer struct {
+	resolved config.ResolvedConfig
+}
+
+func (d cliDeliverer) Deliver(job schedule.Job, result string) error {
+	switch job.Deliver.Kind {
+	case schedule.DeliverStdout:
+		fmt.Printf("\n── %s · %s ──\n%s\n", job.Name, time.Now().Format(time.RFC1123), result)
+		return nil
+	case schedule.DeliverLog:
+		return appendScheduleLog(job, result)
+	case schedule.DeliverTelegram:
+		return d.deliverTelegram(job, result)
+	default:
+		return fmt.Errorf("unknown delivery kind %q", job.Deliver.Kind)
+	}
+}
+
+func (d cliDeliverer) deliverTelegram(job schedule.Job, result string) error {
+	if d.resolved.Telegram.Token == "" {
+		return fmt.Errorf("telegram bot_token not configured")
+	}
+	chatID := job.Deliver.ChatID
+	if chatID == 0 {
+		chatID = d.resolved.Telegram.DefaultChatID
+	}
+	if chatID == 0 {
+		return fmt.Errorf("no chat id (set the job's telegram:<chatID> or telegram.default_chat_id)")
+	}
+	bot := telegram.NewBot(d.resolved.Telegram.Token)
+	_, err := bot.SendMessage(chatID, result, nil)
+	return err
+}
+
+// ── headless agent execution ────────────────────────────────────────────
+
+// runTaskHeadless builds a fresh agent with no terminal renderer and no
+// interactive approver and runs one task to completion, returning the final
+// text. It mirrors run()'s construction but for unattended use: the danger
+// policy from resolved.Dangerous governs what the task may do (no human is
+// present to approve), exactly as for `odek run` invoked non-interactively.
+// Token usage is not yet surfaced (returns 0); the Telegram integration will
+// supply its own Runner that accounts for the daily budget.
+func runTaskHeadless(ctx context.Context, resolved config.ResolvedConfig, system, task string) (string, int64, error) {
+	tools := builtinTools(resolved.Dangerous, nil, nil, resolved.MaxConcurrency, resolved.APIKey, resolved.Transcription, nil)
+
+	if len(resolved.MCPServers) > 0 {
+		cleanup, err := loadMCPTools(resolved.MCPServers, &tools)
+		if err != nil {
+			return "", 0, fmt.Errorf("mcp: %w", err)
+		}
+		defer cleanup()
+	}
+
+	agent, err := odek.New(odek.Config{
+		Model:           resolved.Model,
+		BaseURL:         resolved.BaseURL,
+		APIKey:          resolved.APIKey,
+		MaxIterations:   resolved.MaxIter,
+		MaxToolParallel: resolved.MaxToolParallel,
+		SystemMessage:   system,
+		RuntimeContext:  odek.BuildRuntimeContext("schedule"),
+		NoProjectFile:   resolved.NoAgents,
+		Thinking:        resolved.Thinking,
+		Temperature:     0,
+		Tools:           tools,
+		Renderer:        render.New(io.Discard, false), // silent: unattended
+		InteractionMode: "off",
+		PromptCaching:   resolved.PromptCaching,
+	})
+	if err != nil {
+		return "", 0, err
+	}
+	defer agent.Close()
+
+	var messages []llm.Message
+	if system != "" {
+		messages = append(messages, llm.Message{Role: "system", Content: system})
+	}
+	messages = append(messages, llm.Message{Role: "user", Content: task})
+
+	result, _, err := agent.RunWithMessages(ctx, messages)
+	return result, 0, err
+}
+
+// ── helpers ─────────────────────────────────────────────────────────────
+
+// jobSchedule compiles a job's cron in its timezone (or UTC) for display.
+func jobSchedule(j schedule.Job) (*schedule.Schedule, error) {
+	loc := time.UTC
+	if j.Timezone != "" {
+		l, err := time.LoadLocation(j.Timezone)
+		if err != nil {
+			return nil, err
+		}
+		loc = l
+	}
+	return schedule.ParseInLocation(j.Cron, loc)
+}
+
+// parseDeliver parses a --deliver value: "" or "stdout", "log", "telegram", or
+// "telegram:<chatID>". A ":suffix" on anything but telegram is rejected.
+func parseDeliver(s string) (schedule.Delivery, error) {
+	kind, rest, _ := strings.Cut(s, ":")
+	switch {
+	case (kind == "" || kind == schedule.DeliverStdout) && rest == "":
+		return schedule.Delivery{Kind: schedule.DeliverStdout}, nil
+	case kind == schedule.DeliverLog && rest == "":
+		return schedule.Delivery{Kind: schedule.DeliverLog}, nil
+	case kind == schedule.DeliverTelegram:
+		d := schedule.Delivery{Kind: schedule.DeliverTelegram}
+		if rest != "" {
+			id, err := strconv.ParseInt(rest, 10, 64)
+			if err != nil {
+				return schedule.Delivery{}, fmt.Errorf("invalid telegram chat id %q", rest)
+			}
+			d.ChatID = id
+		}
+		return d, nil
+	default:
+		return schedule.Delivery{}, fmt.Errorf("unknown delivery %q (use stdout, log, telegram, telegram:<chatID>)", s)
+	}
+}
+
+func deliverString(d schedule.Delivery) string {
+	if d.Kind == schedule.DeliverTelegram && d.ChatID != 0 {
+		return fmt.Sprintf("telegram:%d", d.ChatID)
+	}
+	return d.Kind
+}
+
+// firstWords returns up to n whitespace-separated words of s, for a default
+// job label. A negative n is treated as 0 (empty result) instead of panicking.
+func firstWords(s string, n int) string {
+	fields := strings.Fields(s)
+	if len(fields) > n {
+		fields = fields[:max(n, 0)]
+	}
+	return strings.Join(fields, " ")
+}
+
+// appendScheduleLog appends a delivered result to ~/.odek/schedule.log.
+func appendScheduleLog(job schedule.Job, result string) error {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return err
+	}
+	dir := filepath.Join(home, ".odek")
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		return err
+	}
+	path := filepath.Join(dir, "schedule.log")
+	f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0600)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	_, err = fmt.Fprintf(f, "[%s] %s (%s)\n%s\n\n", time.Now().Format(time.RFC3339), job.Name, job.ID, result)
+	return err
+}
+
+// stderrLogger is a minimal schedule.Logger that writes key/value lines to
+// stderr, matching the daemon's foreground logging style.
+type stderrLogger struct{}
+
+func (stderrLogger) Info(msg string, kv ...any)  { logKV("INFO", msg, kv) }
+func (stderrLogger) Error(msg string, kv ...any) { logKV("ERROR", msg, kv) }
+
+func logKV(level, msg string, kv []any) {
+	var b strings.Builder
+	fmt.Fprintf(&b, "%s schedule: %s", level, msg)
+	for i := 0; i+1 < len(kv); i += 2 {
+		fmt.Fprintf(&b, " %v=%v", kv[i], kv[i+1])
+	}
+	fmt.Fprintln(os.Stderr, b.String())
+}
+
+// acquireScheduleLock takes the ~/.odek/schedule.pid lock so two daemons never
+// fire the same jobs, returning a func that releases it. It refuses to start
+// (rather than kill, as the Telegram lock does) when the recorded PID is alive.
+func acquireScheduleLock() (func(), error) {
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return nil, err
+	}
+	pidFile := filepath.Join(home, ".odek", "schedule.pid")
+	if err := os.MkdirAll(filepath.Dir(pidFile), 0755); err != nil {
+		return nil, err
+	}
+	if data, err := os.ReadFile(pidFile); err == nil {
+		if pid, _ := strconv.Atoi(strings.TrimSpace(string(data))); pid > 1 {
+			if err := syscall.Kill(pid, 0); err == nil || err == syscall.EPERM { // EPERM: alive, but owned by another user
+				return nil, fmt.Errorf("another schedule daemon is already running (PID %d)", pid)
+			}
+		}
+	}
+	if err := os.WriteFile(pidFile, []byte(strconv.Itoa(os.Getpid())), 0644); err != nil {
+		return nil, err
+	}
+	return func() { os.Remove(pidFile) }, nil
+}
diff --git a/cmd/odek/schedule_test.go b/cmd/odek/schedule_test.go
new file mode 100644
index 0000000..773b94a
--- /dev/null
+++ b/cmd/odek/schedule_test.go
@@ -0,0 +1,135 @@
+package main
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/BackendStack21/odek/internal/config"
+	"github.com/BackendStack21/odek/internal/schedule"
+	"github.com/BackendStack21/odek/internal/telegram"
+)
+
+func TestParseDeliver(t *testing.T) {
+	tests := []struct {
+		in       string
+		wantKind string
+		wantChat int64
+		wantErr  bool
+	}{
+		{"", schedule.DeliverStdout, 0, false},
+		{"stdout", schedule.DeliverStdout, 0, false},
+		{"log", schedule.DeliverLog, 0, false},
+		{"telegram", schedule.DeliverTelegram, 0, false},
+		{"telegram:12345", schedule.DeliverTelegram, 12345, false},
+		{"telegram:-100999", schedule.DeliverTelegram, -100999, false},
+		{"telegram:notanid", "", 0, true},
+		{"smoke-signal", "", 0, true},
+	}
+	for _, tc := range tests {
+		got, err := parseDeliver(tc.in)
+		if (err != nil) != tc.wantErr {
+			t.Errorf("parseDeliver(%q) err=%v wantErr=%v", tc.in, err, tc.wantErr)
+			continue
+		}
+		if tc.wantErr {
+			continue
+		}
+		if got.Kind != tc.wantKind || got.ChatID != tc.wantChat {
+			t.Errorf("parseDeliver(%q) = %+v, want kind=%s chat=%d", tc.in, got, tc.wantKind, tc.wantChat)
+		}
+	}
+}
+
+func TestDeliverString(t *testing.T) {
+	cases := map[schedule.Delivery]string{
+		{Kind: schedule.DeliverStdout}:                   "stdout",
+		{Kind: schedule.DeliverLog}:                      "log",
+		{Kind: schedule.DeliverTelegram}:                 "telegram",
+		{Kind: schedule.DeliverTelegram, ChatID: 42}:     "telegram:42",
+		{Kind: schedule.DeliverTelegram, ChatID: -10042}: "telegram:-10042",
+	}
+	for d, want := range cases {
+		if got := deliverString(d); got != want {
+			t.Errorf("deliverString(%+v) = %q, want %q", d, got, want)
+		}
+	}
+}
+
+func TestFirstWords(t *testing.T) {
+	tests := []struct {
+		in   string
+		n    int
+		want string
+	}{
+		{"one two three four", 2, "one two"},
+		{"short", 6, "short"},
+		{"  extra   spaces   here ", 2, "extra spaces"},
+		{"", 3, ""},
+	}
+	for _, tc := range tests {
+		if got := firstWords(tc.in, tc.n); got != tc.want {
+			t.Errorf("firstWords(%q,%d) = %q, want %q", tc.in, tc.n, got, tc.want)
+		}
+	}
+}
+
+func TestJobSchedule(t *testing.T) {
+	// Valid, default UTC.
+	if _, err := jobSchedule(schedule.Job{Cron: "0 9 * * *"}); err != nil {
+		t.Errorf("valid job: unexpected error %v", err)
+	}
+	// Valid with timezone.
+	if _, err := jobSchedule(schedule.Job{Cron: "0 9 * * *", Timezone: "Europe/Berlin"}); err != nil {
+		t.Errorf("tz job: unexpected error %v", err)
+	}
+	// Bad timezone.
+	if _, err := jobSchedule(schedule.Job{Cron: "0 9 * * *", Timezone: "Mars/Phobos"}); err == nil {
+		t.Error("bad timezone should error")
+	}
+	// Bad cron.
+	if _, err := jobSchedule(schedule.Job{Cron: "nope"}); err == nil {
+		t.Error("bad cron should error")
+	}
+}
+
+func TestCliDeliverer_Log(t *testing.T) {
+	home := t.TempDir()
+	t.Setenv("HOME", home)
+	d := cliDeliverer{resolved: config.ResolvedConfig{}}
+	job := schedule.Job{ID: "jb-1", Name: "logjob", Deliver: schedule.Delivery{Kind: schedule.DeliverLog}}
+	if err := d.Deliver(job, "hello from cron"); err != nil {
+		t.Fatalf("Deliver(log): %v", err)
+	}
+	data, err := os.ReadFile(filepath.Join(home, ".odek", "schedule.log"))
+	if err != nil {
+		t.Fatalf("read log: %v", err)
+	}
+	if !strings.Contains(string(data), "hello from cron") || !strings.Contains(string(data), "jb-1") {
+		t.Errorf("log missing content: %q", string(data))
+	}
+}
+
+func TestCliDeliverer_TelegramErrors(t *testing.T) {
+	// No token configured → error.
+	d := cliDeliverer{resolved: config.ResolvedConfig{}}
+	job := schedule.Job{Deliver: schedule.Delivery{Kind: schedule.DeliverTelegram}}
+	if err := d.Deliver(job, "x"); err == nil {
+		t.Error("expected error when telegram token is unset")
+	}
+
+	// Token set but no chat id anywhere → error.
+	d = cliDeliverer{resolved: config.ResolvedConfig{Telegram: telegram.TelegramConfig{Token: "t"}}}
+	if err := d.Deliver(job, "x"); err == nil {
+		t.Error("expected error when no chat id is resolvable")
+	}
+}
+
+func TestCliDeliverer_UnknownKind(t *testing.T) {
+	d := cliDeliverer{resolved: config.ResolvedConfig{}}
+	job := schedule.Job{Deliver: schedule.Delivery{Kind: "pigeon"}}
+	if err := d.Deliver(job, "x"); err == nil {
+		t.Error("unknown delivery kind should error")
+	}
+}

From 8471801698fdff8ca4fb0f7a6e17ce54cde6747e Mon Sep 17 00:00:00 2001
From: Rolando Santamaria Maso <kyberneees@gmail.com>
Date: Thu, 4 Jun 2026 21:16:33 +0200
Subject: [PATCH 03/11] feat(schedule): run the scheduler inside odek telegram
 (phase 3)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Telegram bot now hosts the scheduler in-process, so reminders and the bot
share one runtime — the whole reason to go native. No separate cron daemon, no
environment-inheritance problem.

- startSchedulerForBot: launched after the poller, stopped on ctx cancel. It
  acquires the shared schedule pid-lock; if an external `odek schedule daemon`
  already holds it, the bot defers (logs and skips) rather than double-firing.
- telegramRunner: runs each job headless and accounts token usage against the
  bot's daily budget — pre-flight refuse when exhausted, bill the run after.
- telegramDeliverer: delivers via the LIVE bot for telegram jobs (sharing its
  client and 429 backoff) and falls back to the CLI deliverer for stdout/log.
- runTaskHeadless now captures cumulative tokens via an IterationCallback, so
  the Runner's token count is real (engine logs it; bot bills it).
- Graceful restart releases the schedule lock before os.Exit, mirroring the
  Telegram instance lock, so the restarted child's scheduler re-acquires cleanly.

Tests: embedded deliverer routing — live-bot send, default-chat fallback,
no-chat error, and stdout/log fallback — via the recording test bot. Full
cmd/odek suite green under -race; whole module suite green, vet + fmt clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 cmd/odek/schedule.go      | 132 +++++++++++++++++++++++++++++++++-----
 cmd/odek/schedule_test.go |  62 ++++++++++++++++++
 cmd/odek/telegram.go      |  13 ++++
 3 files changed, 192 insertions(+), 15 deletions(-)

diff --git a/cmd/odek/schedule.go b/cmd/odek/schedule.go
index e10b87c..d35f34b 100644
--- a/cmd/odek/schedule.go
+++ b/cmd/odek/schedule.go
@@ -17,6 +17,7 @@ import (
 	"github.com/BackendStack21/odek"
 	"github.com/BackendStack21/odek/internal/config"
 	"github.com/BackendStack21/odek/internal/llm"
+	"github.com/BackendStack21/odek/internal/loop"
 	"github.com/BackendStack21/odek/internal/render"
 	"github.com/BackendStack21/odek/internal/schedule"
 	"github.com/BackendStack21/odek/internal/telegram"
@@ -363,6 +364,100 @@ func (d cliDeliverer) deliverTelegram(job schedule.Job, result string) error {
 	return err
 }
 
+// ── embedded scheduler (inside `odek telegram`) ─────────────────────────
+
+// scheduleUnlockRef holds the embedded scheduler's pid-lock releaser so the
+// Telegram graceful-restart path can release it before os.Exit(0), mirroring
+// instanceLockRef. Without this the restarted child would see a live lock and
+// skip starting its scheduler.
+var scheduleUnlockRef func()
+
+// telegramRunner runs a job's task headlessly and accounts its token usage
+// against the bot's daily budget.
+type telegramRunner struct {
+	resolved config.ResolvedConfig
+	system   string
+	bot      *telegram.Bot
+}
+
+func (r telegramRunner) Run(ctx context.Context, job schedule.Job) (string, int64, error) {
+	budgeted := r.resolved.Telegram.DailyTokenBudget > 0
+	if budgeted {
+		// Pre-flight: refuse if the budget is already exhausted so a runaway
+		// schedule can't keep spending. Mirrors the chat-message pre-check.
+		if err := r.bot.CheckDailyBudget(1); err != nil {
+			return "", 0, fmt.Errorf("daily token budget exhausted: %w", err)
+		}
+	}
+	result, tokens, err := runTaskHeadless(ctx, r.resolved, r.system, job.Task)
+	if err == nil && budgeted && tokens > 0 {
+		_ = r.bot.CheckDailyBudget(tokens) // bill the run (best-effort)
+	}
+	return result, tokens, err
+}
+
+// telegramDeliverer delivers via the live bot for telegram jobs (sharing its
+// client and rate limiting) and falls back to the CLI deliverer for stdout/log.
+type telegramDeliverer struct {
+	bot      *telegram.Bot
+	fallback cliDeliverer
+}
+
+func (d telegramDeliverer) Deliver(job schedule.Job, result string) error {
+	if job.Deliver.Kind != schedule.DeliverTelegram {
+		return d.fallback.Deliver(job, result)
+	}
+	chatID := job.Deliver.ChatID
+	if chatID == 0 {
+		chatID = d.fallback.resolved.Telegram.DefaultChatID
+	}
+	if chatID == 0 {
+		return fmt.Errorf("no chat id (set the job's telegram:<chatID> or telegram.default_chat_id)")
+	}
+	_, err := d.bot.SendMessage(chatID, result, nil)
+	return err
+}
+
+// startSchedulerForBot starts the embedded scheduler unless an external
+// `odek schedule daemon` already holds the lock (in which case the bot defers
+// to it, to avoid double-firing). It returns a stop func that releases the
+// lock; the scheduler goroutine itself stops when ctx is cancelled.
+func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved config.ResolvedConfig, system string, log telegram.Logger) func() {
+	unlock, err := acquireScheduleLock()
+	if err != nil {
+		log.Info("schedule: embedded scheduler not started", "reason", err.Error())
+		return func() {}
+	}
+	st, err := schedule.NewStore()
+	if err != nil {
+		log.Error("schedule: store init failed", "error", err)
+		unlock()
+		return func() {}
+	}
+	sched := schedule.New(st,
+		telegramRunner{resolved: resolved, system: system, bot: bot},
+		telegramDeliverer{bot: bot, fallback: cliDeliverer{resolved: resolved}},
+		schedule.Options{Logger: log},
+	)
+	scheduleUnlockRef = unlock
+	go func() { _ = sched.Run(ctx) }()
+
+	enabled := 0
+	if jobs, err := st.List(); err == nil {
+		for _, j := range jobs {
+			if j.Enabled {
+				enabled++
+			}
+		}
+	}
+	log.Info("schedule: embedded scheduler started", "enabled_jobs", enabled)
+
+	return func() {
+		scheduleUnlockRef = nil
+		unlock()
+	}
+}
+
 // ── headless agent execution ────────────────────────────────────────────
 
 // runTaskHeadless builds a fresh agent with no terminal renderer and no
@@ -383,21 +478,27 @@ func runTaskHeadless(ctx context.Context, resolved config.ResolvedConfig, system
 		defer cleanup()
 	}
 
+	// Capture cumulative token usage from the final iteration so the Runner
+	// can report it (the engine logs it; the bot bills it against the budget).
+	// RunWithMessages drives the loop synchronously on this goroutine, so the
+	// callback needs no synchronisation.
+	var lastInfo loop.IterationInfo
 	agent, err := odek.New(odek.Config{
-		Model:           resolved.Model,
-		BaseURL:         resolved.BaseURL,
-		APIKey:          resolved.APIKey,
-		MaxIterations:   resolved.MaxIter,
-		MaxToolParallel: resolved.MaxToolParallel,
-		SystemMessage:   system,
-		RuntimeContext:  odek.BuildRuntimeContext("schedule"),
-		NoProjectFile:   resolved.NoAgents,
-		Thinking:        resolved.Thinking,
-		Temperature:     0,
-		Tools:           tools,
-		Renderer:        render.New(io.Discard, false), // silent: unattended
-		InteractionMode: "off",
-		PromptCaching:   resolved.PromptCaching,
+		Model:             resolved.Model,
+		BaseURL:           resolved.BaseURL,
+		APIKey:            resolved.APIKey,
+		MaxIterations:     resolved.MaxIter,
+		MaxToolParallel:   resolved.MaxToolParallel,
+		SystemMessage:     system,
+		RuntimeContext:    odek.BuildRuntimeContext("schedule"),
+		NoProjectFile:     resolved.NoAgents,
+		Thinking:          resolved.Thinking,
+		Temperature:       0,
+		Tools:             tools,
+		Renderer:          render.New(io.Discard, false), // silent: unattended
+		InteractionMode:   "off",
+		PromptCaching:     resolved.PromptCaching,
+		IterationCallback: func(info loop.IterationInfo) { lastInfo = info },
 	})
 	if err != nil {
 		return "", 0, err
@@ -411,7 +512,8 @@ func runTaskHeadless(ctx context.Context, resolved config.ResolvedConfig, system
 	messages = append(messages, llm.Message{Role: "user", Content: task})
 
 	result, _, err := agent.RunWithMessages(ctx, messages)
-	return result, 0, err
+	tokens := int64(lastInfo.InputTokens + lastInfo.OutputTokens)
+	return result, tokens, err
 }
 
 // ── helpers ─────────────────────────────────────────────────────────────
diff --git a/cmd/odek/schedule_test.go b/cmd/odek/schedule_test.go
index 773b94a..e40ba7f 100644
--- a/cmd/odek/schedule_test.go
+++ b/cmd/odek/schedule_test.go
@@ -5,6 +5,7 @@ import (
 	"path/filepath"
 	"strings"
 	"testing"
+	"time"
 
 	"github.com/BackendStack21/odek/internal/config"
 	"github.com/BackendStack21/odek/internal/schedule"
@@ -133,3 +134,64 @@ func TestCliDeliverer_UnknownKind(t *testing.T) {
 		t.Error("unknown delivery kind should error")
 	}
 }
+
+// ── embedded (bot) deliverer ────────────────────────────────────────────
+
+func TestTelegramDeliverer_SendsViaLiveBot(t *testing.T) {
+	bot, msgCh := newRecordingTestBot(t)
+	d := telegramDeliverer{bot: bot, fallback: cliDeliverer{resolved: config.ResolvedConfig{}}}
+	job := schedule.Job{Deliver: schedule.Delivery{Kind: schedule.DeliverTelegram, ChatID: 555}}
+	if err := d.Deliver(job, "scheduled hello"); err != nil {
+		t.Fatalf("Deliver: %v", err)
+	}
+	select {
+	case got := <-msgCh:
+		if got != "scheduled hello" {
+			t.Errorf("sent %q, want %q", got, "scheduled hello")
+		}
+	case <-time.After(2 * time.Second):
+		t.Fatal("bot did not receive a sendMessage")
+	}
+}
+
+func TestTelegramDeliverer_UsesDefaultChatID(t *testing.T) {
+	bot, msgCh := newRecordingTestBot(t)
+	d := telegramDeliverer{
+		bot:      bot,
+		fallback: cliDeliverer{resolved: config.ResolvedConfig{Telegram: telegram.TelegramConfig{DefaultChatID: 999}}},
+	}
+	// No per-job chat ID → falls back to default_chat_id.
+	job := schedule.Job{Deliver: schedule.Delivery{Kind: schedule.DeliverTelegram}}
+	if err := d.Deliver(job, "to default"); err != nil {
+		t.Fatalf("Deliver: %v", err)
+	}
+	select {
+	case <-msgCh:
+	case <-time.After(2 * time.Second):
+		t.Fatal("bot did not receive a sendMessage to the default chat")
+	}
+}
+
+func TestTelegramDeliverer_NoChatErrors(t *testing.T) {
+	bot, _ := newRecordingTestBot(t)
+	d := telegramDeliverer{bot: bot, fallback: cliDeliverer{resolved: config.ResolvedConfig{}}}
+	job := schedule.Job{Deliver: schedule.Delivery{Kind: schedule.DeliverTelegram}}
+	if err := d.Deliver(job, "x"); err == nil {
+		t.Error("telegram delivery with no chat id should error")
+	}
+}
+
+func TestTelegramDeliverer_FallsBackForLog(t *testing.T) {
+	home := t.TempDir()
+	t.Setenv("HOME", home)
+	// Non-telegram kinds route to the CLI deliverer; the bot is untouched.
+	d := telegramDeliverer{bot: nil, fallback: cliDeliverer{resolved: config.ResolvedConfig{}}}
+	job := schedule.Job{ID: "jb-x", Name: "logjob", Deliver: schedule.Delivery{Kind: schedule.DeliverLog}}
+	if err := d.Deliver(job, "logged via fallback"); err != nil {
+		t.Fatalf("Deliver(log): %v", err)
+	}
+	data, err := os.ReadFile(filepath.Join(home, ".odek", "schedule.log"))
+	if err != nil || !strings.Contains(string(data), "logged via fallback") {
+		t.Errorf("fallback log path failed: err=%v content=%q", err, string(data))
+	}
+}
diff --git a/cmd/odek/telegram.go b/cmd/odek/telegram.go
index a21dad8..63efabf 100644
--- a/cmd/odek/telegram.go
+++ b/cmd/odek/telegram.go
@@ -623,6 +623,14 @@ func telegramCmd(args []string) error {
 		}
 	}()
 
+	// 16b. Start the embedded scheduler. It fires scheduled jobs (see
+	// `odek schedule`) and delivers results to Telegram, sharing this
+	// process's resolved config — so no environment-inheritance problem and no
+	// separate cron daemon. If an external `odek schedule daemon` already holds
+	// the lock, this defers to it instead of double-firing.
+	stopScheduler := startSchedulerForBot(ctx, bot, resolved, systemMessage, handlerLog)
+	defer stopScheduler()
+
 	// 17. Process updates until the channel is closed (ctx cancelled).
 	for upd := range updates {
 		handler.HandleUpdate(upd)
@@ -889,6 +897,11 @@ func gracefulRestart(bot *telegram.Bot) {
 	if instanceLockRef != nil {
 		instanceLockRef.release()
 	}
+	// Release the schedule lock too, so the restarted child's embedded
+	// scheduler can re-acquire it instead of finding a (briefly) live owner.
+	if scheduleUnlockRef != nil {
+		scheduleUnlockRef()
+	}
 	os.Exit(0)
 }
 

From 5088afedd0c12050eb550e8c274ce9396fda0d22 Mon Sep 17 00:00:00 2001
From: Rolando Santamaria Maso <kyberneees@gmail.com>
Date: Thu, 4 Jun 2026 21:33:17 +0200
Subject: [PATCH 04/11] feat(schedule): schedules config section + docs (phase
 4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the scheduler configurable and documented.

- internal/config: new `schedules` section (enabled, max_concurrent, timezone,
  catchup) with the same file→env→default layering as every other section.
  resolveSchedules + ODEK_SCHEDULES_* env overrides + overlayFile handling.
  Defaults: enabled=true, max_concurrent=2, timezone=UTC, catchup=false.
- cmd/odek: the daemon and the embedded (bot) scheduler now build their engine
  Options from resolved.Schedules via a shared schedulerOptions helper
  (max-concurrent, default timezone, catchup). The embedded scheduler is gated
  on schedules.enabled so it can be turned off in favour of a standalone daemon.
- docs: new docs/SCHEDULES.md (canonical guide — runtime models, CLI, cron
  syntax incl. Vixie dom/dow coupling, delivery, the unattended-safety policy,
  config, missed-run behaviour); a Schedules section in CONFIG.md; a feature
  bullet in README.

Tests: resolveSchedules defaults/overrides/partial, and LoadConfig wiring for
defaults and ODEK_SCHEDULES_* env. Full config + schedule + cmd suites green,
vet + fmt clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 README.md                         |   3 +
 cmd/odek/schedule.go              |  27 ++++-
 docs/CONFIG.md                    |  26 +++++
 docs/SCHEDULES.md                 | 159 ++++++++++++++++++++++++++++++
 internal/config/loader.go         |  81 +++++++++++++++
 internal/config/schedules_test.go |  83 ++++++++++++++++
 6 files changed, 377 insertions(+), 2 deletions(-)
 create mode 100644 docs/SCHEDULES.md
 create mode 100644 internal/config/schedules_test.go

diff --git a/README.md b/README.md
index 489bcda..de95344 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,9 @@ Any OpenAI-compatible endpoint: Deepseek, OpenAI, Anthropic, Ollama, vLLM, Groq,
 ### 🤖 Telegram Bot
 Run agent tasks directly from Telegram via long-polling. Supports slash commands (`/plan`, `/sessions`, `/resume`, `/prune`, `/help`, etc.), voice message transcription, photo analysis, conversation persistence across restarts, saved plan files, and daily token budgeting. No external Telegram libraries — built on stdlib `net/http`. [docs/TELEGRAM.md](docs/TELEGRAM.md)
 
+### ⏰ Scheduled Tasks (native cron)
+Run agent tasks on a cron schedule and deliver results to Telegram, stdout, or a log — no external cron daemon. The scheduler runs **in-process** (inside `odek telegram` or a standalone `odek schedule daemon`), so a scheduled task sees the same resolved config (API key, model, bot token) an interactive run does. Stdlib-only cron parser with Vixie day-of-month/day-of-week semantics, per-job timezones, missed-run catchup, and a singleton lock so jobs never double-fire. `odek schedule add --cron "0 9 * * 1-5" --deliver telegram "..."`. [docs/SCHEDULES.md](docs/SCHEDULES.md)
+
 ### 📎 File Attachments
 Attach files to any prompt with `--ctx` / `-c` (CLI), `@filename` inline references (CLI + REPL + Web UI), or drag-and-drop (Web UI). File content is injected as context blocks before the task — no tool calls needed. Comma-separate multiple files: `--ctx main.go,lib.go`. [docs/CLI.md#file-attachments](docs/CLI.md#file-attachments)
 
diff --git a/cmd/odek/schedule.go b/cmd/odek/schedule.go
index d35f34b..38a07ad 100644
--- a/cmd/odek/schedule.go
+++ b/cmd/odek/schedule.go
@@ -293,7 +293,7 @@ func scheduleDaemon(_ []string) error {
 	sched := schedule.New(st,
 		agentRunner{resolved: resolved, system: system},
 		cliDeliverer{resolved: resolved},
-		schedule.Options{Logger: stderrLogger{}},
+		schedulerOptions(resolved.Schedules, stderrLogger{}),
 	)
 
 	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
@@ -423,6 +423,10 @@ func (d telegramDeliverer) Deliver(job schedule.Job, result string) error {
 // to it, to avoid double-firing). It returns a stop func that releases the
 // lock; the scheduler goroutine itself stops when ctx is cancelled.
 func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved config.ResolvedConfig, system string, log telegram.Logger) func() {
+	if !resolved.Schedules.Enabled {
+		log.Info("schedule: embedded scheduler disabled by config")
+		return func() {}
+	}
 	unlock, err := acquireScheduleLock()
 	if err != nil {
 		log.Info("schedule: embedded scheduler not started", "reason", err.Error())
@@ -437,7 +441,7 @@ func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved confi
 	sched := schedule.New(st,
 		telegramRunner{resolved: resolved, system: system, bot: bot},
 		telegramDeliverer{bot: bot, fallback: cliDeliverer{resolved: resolved}},
-		schedule.Options{Logger: log},
+		schedulerOptions(resolved.Schedules, log),
 	)
 	scheduleUnlockRef = unlock
 	go func() { _ = sched.Run(ctx) }()
@@ -518,6 +522,25 @@ func runTaskHeadless(ctx context.Context, resolved config.ResolvedConfig, system
 
 // ── helpers ─────────────────────────────────────────────────────────────
 
+// schedulerOptions builds engine options from the resolved config, falling
+// back to UTC if the configured default timezone can't be loaded.
+func schedulerOptions(sc config.ScheduleConfig, logger schedule.Logger) schedule.Options {
+	loc := time.UTC
+	if sc.Timezone != "" {
+		if l, err := time.LoadLocation(sc.Timezone); err == nil {
+			loc = l
+		} else {
+			logger.Error("schedule: invalid default timezone, using UTC", "timezone", sc.Timezone, "error", err)
+		}
+	}
+	return schedule.Options{
+		MaxConcurrent: sc.MaxConcurrent,
+		DefaultTZ:     loc,
+		Catchup:       sc.Catchup,
+		Logger:        logger,
+	}
+}
+
 // jobSchedule compiles a job's cron in its timezone (or UTC) for display.
 func jobSchedule(j schedule.Job) (*schedule.Schedule, error) {
 	loc := time.UTC
diff --git a/docs/CONFIG.md b/docs/CONFIG.md
index 73ea014..4e9383b 100644
--- a/docs/CONFIG.md
+++ b/docs/CONFIG.md
@@ -325,6 +325,32 @@ odek run "Daily summary" --deliver
 
 See [docs/TELEGRAM.md](docs/TELEGRAM.md#cron-integration) for full cron setup instructions.
 
+## Schedules
+
+Configures the native in-process task scheduler (`odek schedule`). Job
+definitions live in `~/.odek/schedules.json`; this section only tunes the
+engine. Every field has an `ODEK_SCHEDULES_*` environment override.
+
+```json
+{
+  "schedules": {
+    "enabled": true,
+    "max_concurrent": 2,
+    "timezone": "UTC",
+    "catchup": false
+  }
+}
+```
+
+| Field | Env | Default | Description |
+|---|---|---|---|
+| `enabled` | `ODEK_SCHEDULES_ENABLED` | `true` | Run the embedded scheduler inside `odek telegram`. Set to `false` to disable it and rely on a standalone `odek schedule daemon` instead. |
+| `max_concurrent` | `ODEK_SCHEDULES_MAX_CONCURRENT` | `2` | Maximum scheduled jobs running at once. |
+| `timezone` | `ODEK_SCHEDULES_TIMEZONE` | `UTC` | Default timezone for jobs that don't set their own `--tz`. |
+| `catchup` | `ODEK_SCHEDULES_CATCHUP` | `false` | Global default for the missed-run policy: when `true`, a missed fire runs once on startup; otherwise it is skipped. |
+
+Full guide: [docs/SCHEDULES.md](SCHEDULES.md).
+
 ## Tool Progress
 
 Controls how per-tool progress messages appear inside the Telegram bot during agent runs. Independent from `interaction_mode` — you can have engaging terminal output with minimal Telegram progress, or verbose terminal with rich progress bubbles.
diff --git a/docs/SCHEDULES.md b/docs/SCHEDULES.md
new file mode 100644
index 0000000..6da83f3
--- /dev/null
+++ b/docs/SCHEDULES.md
@@ -0,0 +1,159 @@
+# Scheduled Tasks (native cron)
+
+odek can run agent tasks on a cron schedule and deliver each result somewhere —
+a Telegram chat, stdout, or a log file. The scheduler is **native and
+in-process**: it runs inside a long-lived odek process that has already
+resolved its configuration (API key, model, bot token, default chat) into
+memory. A scheduled task therefore sees exactly what an interactive `odek run`
+does — no environment-inheritance problems, no external cron daemon, no
+container-only behaviour.
+
+```bash
+# A weekday stand-up nudge delivered to Telegram
+odek schedule add --cron "0 9 * * 1-5" --deliver telegram "Remind me: stand-up in 15 minutes"
+
+# Run the scheduler (headless), or just start `odek telegram` — it hosts one too
+odek schedule daemon
+```
+
+---
+
+## Where it runs
+
+The same engine runs in two places; pick whichever fits your deployment:
+
+| | Use when |
+|---|---|
+| **Inside `odek telegram`** | You already run the bot. The scheduler starts automatically as part of the bot process — one process for chat + reminders. |
+| **`odek schedule daemon`** | You don't run the bot (headless server, CI box). A dedicated foreground process that only schedules. |
+
+A shared lock (`~/.odek/schedule.pid`) coordinates the two: whichever starts
+first owns scheduling; the other defers, so jobs never fire twice. (Disable the
+bot's embedded scheduler with `schedules.enabled = false` if you prefer to run
+the daemon separately.)
+
+---
+
+## Managing jobs
+
+```text
+odek schedule list                          List jobs: id, on/off, cron, next fire, last status
+odek schedule add --cron "<expr>" <task>    Add a job (flags below)
+odek schedule rm <id>                       Remove a job
+odek schedule enable  <id>                  Enable a job
+odek schedule disable <id>                  Disable a job (kept, never fires)
+odek schedule run  <id>                     Run a job once now and deliver (test it)
+odek schedule next <id|"cron-expr">         Preview the next few fire times
+odek schedule daemon                        Run the scheduler in the foreground
+```
+
+### `add` flags
+
+| Flag | Meaning |
+|---|---|
+| `--cron "<expr>"` | 5-field cron or `@macro` (required) |
+| `--name <label>` | Human label (defaults to the first words of the task) |
+| `--deliver <dest>` | `stdout` (default), `log`, `telegram`, or `telegram:<chatID>` |
+| `--tz <IANA>` | Timezone, e.g. `Europe/Berlin` (default: `schedules.timezone`, UTC unless configured) |
+| `--catchup` | If a fire was missed while the process was down, run once on startup |
+| `--disabled` | Add without enabling |
+
+Definitions are stored in `~/.odek/schedules.json` (mode `0600`); runtime state
+(last run, status, next fire) lives in `~/.odek/schedule-state.json`. A running
+scheduler picks up edits to the definitions file automatically (no restart).
+
+---
+
+## Cron syntax
+
+Standard 5-field Vixie cron:
+
+```text
+┌ minute        0-59
+│ ┌ hour        0-23
+│ │ ┌ day-of-month 1-31
+│ │ │ ┌ month   1-12 or JAN-DEC
+│ │ │ │ ┌ day-of-week 0-7 or SUN-SAT (0 and 7 are both Sunday)
+* * * * *
+```
+
+Each field accepts `*`, a value, a range `a-b`, a step `*/n` / `a-b/n` / `a/n`,
+and comma-separated lists. Macros: `@hourly`, `@daily` (`@midnight`),
+`@weekly`, `@monthly`, `@yearly` (`@annually`).
+
+Granularity is **one minute** (no seconds field). Times are in the job's `--tz`
+or, failing that, the scheduler's default timezone (UTC unless configured).
+
+**Day-of-month / day-of-week coupling** follows Vixie semantics: when *both*
+fields are restricted, a day matches if *either* matches. So `0 0 13 * 5` fires
+on the 13th **or** any Friday — not only Friday the 13th.
+
+```bash
+odek schedule next "0 9 * * 1-5"   # validate an expression and see upcoming fires
+```
+
+---
+
+## Delivery
+
+| Kind | Result goes to |
+|---|---|
+| `stdout` | the daemon's stdout (or the bot's container logs) |
+| `log` | appended to `~/.odek/schedule.log` |
+| `telegram` | the configured `telegram.default_chat_id` |
+| `telegram:<chatID>` | a specific chat |
+
+Telegram delivery needs `telegram.bot_token` and a chat ID
+(`ODEK_TELEGRAM_BOT_TOKEN` + `ODEK_TELEGRAM_DEFAULT_CHAT_ID`, or per-job
+`telegram:<chatID>`). When delivering from inside `odek telegram`, the live bot
+client is reused (shared rate limiting).
+
+---
+
+## Safety: unattended tasks
+
+A scheduled task runs with **no human present to approve actions**. It inherits
+the process's existing danger policy (`dangerous` in config) exactly as a
+non-interactive `odek run` would:
+
+- **Restricted profile** → destructive / code-execution / network-write
+  operations are denied; read/summarise/deliver tasks work.
+- **Godmode profile** → full access, unattended. Only point scheduled jobs at
+  godmode if you trust every task definition.
+
+Task definitions in `schedules.json` are owner-authored (same trust level as
+`config.json`); the file is written `0600`.
+
+---
+
+## Configuration
+
+The `schedules` config section (in `~/.odek/config.json` or `./odek.json`) tunes
+the engine. Every field also has an `ODEK_SCHEDULES_*` environment override.
+
+```json
+{
+  "schedules": {
+    "enabled": true,
+    "max_concurrent": 2,
+    "timezone": "UTC",
+    "catchup": false
+  }
+}
+```
+
+| Field | Env | Default | Meaning |
+|---|---|---|---|
+| `enabled` | `ODEK_SCHEDULES_ENABLED` | `true` | Run the embedded scheduler inside `odek telegram` |
+| `max_concurrent` | `ODEK_SCHEDULES_MAX_CONCURRENT` | `2` | Max jobs running at once |
+| `timezone` | `ODEK_SCHEDULES_TIMEZONE` | `UTC` | Default timezone for jobs without `--tz` |
+| `catchup` | `ODEK_SCHEDULES_CATCHUP` | `false` | Global default for the missed-run policy |
+
+---
+
+## Missed runs
+
+If the scheduler was down when a job was due, on startup it either **skips**
+(default — reschedules forward and records a `skipped` status) or **runs once**
+(when the job's `--catchup` or `schedules.catchup` is set). A burst of missed
+ticks never stampedes: at most one catch-up fire per job.
diff --git a/internal/config/loader.go b/internal/config/loader.go
index ce755af..186add9 100644
--- a/internal/config/loader.go
+++ b/internal/config/loader.go
@@ -170,6 +170,9 @@ type FileConfig struct {
 	// Transcription configures local audio transcription (whisper.cpp).
 	Transcription *TranscriptionConfig `json:"transcription,omitempty"`
 
+	// Schedules configures the native in-process task scheduler.
+	Schedules *SchedulesConfig `json:"schedules,omitempty"`
+
 	// GithubRepoDirectory is the path to the local clone of the project
 	// repository. Injected into the system prompt so the agent knows
 	// where source code lives and can self-correct.
@@ -287,6 +290,10 @@ type ResolvedConfig struct {
 	// Default: auto_transcribe=true, model="tiny", language="", no binary_path.
 	Transcription TranscriptionConfig
 
+	// Schedules is the resolved scheduler config.
+	// Default: enabled=true, max_concurrent=2, timezone="UTC", catchup=false.
+	Schedules ScheduleConfig
+
 	// GithubRepoDirectory is the path to the local clone of the project
 	// repository. Injected into the system prompt.
 	GithubRepoDirectory string
@@ -584,6 +591,33 @@ func LoadConfig(cli CLIFlags) ResolvedConfig {
 		cfg.InteractionMode = v
 	}
 
+	// Schedules env overrides (ODEK_SCHEDULES_*): lets the scheduler be tuned
+	// from the environment, like everything else in a containerised deploy.
+	if v := envBool("SCHEDULES_ENABLED"); v != nil {
+		if cfg.Schedules == nil {
+			cfg.Schedules = &SchedulesConfig{}
+		}
+		cfg.Schedules.Enabled = v
+	}
+	if v := envInt("SCHEDULES_MAX_CONCURRENT"); v > 0 {
+		if cfg.Schedules == nil {
+			cfg.Schedules = &SchedulesConfig{}
+		}
+		cfg.Schedules.MaxConcurrent = v
+	}
+	if v := envString("SCHEDULES_TIMEZONE"); v != "" {
+		if cfg.Schedules == nil {
+			cfg.Schedules = &SchedulesConfig{}
+		}
+		cfg.Schedules.Timezone = v
+	}
+	if v := envBool("SCHEDULES_CATCHUP"); v != nil {
+		if cfg.Schedules == nil {
+			cfg.Schedules = &SchedulesConfig{}
+		}
+		cfg.Schedules.Catchup = v
+	}
+
 	// Telegram env overrides: merge env vars on top of file config.
 	baseTelegram := telegram.DefaultConfig()
 	if cfg.Telegram != nil {
@@ -676,6 +710,7 @@ func LoadConfig(cli CLIFlags) ResolvedConfig {
 		MCPServers:          cfg.MCPServers,
 		Telegram:            resolveTelegram(cfg.Telegram),
 		Transcription:       resolveTranscription(cfg.Transcription),
+		Schedules:           resolveSchedules(cfg.Schedules),
 		GithubRepoDirectory: cfg.GithubRepoDirectory,
 		GithubRepoUrl:       cfg.GithubRepoUrl,
 		InteractionMode:     ifZero(cfg.InteractionMode, "engaging"),
@@ -932,6 +967,49 @@ func resolveTranscription(cfg *TranscriptionConfig) TranscriptionConfig {
 	}
 }
 
+// SchedulesConfig is the file-level scheduler configuration. Tri-state fields
+// use pointers so "unset" is distinguishable from an explicit false.
+type SchedulesConfig struct {
+	Enabled       *bool  `json:"enabled,omitempty"`        // run the embedded scheduler inside `odek telegram` (default true)
+	MaxConcurrent int    `json:"max_concurrent,omitempty"` // max jobs running at once (default 2)
+	Timezone      string `json:"timezone,omitempty"`       // default timezone for jobs with none (default UTC)
+	Catchup       *bool  `json:"catchup,omitempty"`        // global default: run a missed fire once on startup (default false)
+}
+
+// ScheduleConfig is the resolved scheduler config (all fields concrete).
+type ScheduleConfig struct {
+	Enabled       bool
+	MaxConcurrent int
+	Timezone      string
+	Catchup       bool
+}
+
+// resolveSchedules merges file-level scheduler config with defaults.
+func resolveSchedules(cfg *SchedulesConfig) ScheduleConfig {
+	out := ScheduleConfig{
+		Enabled:       true,
+		MaxConcurrent: 2,
+		Timezone:      "UTC",
+		Catchup:       false,
+	}
+	if cfg == nil {
+		return out
+	}
+	if cfg.Enabled != nil {
+		out.Enabled = *cfg.Enabled
+	}
+	if cfg.MaxConcurrent > 0 {
+		out.MaxConcurrent = cfg.MaxConcurrent
+	}
+	if cfg.Timezone != "" {
+		out.Timezone = cfg.Timezone
+	}
+	if cfg.Catchup != nil {
+		out.Catchup = *cfg.Catchup
+	}
+	return out
+}
+
 // overlayFile overlays a higher-priority FileConfig onto a lower-priority one.
 // Only fields that are explicitly set (non-zero for scalars, non-nil for
 // pointers) override the base value.
@@ -1039,6 +1117,9 @@ func overlayFile(base, override FileConfig) FileConfig {
 	if override.Transcription != nil {
 		base.Transcription = override.Transcription
 	}
+	if override.Schedules != nil {
+		base.Schedules = override.Schedules
+	}
 	return base
 }
 
diff --git a/internal/config/schedules_test.go b/internal/config/schedules_test.go
new file mode 100644
index 0000000..6f1db70
--- /dev/null
+++ b/internal/config/schedules_test.go
@@ -0,0 +1,83 @@
+package config
+
+import "testing"
+
+func TestResolveSchedules_Defaults(t *testing.T) {
+	got := resolveSchedules(nil)
+	if !got.Enabled {
+		t.Error("Enabled should default to true")
+	}
+	if got.MaxConcurrent != 2 {
+		t.Errorf("MaxConcurrent = %d, want 2", got.MaxConcurrent)
+	}
+	if got.Timezone != "UTC" {
+		t.Errorf("Timezone = %q, want UTC", got.Timezone)
+	}
+	if got.Catchup {
+		t.Error("Catchup should default to false")
+	}
+}
+
+func TestResolveSchedules_Overrides(t *testing.T) {
+	got := resolveSchedules(&SchedulesConfig{
+		Enabled:       boolPtr(false),
+		MaxConcurrent: 5,
+		Timezone:      "Europe/Berlin",
+		Catchup:       boolPtr(true),
+	})
+	if got.Enabled {
+		t.Error("Enabled should be false")
+	}
+	if got.MaxConcurrent != 5 {
+		t.Errorf("MaxConcurrent = %d, want 5", got.MaxConcurrent)
+	}
+	if got.Timezone != "Europe/Berlin" {
+		t.Errorf("Timezone = %q", got.Timezone)
+	}
+	if !got.Catchup {
+		t.Error("Catchup should be true")
+	}
+}
+
+func TestResolveSchedules_PartialKeepsDefaults(t *testing.T) {
+	// Only MaxConcurrent set; the rest keep defaults.
+	got := resolveSchedules(&SchedulesConfig{MaxConcurrent: 8})
+	if !got.Enabled || got.Timezone != "UTC" || got.Catchup {
+		t.Errorf("partial override disturbed defaults: %+v", got)
+	}
+	if got.MaxConcurrent != 8 {
+		t.Errorf("MaxConcurrent = %d, want 8", got.MaxConcurrent)
+	}
+}
+
+func TestLoadConfig_SchedulesDefault(t *testing.T) {
+	t.Setenv("HOME", t.TempDir())
+	cfg := LoadConfig(CLIFlags{})
+	if !cfg.Schedules.Enabled {
+		t.Error("Schedules.Enabled should default to true")
+	}
+	if cfg.Schedules.MaxConcurrent != 2 {
+		t.Errorf("MaxConcurrent = %d, want 2", cfg.Schedules.MaxConcurrent)
+	}
+}
+
+func TestLoadConfig_SchedulesEnv(t *testing.T) {
+	t.Setenv("HOME", t.TempDir())
+	t.Setenv("ODEK_SCHEDULES_ENABLED", "false")
+	t.Setenv("ODEK_SCHEDULES_MAX_CONCURRENT", "4")
+	t.Setenv("ODEK_SCHEDULES_TIMEZONE", "Europe/Berlin")
+	t.Setenv("ODEK_SCHEDULES_CATCHUP", "true")
+	cfg := LoadConfig(CLIFlags{})
+	if cfg.Schedules.Enabled {
+		t.Error("ODEK_SCHEDULES_ENABLED=false should disable")
+	}
+	if cfg.Schedules.MaxConcurrent != 4 {
+		t.Errorf("MaxConcurrent = %d, want 4", cfg.Schedules.MaxConcurrent)
+	}
+	if cfg.Schedules.Timezone != "Europe/Berlin" {
+		t.Errorf("Timezone = %q, want Europe/Berlin", cfg.Schedules.Timezone)
+	}
+	if !cfg.Schedules.Catchup {
+		t.Error("ODEK_SCHEDULES_CATCHUP=true should enable catchup")
+	}
+}

From 68212be8cc1214cd416d12c8cbaf691db28c14ef Mon Sep 17 00:00:00 2001
From: Rolando Santamaria Maso <kyberneees@gmail.com>
Date: Thu, 4 Jun 2026 21:46:12 +0200
Subject: [PATCH 05/11] feat(docker): retire supercronic, use the native
 scheduler (phase 5)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bot now hosts the in-process scheduler (phase 3), so the container needs no
external cron at all. Remove the supercronic scaffolding entirely.

- Dockerfile: drop the supercronic download (and its ARG TARGETARCH/SHA pin),
  the ~/.crontabs dir, and the cron-entrypoint.sh wrapper. ENTRYPOINT is back to
  ["odek"]. The image no longer needs --build-arg TARGETARCH.
- docker-compose: remove the ./crontab bind mounts from both telegram services.
  Keep init: true (now justified generally — reap agent-spawned children and
  forward SIGTERM), with an honest comment.
- Delete docker/cron-entrypoint.sh and docker/crontab.
- spawnChild: remove the now-dead ODEK_ENTRYPOINT re-exec branch (it only
  existed to restart supercronic via the wrapper). A restarted `odek telegram`
  starts its own embedded scheduler goroutine; gracefulRestart still releases
  the schedule lock so the child re-acquires cleanly. Drop the two obsolete
  ODEK_ENTRYPOINT tests.
- docs: docker/README + .env.example now describe the native scheduler
  (`odek schedule`, jobs in ./.odek/schedules.json); TELEGRAM.md points to
  SCHEDULES.md from its OS-cron section.

Validated: image builds without TARGETARCH, supercronic absent from the image,
ENTRYPOINT runs odek, and `odek schedule next` works inside the container.
Compose config valid; full module suite green, vet + fmt clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .gitignore                |  3 ++
 cmd/odek/telegram.go      | 14 ++++-----
 cmd/odek/telegram_test.go | 27 -----------------
 docker/.env.example       | 12 ++++++--
 docker/Dockerfile         | 41 +++++--------------------
 docker/README.md          | 28 ++++++++---------
 docker/cron-entrypoint.sh | 63 ---------------------------------------
 docker/crontab            | 28 -----------------
 docker/docker-compose.yml | 18 ++++-------
 docs/TELEGRAM.md          |  9 +++++-
 10 files changed, 52 insertions(+), 191 deletions(-)
 delete mode 100755 docker/cron-entrypoint.sh
 delete mode 100644 docker/crontab

diff --git a/.gitignore b/.gitignore
index 23571f9..1a006cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,6 @@ docker/workspace/*
 !docker/workspace/.gitkeep
 docker/.odek/*
 !docker/.odek/.gitkeep
+
+# Claude Code local artifacts
+.claude/
diff --git a/cmd/odek/telegram.go b/cmd/odek/telegram.go
index 63efabf..5d3ef0e 100644
--- a/cmd/odek/telegram.go
+++ b/cmd/odek/telegram.go
@@ -912,15 +912,11 @@ func spawnChild() error {
 	if err != nil {
 		return fmt.Errorf("executable: %w", err)
 	}
-	// When running inside the Docker container the entrypoint script exports
-	// ODEK_ENTRYPOINT=$0. Re-exec through the wrapper so supercronic is
-	// restarted alongside the new odek process. The wrapper reads
-	// ODEK_SUPERCRONIC_PID (also in childEnv via os.Environ()) and kills the
-	// previous supercronic before starting a new one — no duplicate instances.
-	if ep := os.Getenv("ODEK_ENTRYPOINT"); ep != "" {
-		exe = ep
-	}
-	// Copy args (same as current process).
+	// Copy args (same as current process). The restarted `odek telegram`
+	// process starts its own embedded scheduler goroutine, so there is nothing
+	// extra to re-exec through — the scheduler's lifecycle follows the bot's.
+	// (gracefulRestart releases the schedule lock before os.Exit so the child
+	// re-acquires it cleanly.)
 	argv := make([]string, len(os.Args))
 	copy(argv, os.Args)
 	argv[0] = exe
diff --git a/cmd/odek/telegram_test.go b/cmd/odek/telegram_test.go
index 85cc87f..0c33d5d 100644
--- a/cmd/odek/telegram_test.go
+++ b/cmd/odek/telegram_test.go
@@ -24,33 +24,6 @@ func TestSpawnChild_StartsChildProcess(t *testing.T) {
 	}
 }
 
-func TestSpawnChild_UsesODEKENTRYPOINT(t *testing.T) {
-	// When ODEK_ENTRYPOINT is set (injected by cron-entrypoint.sh inside the
-	// container), spawnChild must use that path as the executable so the
-	// wrapper restarts supercronic alongside the new odek process.
-	// /bin/sh is a universally present executable that accepts arbitrary args
-	// and exits immediately when given -c ''; it lets us verify the branch
-	// without spawning a real odek binary.
-	t.Setenv("ODEK_ENTRYPOINT", "/bin/sh")
-	err := spawnChild()
-	// /bin/sh exits quickly with a non-zero code because os.Args contains
-	// test flags it does not understand, but os.StartProcess itself succeeds
-	// (process started) — the important thing is no "executable not found" error.
-	if err != nil {
-		t.Logf("spawnChild with ODEK_ENTRYPOINT returned: %v", err)
-	}
-}
-
-func TestSpawnChild_ODEKENTRYPOINTEmpty_FallsBackToOdekBinary(t *testing.T) {
-	// When ODEK_ENTRYPOINT is empty (not set), the executable must remain
-	// the current odek binary — not some zero-value path.
-	t.Setenv("ODEK_ENTRYPOINT", "")
-	err := spawnChild()
-	if err != nil {
-		t.Logf("spawnChild (no ODEK_ENTRYPOINT) returned: %v", err)
-	}
-}
-
 func TestSpawnChild_ResolvedAPIKeyInjected(t *testing.T) {
 	// resolvedAPIKey is re-injected into the child env so config.LoadConfig
 	// (which clears env keys) does not leave the child without credentials.
diff --git a/docker/.env.example b/docker/.env.example
index fe102b4..011fb68 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -44,9 +44,17 @@ GIT_COMMITTER_EMAIL=you@example.com
 # ODEK_TELEGRAM_BOT_TOKEN=123456:ABC-your-bot-token
 # ODEK_TELEGRAM_ALLOWED_CHATS=11111111        # comma-separated chat IDs
 # ODEK_TELEGRAM_ALLOWED_USERS=11111111        # comma-separated user IDs (optional)
-# Chat ID that `odek run --deliver` (and supercronic reminders) send to.
-# Required for scheduled reminders; usually your own chat ID from ALLOWED_CHATS.
+# Chat ID that `odek run --deliver` and scheduled reminders send to.
+# Required for Telegram delivery; usually your own chat ID from ALLOWED_CHATS.
 # ODEK_TELEGRAM_DEFAULT_CHAT_ID=11111111
 # ODEK_TELEGRAM_DAILY_TOKEN_BUDGET=2000000    # optional cost cap; 0/unset = unlimited
 # ODEK_TELEGRAM_SESSION_TTL_HOURS=24          # optional
 # ODEK_TELEGRAM_HEALTH_ADDR=0.0.0.0:9090      # optional GET /health endpoint
+
+# ── Scheduled tasks (native cron; see docs/SCHEDULES.md) ─────────────────
+# The Telegram bot runs the scheduler in-process. Manage jobs with
+# `odek schedule add|list|...`; they persist in ./.odek/schedules.json.
+# ODEK_SCHEDULES_ENABLED=true                 # set false to disable the embedded scheduler
+# ODEK_SCHEDULES_MAX_CONCURRENT=2             # max jobs running at once
+# ODEK_SCHEDULES_TIMEZONE=UTC                 # default tz for jobs without their own
+# ODEK_SCHEDULES_CATCHUP=false                # run a missed fire once on startup
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 41f6773..64c1092 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -63,40 +63,19 @@ RUN apk add --no-cache ca-certificates git github-cli bash coreutils curl jq
 #   `adduser -D -u 1000 odek`  with  `useradd -m -u 1000 odek`
 # (the mkdir/chown, ENV HOME, USER, and WORKDIR lines all work unchanged).
 
-# ── supercronic — container-friendly cron for scheduled `odek run` jobs ──
-# Why not busybox crond: crond launches jobs with a SCRUBBED environment, so
-# env_file vars (ODEK_API_KEY, ODEK_TELEGRAM_BOT_TOKEN, …) never reach a cron
-# tick — and it wants root to setuid, clashing with the non-root user below.
-# supercronic runs as a normal user and passes ITS OWN environment through to
-# each job, so `odek run --deliver` from cron sees the same vars as the bot.
-#
-# Pinned to a release + SHA-256 computed from the official GitHub assets, so a
-# tampered or substituted binary fails the build. TARGETARCH is supplied by
-# BuildKit (this Dockerfile already opts in via the syntax= directive above).
-ARG SUPERCRONIC_VERSION=v0.2.46
-ARG TARGETARCH
-RUN set -eu; \
-    arch="${TARGETARCH:?TARGETARCH is empty — build with BuildKit (docker buildx build) or pass --build-arg TARGETARCH=amd64|arm64}"; \
-    case "$arch" in \
-      amd64) sha=5adff01c5a797663948e656d2b61d10932369ee437eb5cb54fa872b2960f222b ;; \
-      arm64) sha=c0576a8eb092e3f79108ed0a2155a25c7766af78456e5a6070e54757ef513bfe ;; \
-      *) echo "supercronic: unsupported TARGETARCH=$arch" >&2; exit 1 ;; \
-    esac; \
-    curl -fsSL "https://github.com/aptible/supercronic/releases/download/${SUPERCRONIC_VERSION}/supercronic-linux-${arch}" \
-      -o /usr/local/bin/supercronic; \
-    echo "${sha}  /usr/local/bin/supercronic" | sha256sum -c -; \
-    chmod +x /usr/local/bin/supercronic
+# Scheduled tasks ("cron") are handled natively by odek itself — the bot hosts
+# an in-process scheduler, and `odek schedule daemon` runs one headless. No
+# external cron binary is bundled; see docs/SCHEDULES.md.
 
 # Run as a non-root user — defense in depth even inside the container.
 # Pre-create ~/.odek owned by the user so it's writable for config, sessions,
-# and the Telegram lock (whether backed by an image dir or a mounted folder).
-# ~/.crontabs holds the (optional) bind-mounted crontab read by supercronic.
+# the Telegram lock, and schedules.json (whether backed by an image dir or a
+# mounted folder).
 RUN adduser -D -u 1000 odek \
- && mkdir -p /home/odek/.odek /home/odek/.crontabs /workspace \
- && chown -R odek:odek /home/odek/.odek /home/odek/.crontabs /workspace
+ && mkdir -p /home/odek/.odek /workspace \
+ && chown -R odek:odek /home/odek/.odek /workspace
 
 COPY --from=build /out/odek /usr/local/bin/odek
-COPY --chmod=0755 docker/cron-entrypoint.sh /usr/local/bin/cron-entrypoint.sh
 
 # Docker does NOT set $HOME from USER, but Odek resolves ~/.odek via $HOME.
 # Set it explicitly so config.json, sessions, and the Telegram lock land in
@@ -105,8 +84,4 @@ ENV HOME=/home/odek
 USER odek
 WORKDIR /workspace
 
-# The wrapper starts supercronic in the background IFF a crontab is mounted,
-# then `exec`s odek — so services without a crontab behave exactly as before
-# (odek stays the container's main process; signals and the singleton lock are
-# unchanged). The compose `command:` (serve/telegram/run …) flows through as $@.
-ENTRYPOINT ["/usr/local/bin/cron-entrypoint.sh"]
+ENTRYPOINT ["odek"]
diff --git a/docker/README.md b/docker/README.md
index e597f1a..5c9f35c 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -25,8 +25,6 @@ docker/
 ├── config.restricted.json   # Restricted permission policy
 ├── config.godmode.json      # Godmode (YOLO) permission policy
 ├── .env.example             # copy to .env, add your API key
-├── cron-entrypoint.sh       # starts supercronic (if a crontab is mounted), then execs odek
-├── crontab                  # scheduled reminders (edit + uncomment to enable)
 └── workspace/               # the dir the agent works in (mounted in)
 ```
 
@@ -103,26 +101,26 @@ local `./.odek` folder — an external host folder, just like `./workspace`.
 
 ### Scheduled reminders (cron)
 
-The Telegram profiles bundle [supercronic](https://github.com/aptible/supercronic), a
-container-friendly cron. Unlike the classic `crond`, it runs as the non-root user **and
-passes the container environment to each job** — so a scheduled `odek run --deliver`
-sees the same `.env` vars (API key, bot token) the bot does. No separate host crontab,
-no daemon juggling.
+The Telegram bot hosts odek's **native, in-process scheduler** — no extra
+container, no external cron. Because it runs inside the bot, a scheduled task
+sees the same resolved config (API key, bot token, default chat) the bot does.
+Full guide: [../docs/SCHEDULES.md](../docs/SCHEDULES.md).
 
 1. In `.env`, set **`ODEK_TELEGRAM_DEFAULT_CHAT_ID`** — the chat reminders are sent to
    (usually your own ID, the same as `ODEK_TELEGRAM_ALLOWED_CHATS`).
-2. Edit `crontab` and uncomment/add jobs (standard 5-field syntax; min granularity is
-   1 minute). Example — a weekday stand-up nudge:
+2. Add a job. Either run the CLI inside the container:
 
-   ```cron
-   0 9 * * 1-5  /usr/local/bin/odek run --deliver "Reminder: stand-up in 15 minutes."
+   ```bash
+   docker compose --profile telegram-restricted exec odek-telegram-restricted \
+     odek schedule add --cron "0 9 * * 1-5" --deliver telegram "Stand-up in 15 minutes"
    ```
 
-3. (Re)start a Telegram profile. On boot you'll see `cron-entrypoint: starting
-   supercronic …` in the logs; each job's result is delivered to your chat.
+   …or edit `./.odek/schedules.json` on the host directly. Jobs persist in the
+   `./.odek` volume and the running bot picks up changes automatically.
+3. Inspect with `odek schedule list` / `odek schedule next <id>`.
 
-Times are UTC unless you set `TZ` in `.env`. An empty/all-commented `crontab` is fine —
-supercronic simply schedules nothing.
+Don't run a separate `odek schedule daemon` against the same `./.odek` while the
+bot is up — a shared lock makes the second one defer, so jobs never double-fire.
 
 ## Verify the profiles differ
 
diff --git a/docker/cron-entrypoint.sh b/docker/cron-entrypoint.sh
deleted file mode 100755
index 47ee4fb..0000000
--- a/docker/cron-entrypoint.sh
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/bin/sh
-# cron-entrypoint.sh — container entrypoint for the odek image.
-#
-# If a crontab is mounted, start supercronic in the background, then hand the
-# container over to the real odek command (serve / telegram / run / …) via
-# `exec` so odek remains the main process: signals, graceful restart, and the
-# Telegram singleton lock all behave exactly as they did before this wrapper.
-#
-# supercronic inherits THIS process's environment and passes it to every cron
-# job, so a scheduled `odek run --deliver` sees the same env_file vars
-# (ODEK_API_KEY, ODEK_TELEGRAM_BOT_TOKEN, ODEK_TELEGRAM_DEFAULT_CHAT_ID, …)
-# that the bot does. That is the whole reason for using supercronic over the
-# classic crond, which scrubs the environment from its jobs.
-set -eu
-
-# Path to the crontab. Overridable so an operator can relocate the mount.
-CRONTAB="${ODEK_CRONTAB:-/home/odek/.crontabs/crontab}"
-
-# Graceful-restart support: odek's /restart command re-execs via this script
-# (see ODEK_ENTRYPOINT below). Kill any supercronic from the previous run so we
-# never end up with two instances scheduling the same crontab concurrently.
-if [ -n "${ODEK_SUPERCRONIC_PID:-}" ]; then
-    kill "$ODEK_SUPERCRONIC_PID" 2>/dev/null || true
-    unset ODEK_SUPERCRONIC_PID
-fi
-
-if [ -d "$CRONTAB" ]; then
-    # Docker creates a directory when the bind-mount source doesn't exist on the
-    # host. This is almost always a misconfiguration — warn loudly rather than
-    # silently skipping so the operator knows why reminders aren't firing.
-    echo "cron-entrypoint: WARNING: $CRONTAB is a directory, not a file" >&2
-    echo "cron-entrypoint: Docker created it because the host path was missing." >&2
-    echo "cron-entrypoint: Fix: remove the directory on the host and create the file." >&2
-    echo "cron-entrypoint: Skipping supercronic — cron jobs will NOT run." >&2
-elif [ -f "$CRONTAB" ]; then
-    echo "cron-entrypoint: starting supercronic for $CRONTAB" >&2
-    # -passthrough-logs keeps each job's own stdout/stderr intact in the
-    # container log alongside supercronic's scheduling lines.
-    supercronic -passthrough-logs "$CRONTAB" &
-    ODEK_SUPERCRONIC_PID=$!
-    export ODEK_SUPERCRONIC_PID
-    # Brief liveness check: supercronic parses the crontab at startup and exits
-    # immediately on a syntax error or missing binary. Neither is recoverable at
-    # runtime, so catching it here produces a clear warning rather than silent
-    # non-delivery. set -e does not cover backgrounded processes, so we check
-    # explicitly after a short window.
-    sleep 1
-    if ! kill -0 "$ODEK_SUPERCRONIC_PID" 2>/dev/null; then
-        echo "cron-entrypoint: WARNING: supercronic exited immediately — cron jobs will NOT run" >&2
-        unset ODEK_SUPERCRONIC_PID
-    fi
-else
-    echo "cron-entrypoint: no crontab at $CRONTAB — skipping supercronic" >&2
-fi
-
-# Advertise this script's own path so spawnChild (odek's /restart handler) can
-# re-exec through the wrapper instead of the bare binary. Without this, a
-# graceful restart would skip supercronic entirely.
-export ODEK_ENTRYPOINT="$0"
-
-# Default to printing usage if no command was provided (matches the previous
-# `ENTRYPOINT ["odek"]` behaviour for a bare `docker run`).
-exec odek "$@"
diff --git a/docker/crontab b/docker/crontab
deleted file mode 100644
index ffd5e03..0000000
--- a/docker/crontab
+++ /dev/null
@@ -1,28 +0,0 @@
-# odek reminders — supercronic crontab (standard 5-field cron syntax).
-#
-# This file is bind-mounted read-only into the container at
-# /home/odek/.crontabs/crontab (see docker-compose.yml). When present, the
-# entrypoint starts supercronic, which runs each job below on schedule.
-#
-# Each reminder is just `odek run --deliver "<task>"`. --deliver sends the
-# agent's final response to the Telegram chat in ODEK_TELEGRAM_DEFAULT_CHAT_ID
-# (set in .env). supercronic passes the container environment to every job, so
-# ODEK_API_KEY and the bot token are available here with no extra wiring.
-#
-#   ┌ minute (0-59)
-#   │ ┌ hour (0-23)
-#   │ │ ┌ day-of-month (1-31)
-#   │ │ │ ┌ month (1-12)
-#   │ │ │ │ ┌ day-of-week (0-6, Sun=0)
-#   │ │ │ │ │
-#   * * * * *  command
-#
-# Times are UTC unless you set TZ in .env. Use the absolute binary path.
-#
-# Uncomment / edit the examples below to enable reminders:
-
-# Weekdays at 09:00 — stand-up nudge:
-# 0 9 * * 1-5  /usr/local/bin/odek run --deliver "Reminder: daily stand-up starts in 15 minutes."
-
-# Every day at 18:30 — end-of-day wrap-up prompt:
-# 30 18 * * *  /usr/local/bin/odek run --deliver "End of day: summarize what I shipped and what's open for tomorrow."
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index e429760..dc4d5cc 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -62,20 +62,15 @@ services:
     image: odek:local
     env_file: .env
     command: ["telegram"]
-    # init: true adds Docker's built-in init (tini) as PID 1. This gives us:
-    # - Zombie reaping: supercronic child processes are reaped when they exit.
-    # - Signal forwarding: SIGTERM from `docker stop` reaches all children,
-    #   giving in-flight cron jobs a clean shutdown window.
-    # - Graceful restart safety: when odek exits during /restart, the spawned
-    #   child is reparented to the init rather than dying with odek.
+    # init: true runs Docker's built-in init (tini) as PID 1, which reaps any
+    # child processes the agent spawns (shell tool, sub-agents) and forwards
+    # SIGTERM from `docker stop` for a clean shutdown. The bot also hosts the
+    # native scheduler (see docs/SCHEDULES.md) — jobs live in ./.odek/schedules.json.
     init: true
     volumes:
       - ./workspace:/workspace
       - ./.odek:/home/odek/.odek
       - ./config.restricted.json:/home/odek/.odek/config.json:ro
-      # Scheduled reminders: supercronic runs the jobs in ./crontab and
-      # delivers results to ODEK_TELEGRAM_DEFAULT_CHAT_ID via `--deliver`.
-      - ./crontab:/home/odek/.crontabs/crontab:rw
     restart: unless-stopped
 
   # ── Telegram bot — Godmode (no prompts; unrestricted) ──
@@ -89,12 +84,9 @@ services:
     image: odek:local
     env_file: .env
     command: ["telegram"]
-    init: true   # zombie reaping + SIGTERM forwarding (see telegram-restricted)
+    init: true   # reap agent child processes + forward SIGTERM (see telegram-restricted)
     volumes:
       - ./workspace:/workspace
       - ./.odek:/home/odek/.odek
       - ./config.godmode.json:/home/odek/.odek/config.json:ro
-      # Scheduled reminders: supercronic runs the jobs in ./crontab and
-      # delivers results to ODEK_TELEGRAM_DEFAULT_CHAT_ID via `--deliver`.
-      - ./crontab:/home/odek/.crontabs/crontab:rw
     restart: unless-stopped
diff --git a/docs/TELEGRAM.md b/docs/TELEGRAM.md
index 29495a3..27ecdad 100644
--- a/docs/TELEGRAM.md
+++ b/docs/TELEGRAM.md
@@ -429,7 +429,14 @@ A fire-and-forget goroutine sends `sendChatAction("typing")` every 4 seconds whi
 
 ## Cron Integration
 
-odek can run fully offline agent tasks and deliver the result to Telegram, enabling system cron-based scheduled agent workflows — no daemon or scheduler required.
+> **Prefer the native scheduler.** odek now has a built-in, in-process
+> scheduler — `odek schedule add --cron "..." --deliver telegram "..."`. The
+> bot hosts it automatically, so there's no host crontab to manage and a
+> scheduled task sees the same resolved config the bot does. See
+> [SCHEDULES.md](SCHEDULES.md). The OS-cron approach below still works and is
+> handy when you'd rather drive scheduling from the host.
+
+odek can also run fully offline agent tasks from system cron and deliver the result to Telegram with `--deliver` — no long-running odek process required.
 
 ### How it works
 

From b48a3955b59d11a4601a32794ed18017dcf7889b Mon Sep 17 00:00:00 2001
From: Rolando Santamaria Maso <kyberneees@gmail.com>
Date: Thu, 4 Jun 2026 22:09:53 +0200
Subject: [PATCH 06/11] fix(schedule): address code-review findings

Ten findings from the high-effort review of the native scheduler:

#1 (security) Unattended tasks could silently run dangerous ops: a nil approver
with no TTY falls back to NonInteractiveAction(), which defaults to ALLOW. Set a
"deny" floor in runTaskHeadless when the policy doesn't explicitly choose one
(mirrors sub-agent hardening); explicit allow/deny (godmode/restricted) honoured.

#2 (correctness) cron parseField flagged a dom/dow field as a wildcard whenever
it merely started with "*", so a list like "*/2,15" broke the Vixie union rule
(AND instead of OR). Now star is set only when EVERY comma item is wildcard-based.

#3 (correctness) fireDue, called from the Run loop, acquired the semaphore with
a blocking channel send, so once MaxConcurrent jobs had hung, shutdown and
reload wedged behind it. The acquire now selects on ctx (clearing the overlap
guard for undispatched jobs), and each run is bounded by Options.RunTimeout
(default 15m).

#4 (correctness) Budget pre-check used CheckDailyBudget(1), which persists +1 per
fire. Switched to read-only DailyTokenUsage() for the gate; actual cost still
billed after the run.

#5 (robustness) acquireScheduleLock now does a /proc/<pid>/cmdline identity check
so a recycled PID can't make the scheduler refuse to start forever; pid file
tightened to 0600.

#6 (correctness) Missed-run detection trusted a persisted NextRun even after the
cron changed while down. RunState now records the schedule signature; reconcile
ignores NextRun when the sig differs (no spurious catchup/skip).

#7 (efficiency) MCP servers were reconnected per fire. They're now connected once
at daemon/bot startup and shared across fires (the MCP client is mutex-safe);
builtin tools stay fresh per fire.

#8 (efficiency) reconcile re-parsed cron + LoadLocation for unchanged jobs every
reload. The sig short-circuit now runs before compile().

#9 (cleanup) Hoisted the repeated `cfg.Schedules == nil` guard in loader.go.

#10 (cleanup) Daemon reuses telegram.NewFileLogger instead of a hand-rolled
stderrLogger (deleted).

Tests: cron union for step-lists + plain-step-still-wildcard;
cron-changed-while-down (no false catchup); fireDue unblocks on ctx cancel with
a full semaphore. Full suite green under -race, vet + fmt clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 cmd/odek/schedule.go                | 128 ++++++++++++++++++----------
 internal/config/loader.go           |  16 +---
 internal/schedule/cronexpr.go       |  13 ++-
 internal/schedule/cronexpr_test.go  |  31 +++++++
 internal/schedule/scheduler.go      |  77 +++++++++++++----
 internal/schedule/scheduler_test.go |  65 +++++++++++++-
 internal/schedule/types.go          |   1 +
 7 files changed, 250 insertions(+), 81 deletions(-)

diff --git a/cmd/odek/schedule.go b/cmd/odek/schedule.go
index 38a07ad..5be5654 100644
--- a/cmd/odek/schedule.go
+++ b/cmd/odek/schedule.go
@@ -263,8 +263,14 @@ func scheduleRunNow(args []string) error {
 	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
 	defer cancel()
 
+	mcpTools, mcpCleanup, err := buildScheduledMCPTools(resolved)
+	if err != nil {
+		return err
+	}
+	defer mcpCleanup()
+
 	fmt.Fprintf(os.Stderr, "Running %s (%s)…\n", job.ID, job.Name)
-	result, _, err := runTaskHeadless(ctx, resolved, system, job.Task)
+	result, _, err := runTaskHeadless(ctx, resolved, system, job.Task, mcpTools)
 	if err != nil {
 		return fmt.Errorf("run: %w", err)
 	}
@@ -290,10 +296,18 @@ func scheduleDaemon(_ []string) error {
 		return err
 	}
 
+	// Connect MCP servers once (if any) and share them across every fire.
+	mcpTools, mcpCleanup, err := buildScheduledMCPTools(resolved)
+	if err != nil {
+		return err
+	}
+	defer mcpCleanup()
+
+	logger := telegram.NewFileLogger(telegram.LogInfo, "") // "" → stderr
 	sched := schedule.New(st,
-		agentRunner{resolved: resolved, system: system},
+		agentRunner{resolved: resolved, system: system, mcpTools: mcpTools},
 		cliDeliverer{resolved: resolved},
-		schedulerOptions(resolved.Schedules, stderrLogger{}),
+		schedulerOptions(resolved.Schedules, logger),
 	)
 
 	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
@@ -318,14 +332,15 @@ func scheduleDaemon(_ []string) error {
 // ── Runner / Deliverer implementations ──────────────────────────────────
 
 // agentRunner runs each job's task through a fresh, headless odek agent using
-// the config resolved at daemon startup.
+// the config resolved at daemon startup. mcpTools are connected once and reused.
 type agentRunner struct {
 	resolved config.ResolvedConfig
 	system   string
+	mcpTools []odek.Tool
 }
 
 func (r agentRunner) Run(ctx context.Context, job schedule.Job) (string, int64, error) {
-	return runTaskHeadless(ctx, r.resolved, r.system, job.Task)
+	return runTaskHeadless(ctx, r.resolved, r.system, job.Task, r.mcpTools)
 }
 
 // cliDeliverer routes results to stdout, a log file, or Telegram, honouring a
@@ -378,18 +393,20 @@ type telegramRunner struct {
 	resolved config.ResolvedConfig
 	system   string
 	bot      *telegram.Bot
+	mcpTools []odek.Tool
 }
 
 func (r telegramRunner) Run(ctx context.Context, job schedule.Job) (string, int64, error) {
 	budgeted := r.resolved.Telegram.DailyTokenBudget > 0
 	if budgeted {
-		// Pre-flight: refuse if the budget is already exhausted so a runaway
-		// schedule can't keep spending. Mirrors the chat-message pre-check.
-		if err := r.bot.CheckDailyBudget(1); err != nil {
-			return "", 0, fmt.Errorf("daily token budget exhausted: %w", err)
+		// Pre-flight: refuse if the budget is already exhausted, WITHOUT mutating
+		// it (DailyTokenUsage is read-only; CheckDailyBudget would charge the
+		// probe amount on every fire).
+		if used, limit := r.bot.DailyTokenUsage(); limit > 0 && used >= limit {
+			return "", 0, fmt.Errorf("daily token budget exhausted (%d/%d tokens)", used, limit)
 		}
 	}
-	result, tokens, err := runTaskHeadless(ctx, r.resolved, r.system, job.Task)
+	result, tokens, err := runTaskHeadless(ctx, r.resolved, r.system, job.Task, r.mcpTools)
 	if err == nil && budgeted && tokens > 0 {
 		_ = r.bot.CheckDailyBudget(tokens) // bill the run (best-effort)
 	}
@@ -438,8 +455,15 @@ func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved confi
 		unlock()
 		return func() {}
 	}
+	// Connect MCP servers once and share across fires. A failure here must not
+	// take down the bot — log and continue without MCP tools for scheduled jobs.
+	mcpTools, mcpCleanup, err := buildScheduledMCPTools(resolved)
+	if err != nil {
+		log.Error("schedule: MCP connect failed, scheduled jobs run without MCP tools", "error", err)
+		mcpTools, mcpCleanup = nil, func() {}
+	}
 	sched := schedule.New(st,
-		telegramRunner{resolved: resolved, system: system, bot: bot},
+		telegramRunner{resolved: resolved, system: system, bot: bot, mcpTools: mcpTools},
 		telegramDeliverer{bot: bot, fallback: cliDeliverer{resolved: resolved}},
 		schedulerOptions(resolved.Schedules, log),
 	)
@@ -458,6 +482,7 @@ func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved confi
 
 	return func() {
 		scheduleUnlockRef = nil
+		mcpCleanup()
 		unlock()
 	}
 }
@@ -466,22 +491,27 @@ func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved confi
 
 // runTaskHeadless builds a fresh agent with no terminal renderer and no
 // interactive approver and runs one task to completion, returning the final
-// text. It mirrors run()'s construction but for unattended use: the danger
-// policy from resolved.Dangerous governs what the task may do (no human is
-// present to approve), exactly as for `odek run` invoked non-interactively.
-// Token usage is not yet surfaced (returns 0); the Telegram integration will
-// supply its own Runner that accounts for the daily budget.
-func runTaskHeadless(ctx context.Context, resolved config.ResolvedConfig, system, task string) (string, int64, error) {
-	tools := builtinTools(resolved.Dangerous, nil, nil, resolved.MaxConcurrency, resolved.APIKey, resolved.Transcription, nil)
-
-	if len(resolved.MCPServers) > 0 {
-		cleanup, err := loadMCPTools(resolved.MCPServers, &tools)
-		if err != nil {
-			return "", 0, fmt.Errorf("mcp: %w", err)
-		}
-		defer cleanup()
+// text and the tokens it consumed. mcpTools are pre-connected MCP tools shared
+// across fires (the builtin tools are rebuilt per call — they're cheap and
+// must not be shared concurrently); pass nil for none.
+//
+// Safety: a scheduled task runs unattended, so there is no human to answer an
+// approval prompt. builtinTools is given a nil approver, which means a
+// Prompt-class op would fall back to DangerousConfig.NonInteractiveAction() —
+// and that DEFAULTS TO ALLOW when the policy doesn't say otherwise. To avoid
+// silently granting dangerous operations when no policy is configured, we set a
+// "deny" floor whenever NonInteractive is unset, matching the hardening that
+// sub-agents apply. An explicit allow/deny (e.g. the godmode profile, or the
+// restricted profile's "deny") is honoured unchanged.
+func runTaskHeadless(ctx context.Context, resolved config.ResolvedConfig, system, task string, mcpTools []odek.Tool) (string, int64, error) {
+	if resolved.Dangerous.NonInteractive == nil {
+		deny := "deny"
+		resolved.Dangerous.NonInteractive = &deny
 	}
 
+	tools := builtinTools(resolved.Dangerous, nil, nil, resolved.MaxConcurrency, resolved.APIKey, resolved.Transcription, nil)
+	tools = append(tools, mcpTools...)
+
 	// Capture cumulative token usage from the final iteration so the Runner
 	// can report it (the engine logs it; the bot bills it against the budget).
 	// RunWithMessages drives the loop synchronously on this goroutine, so the
@@ -520,6 +550,23 @@ func runTaskHeadless(ctx context.Context, resolved config.ResolvedConfig, system
 	return result, tokens, err
 }
 
+// buildScheduledMCPTools connects the configured MCP servers ONCE so the
+// connections can be reused across every scheduled fire (the MCP client
+// serialises calls with a mutex, so sharing across concurrent runs is safe),
+// instead of reconnecting per fire. Returns the tools, a cleanup to close the
+// connections, and any error. With no MCP servers it's a no-op.
+func buildScheduledMCPTools(resolved config.ResolvedConfig) ([]odek.Tool, func(), error) {
+	if len(resolved.MCPServers) == 0 {
+		return nil, func() {}, nil
+	}
+	var tools []odek.Tool
+	cleanup, err := loadMCPTools(resolved.MCPServers, &tools)
+	if err != nil {
+		return nil, func() {}, fmt.Errorf("mcp: %w", err)
+	}
+	return tools, cleanup, nil
+}
+
 // ── helpers ─────────────────────────────────────────────────────────────
 
 // schedulerOptions builds engine options from the resolved config, falling
@@ -615,22 +662,6 @@ func appendScheduleLog(job schedule.Job, result string) error {
 	return err
 }
 
-// stderrLogger is a minimal schedule.Logger that writes key/value lines to
-// stderr, matching the daemon's foreground logging style.
-type stderrLogger struct{}
-
-func (stderrLogger) Info(msg string, kv ...any)  { logKV("INFO", msg, kv) }
-func (stderrLogger) Error(msg string, kv ...any) { logKV("ERROR", msg, kv) }
-
-func logKV(level, msg string, kv []any) {
-	var b strings.Builder
-	fmt.Fprintf(&b, "%s schedule: %s", level, msg)
-	for i := 0; i+1 < len(kv); i += 2 {
-		fmt.Fprintf(&b, " %v=%v", kv[i], kv[i+1])
-	}
-	fmt.Fprintln(os.Stderr, b.String())
-}
-
 // acquireScheduleLock prevents two schedule daemons from firing the same jobs.
 // Unlike the Telegram lock it refuses to start when a live daemon is found
 // rather than killing it — a running scheduler should not be silently usurped.
@@ -644,13 +675,22 @@ func acquireScheduleLock() (func(), error) {
 		return nil, err
 	}
 	if data, err := os.ReadFile(pidFile); err == nil {
-		if pid, _ := strconv.Atoi(strings.TrimSpace(string(data))); pid > 1 {
-			if err := syscall.Kill(pid, 0); err == nil {
+		if pid, _ := strconv.Atoi(strings.TrimSpace(string(data))); pid > 1 && syscall.Kill(pid, 0) == nil {
+			// The PID is alive, but after an unclean exit the OS may have recycled
+			// it onto an unrelated process. Confirm it's actually an odek process
+			// (mirrors the Telegram instance lock) before refusing — otherwise a
+			// recycled PID would make us refuse to start forever. On platforms
+			// without /proc the read fails and we stay conservative (treat as live).
+			owned := true
+			if cmdline, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid)); err == nil {
+				owned = strings.Contains(string(cmdline), "odek")
+			}
+			if owned {
 				return nil, fmt.Errorf("another schedule daemon is already running (PID %d)", pid)
 			}
 		}
 	}
-	if err := os.WriteFile(pidFile, []byte(strconv.Itoa(os.Getpid())), 0644); err != nil {
+	if err := os.WriteFile(pidFile, []byte(strconv.Itoa(os.Getpid())), 0600); err != nil {
 		return nil, err
 	}
 	return func() { os.Remove(pidFile) }, nil
diff --git a/internal/config/loader.go b/internal/config/loader.go
index 186add9..899ebc0 100644
--- a/internal/config/loader.go
+++ b/internal/config/loader.go
@@ -593,28 +593,20 @@ func LoadConfig(cli CLIFlags) ResolvedConfig {
 
 	// Schedules env overrides (ODEK_SCHEDULES_*): lets the scheduler be tuned
 	// from the environment, like everything else in a containerised deploy.
+	// Allocate once — an all-zero SchedulesConfig resolves identically to nil.
+	if cfg.Schedules == nil {
+		cfg.Schedules = &SchedulesConfig{}
+	}
 	if v := envBool("SCHEDULES_ENABLED"); v != nil {
-		if cfg.Schedules == nil {
-			cfg.Schedules = &SchedulesConfig{}
-		}
 		cfg.Schedules.Enabled = v
 	}
 	if v := envInt("SCHEDULES_MAX_CONCURRENT"); v > 0 {
-		if cfg.Schedules == nil {
-			cfg.Schedules = &SchedulesConfig{}
-		}
 		cfg.Schedules.MaxConcurrent = v
 	}
 	if v := envString("SCHEDULES_TIMEZONE"); v != "" {
-		if cfg.Schedules == nil {
-			cfg.Schedules = &SchedulesConfig{}
-		}
 		cfg.Schedules.Timezone = v
 	}
 	if v := envBool("SCHEDULES_CATCHUP"); v != nil {
-		if cfg.Schedules == nil {
-			cfg.Schedules = &SchedulesConfig{}
-		}
 		cfg.Schedules.Catchup = v
 	}
 
diff --git a/internal/schedule/cronexpr.go b/internal/schedule/cronexpr.go
index 79598d5..9a37c16 100644
--- a/internal/schedule/cronexpr.go
+++ b/internal/schedule/cronexpr.go
@@ -143,15 +143,20 @@ func expandMacro(m string) (string, error) {
 }
 
 // parseField parses one cron field into a bitset over [min,max]. star reports
-// whether the field began with "*" (a wildcard), which the caller needs for
-// the dom/dow union rule. names, if non-nil, maps lowercased symbolic names
-// (e.g. "mon") to values.
+// whether the field is an unrestricted wildcard — true only when EVERY
+// comma-separated item is wildcard-based ("*" or "*/n"). The caller needs this
+// for the Vixie dom/dow union rule: a field like "*/2,15" is restricted (it
+// lists an explicit member), so it must NOT count as a wildcard even though it
+// starts with "*". names, if non-nil, maps lowercased symbolic names to values.
 func parseField(field string, min, max int, names map[string]int) (mask uint64, star bool, err error) {
 	if field == "" {
 		return 0, false, fmt.Errorf("empty field")
 	}
-	star = strings.HasPrefix(field, "*")
+	star = true
 	for item := range strings.SplitSeq(field, ",") {
+		if !strings.HasPrefix(item, "*") {
+			star = false
+		}
 		m, err := parseItem(item, min, max, names)
 		if err != nil {
 			return 0, false, err
diff --git a/internal/schedule/cronexpr_test.go b/internal/schedule/cronexpr_test.go
index c541858..70a9da6 100644
--- a/internal/schedule/cronexpr_test.go
+++ b/internal/schedule/cronexpr_test.go
@@ -104,6 +104,37 @@ func TestNext_DomDowUnion(t *testing.T) {
 	}
 }
 
+func TestNext_StepListKeepsUnion(t *testing.T) {
+	// A dom field that is a comma-list beginning with a step (`*/2,15`) is
+	// RESTRICTED, not a wildcard, so the Vixie union with a restricted dow must
+	// apply: fire on (odd day or 15th) OR any Friday.
+	s := mustParse(t, "0 0 */2,15 * 5")
+	// 2026-06-12 is a Friday and day 12 (even, not in dom set, not the 15th).
+	// Under the union it must still fire because it's a Friday.
+	fri12 := time.Date(2026, 6, 12, 0, 0, 0, 0, time.UTC)
+	if !s.Matches(fri12) {
+		t.Errorf("expected fire on Friday 2026-06-12 via dom/dow union, got no match")
+	}
+	// 2026-06-01 is a Monday and day 1 (odd → in the dom set) → must fire.
+	mon1 := time.Date(2026, 6, 1, 0, 0, 0, 0, time.UTC)
+	if !s.Matches(mon1) {
+		t.Errorf("expected fire on day 1 (odd, in dom set), got no match")
+	}
+}
+
+func TestParse_PlainStepIsStillWildcard(t *testing.T) {
+	// A pure step (`*/2`) IS a wildcard for the coupling rule, so `0 0 */2 * 5`
+	// (dom wildcard-ish, dow restricted) uses intersection: only odd days that
+	// are Fridays. 2026-06-12 (Fri, even) must NOT fire; 2026-06-05 (Fri, odd) must.
+	s := mustParse(t, "0 0 */2 * 5")
+	if s.Matches(time.Date(2026, 6, 12, 0, 0, 0, 0, time.UTC)) {
+		t.Errorf("`*/2` should be wildcard → intersection; even Friday must not fire")
+	}
+	if !s.Matches(time.Date(2026, 6, 5, 0, 0, 0, 0, time.UTC)) {
+		t.Errorf("odd Friday should fire under intersection")
+	}
+}
+
 func TestNext_DomRestrictedDowStar(t *testing.T) {
 	// dow is "*" → intersection: only the 1st and 15th.
 	s := mustParse(t, "0 0 1,15 * *")
diff --git a/internal/schedule/scheduler.go b/internal/schedule/scheduler.go
index ce7579a..07df56c 100644
--- a/internal/schedule/scheduler.go
+++ b/internal/schedule/scheduler.go
@@ -40,6 +40,7 @@ type Options struct {
 	DefaultTZ     *time.Location   // timezone for jobs with no Timezone set (default UTC)
 	Catchup       bool             // global default: run a job once if a fire was missed while down
 	ReloadEvery   time.Duration    // how often to poll schedules.json mtime for changes (default 30s)
+	RunTimeout    time.Duration    // max wall-clock per job run (0 = default 15m; negative disables the bound)
 	Logger        Logger           // defaults to NopLogger
 	Now           func() time.Time // injectable clock for decisions (default time.Now); tests override
 }
@@ -47,8 +48,9 @@ type Options struct {
 const (
 	defaultMaxConcurrent = 2
 	defaultReloadEvery   = 30 * time.Second
-	maxSleep             = time.Hour // cap on a single idle sleep so the loop stays responsive
-	resultPreviewRunes   = 280       // how much of a result we persist as LastResult
+	defaultRunTimeout    = 15 * time.Minute // bounds a single job so a hung run can't hold a slot forever
+	maxSleep             = time.Hour        // cap on a single idle sleep so the loop stays responsive
+	resultPreviewRunes   = 280              // how much of a result we persist as LastResult
 )
 
 // Scheduler fires jobs from a Store on their cron schedule, runs them through
@@ -84,6 +86,9 @@ func New(store *Store, runner Runner, deliverer Deliverer, opts Options) *Schedu
 	if opts.ReloadEvery <= 0 {
 		opts.ReloadEvery = defaultReloadEvery
 	}
+	if opts.RunTimeout == 0 {
+		opts.RunTimeout = defaultRunTimeout
+	}
 	if opts.Logger == nil {
 		opts.Logger = NopLogger{}
 	}
@@ -167,29 +172,40 @@ func (s *Scheduler) reconcile(now time.Time) {
 		if !job.Enabled {
 			continue
 		}
+		newSig := jobSig(job)
+
+		// Unchanged and already scheduled — leave its next-fire intact (so an
+		// unrelated edit doesn't shift this job) and skip the relatively
+		// expensive re-parse + timezone load entirely.
+		if _, tracked := s.next[job.ID]; tracked && s.sig[job.ID] == newSig {
+			seen[job.ID] = true
+			s.jobs[job.ID] = job
+			s.runs[job.ID] = state[job.ID].Runs
+			continue
+		}
+
 		sched, err := compile(job, s.opts.DefaultTZ)
 		if err != nil {
 			// A malformed job is skipped, not fatal — one bad entry must not
-			// stop every other schedule.
+			// stop every other schedule. Leaving it out of `seen` also drops a
+			// previously-valid job that was just edited into an invalid one.
 			s.log.Error("scheduler: skipping job with invalid schedule", "id", job.ID, "name", job.Name, "error", err)
 			continue
 		}
 		seen[job.ID] = true
 		s.jobs[job.ID] = job
 		s.compiled[job.ID] = sched
-		s.runs[job.ID] = state[job.ID].Runs
-
-		newSig := job.Cron + "|" + job.Timezone
-		if _, tracked := s.next[job.ID]; tracked && s.sig[job.ID] == newSig {
-			// Unchanged and already scheduled — leave its next-fire intact so an
-			// unrelated file edit doesn't shift this job.
-			continue
-		}
 		s.sig[job.ID] = newSig
+		s.runs[job.ID] = state[job.ID].Runs
 
 		// Determine the first fire for a newly-seen or changed job, applying the
-		// missed-run policy against any persisted next-fire.
-		prevNext := state[job.ID].NextRun
+		// missed-run policy. Only trust the persisted NextRun if it was produced
+		// by the SAME schedule signature; otherwise the cron/timezone changed
+		// while we were down and the old slot is meaningless.
+		prevNext := time.Time{}
+		if st := state[job.ID]; st.Sig == newSig {
+			prevNext = st.NextRun
+		}
 		catchup := job.Catchup || s.opts.Catchup
 		switch {
 		case !prevNext.IsZero() && prevNext.Before(now) && catchup:
@@ -201,7 +217,7 @@ func (s *Scheduler) reconcile(now time.Time) {
 			s.log.Info("scheduler: skipping missed fire", "id", job.ID, "name", job.Name)
 			_ = s.store.SaveState(RunState{
 				JobID: job.ID, LastStatus: StatusSkipped, LastRun: now,
-				NextRun: s.next[job.ID], Runs: s.runs[job.ID],
+				NextRun: s.next[job.ID], Runs: s.runs[job.ID], Sig: newSig,
 			})
 		default:
 			s.next[job.ID] = sched.Next(now)
@@ -243,8 +259,21 @@ func (s *Scheduler) fireDue(ctx context.Context, now time.Time) {
 	}
 	s.mu.Unlock()
 
-	for _, job := range toFire {
-		s.sem <- struct{}{} // acquire (blocks if at MaxConcurrent)
+	for i, job := range toFire {
+		// Acquire a slot, but stay responsive to cancellation: if all slots are
+		// held by long-running jobs and ctx is cancelled, don't wedge here —
+		// release the overlap guard for every job we won't dispatch and bail so
+		// Run() can reach its shutdown path.
+		select {
+		case s.sem <- struct{}{}:
+		case <-ctx.Done():
+			s.mu.Lock()
+			for _, j := range toFire[i:] {
+				s.running[j.ID] = false
+			}
+			s.mu.Unlock()
+			return
+		}
 		s.wg.Add(1)
 		go func(job Job, firedAt time.Time) {
 			defer s.wg.Done()
@@ -263,10 +292,17 @@ func (s *Scheduler) execute(ctx context.Context, job Job, firedAt time.Time) {
 	}()
 
 	s.mu.Lock()
-	st := RunState{JobID: job.ID, LastRun: firedAt, Runs: s.runs[job.ID], NextRun: s.next[job.ID]}
+	st := RunState{JobID: job.ID, LastRun: firedAt, Runs: s.runs[job.ID], NextRun: s.next[job.ID], Sig: jobSig(job)}
 	s.mu.Unlock()
 
-	result, tokens, err := s.runner.Run(ctx, job)
+	// Bound the run so a hung agent/tool can't hold its concurrency slot forever.
+	runCtx := ctx
+	if s.opts.RunTimeout > 0 {
+		var cancel context.CancelFunc
+		runCtx, cancel = context.WithTimeout(ctx, s.opts.RunTimeout)
+		defer cancel()
+	}
+	result, tokens, err := s.runner.Run(runCtx, job)
 	switch {
 	case err != nil:
 		st.LastStatus = StatusError
@@ -312,6 +348,11 @@ func (s *Scheduler) timeToNext(now time.Time) time.Duration {
 	return d
 }
 
+// jobSig is the change signature for a job's schedule. Two jobs with the same
+// signature fire at the same times; a change means the persisted NextRun no
+// longer corresponds to the current schedule.
+func jobSig(j Job) string { return j.Cron + "|" + j.Timezone }
+
 // compile parses a job's cron expression in its timezone (or the supplied
 // default when the job specifies none).
 func compile(job Job, defaultTZ *time.Location) (*Schedule, error) {
diff --git a/internal/schedule/scheduler_test.go b/internal/schedule/scheduler_test.go
index 8494e1a..b8408b7 100644
--- a/internal/schedule/scheduler_test.go
+++ b/internal/schedule/scheduler_test.go
@@ -200,7 +200,7 @@ func TestReconcile_MissedSkip(t *testing.T) {
 		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true, Catchup: false})
 	// Pretend a fire was due in the past while we were down.
 	past := time.Date(2026, 6, 3, 9, 0, 0, 0, time.UTC)
-	_ = st.SaveState(RunState{JobID: job.ID, NextRun: past})
+	_ = st.SaveState(RunState{JobID: job.ID, NextRun: past, Sig: jobSig(job)})
 
 	runner := &fakeRunner{}
 	s := New(st, runner, &fakeDeliverer{}, Options{})
@@ -222,12 +222,71 @@ func TestReconcile_MissedSkip(t *testing.T) {
 	}
 }
 
+func TestReconcile_CronChangedWhileDownNotMissed(t *testing.T) {
+	// Persisted NextRun was produced by an OLD cron (different sig). On restart
+	// the job's cron has changed; the stale slot must NOT trigger a catchup.
+	st := newTestStore(t)
+	job := addJob(t, st, Job{Name: "j", Cron: "0 9 * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true, Catchup: true})
+	// Seed state from a DIFFERENT schedule signature, with a past NextRun.
+	past := time.Date(2026, 6, 3, 9, 0, 0, 0, time.UTC)
+	_ = st.SaveState(RunState{JobID: job.ID, NextRun: past, Sig: "OLD|"})
+
+	runner := &fakeRunner{}
+	s := New(st, runner, &fakeDeliverer{}, Options{})
+	now := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(now)
+
+	// The stale (different-sig) slot is ignored → next is forward-scheduled, no fire.
+	if !s.peekNext(job.ID).After(now) {
+		t.Errorf("stale-sig NextRun should be ignored; next=%v", s.peekNext(job.ID))
+	}
+	s.fireDue(context.Background(), now)
+	s.Wait()
+	if runner.callCount() != 0 {
+		t.Error("cron changed while down → must NOT catchup-fire on the old slot")
+	}
+}
+
+func TestFireDue_CancelDuringDispatchUnblocks(t *testing.T) {
+	// With all slots held by a blocked job, a cancelled ctx must let fireDue
+	// return instead of wedging on the semaphore, and undispatched jobs must
+	// have their overlap guard cleared.
+	st := newTestStore(t)
+	a := addJob(t, st, Job{Name: "a", Cron: "* * * * *", Task: "x", Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	b := addJob(t, st, Job{Name: "b", Cron: "* * * * *", Task: "y", Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	runner := &fakeRunner{block: make(chan struct{}), started: make(chan string, 2)}
+	s := New(st, runner, &fakeDeliverer{}, Options{MaxConcurrent: 1}) // one slot
+
+	t0 := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(t0)
+	due := s.peekNext(a.ID) // both share the same next minute
+
+	ctx, cancel := context.WithCancel(context.Background())
+	done := make(chan struct{})
+	go func() { s.fireDue(ctx, due); close(done) }()
+
+	<-runner.started // first job took the only slot and is blocked
+	cancel()         // cancel while the second job can't acquire a slot
+
+	select {
+	case <-done:
+		// fireDue returned despite the full semaphore — good.
+	case <-time.After(2 * time.Second):
+		close(runner.block)
+		t.Fatal("fireDue wedged on the semaphore after ctx cancel")
+	}
+	close(runner.block)
+	s.Wait()
+	_ = b
+}
+
 func TestReconcile_MissedCatchup(t *testing.T) {
 	st := newTestStore(t)
 	job := addJob(t, st, Job{Name: "j", Cron: "0 9 * * *", Task: "x",
 		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true, Catchup: true})
 	past := time.Date(2026, 6, 3, 9, 0, 0, 0, time.UTC)
-	_ = st.SaveState(RunState{JobID: job.ID, NextRun: past})
+	_ = st.SaveState(RunState{JobID: job.ID, NextRun: past, Sig: jobSig(job)})
 
 	runner := &fakeRunner{result: "caught up"}
 	deliv := &fakeDeliverer{}
@@ -337,7 +396,7 @@ func TestRun_FiresThenStopsCleanly(t *testing.T) {
 	// startup — so we exercise Run's real loop without waiting a wall minute.
 	job := addJob(t, st, Job{Name: "j", Cron: "0 0 1 1 *", Task: "x",
 		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true, Catchup: true})
-	_ = st.SaveState(RunState{JobID: job.ID, NextRun: time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)})
+	_ = st.SaveState(RunState{JobID: job.ID, NextRun: time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC), Sig: jobSig(job)})
 
 	runner := &fakeRunner{result: "ok", started: make(chan string, 1)}
 	s := New(st, runner, &fakeDeliverer{}, Options{ReloadEvery: 20 * time.Millisecond})
diff --git a/internal/schedule/types.go b/internal/schedule/types.go
index 50e9228..0595723 100644
--- a/internal/schedule/types.go
+++ b/internal/schedule/types.go
@@ -71,4 +71,5 @@ type RunState struct {
 	LastResult string    `json:"last_result,omitempty"` // truncated preview of the delivered text
 	NextRun    time.Time `json:"next_run,omitzero"`     // computed projected next fire
 	Runs       int       `json:"runs,omitempty"`        // total successful + failed fires
+	Sig        string    `json:"sig,omitempty"`         // schedule signature that produced NextRun (cron|tz); detects stale state after a cron edit
 }

From ac1c8d6f870b4d0a0f6ea8681f5e12211a8f5ea2 Mon Sep 17 00:00:00 2001
From: Rolando Santamaria Maso <kyberneees@gmail.com>
Date: Fri, 5 Jun 2026 08:06:42 +0200
Subject: [PATCH 07/11] fix(schedule): address ultrareview findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Nine findings from the cloud multi-agent review.

bug_005 (normal) Impossible cron expressions (e.g. "0 0 30 2 *", Feb 30) passed
Validate, but Next() returned the zero time for them, which the engine treated
as perpetually due → the job fired on every tick forever, burning tokens. Now
Validate rejects them at add time, and reconcile/fireDue defensively skip a zero
next-fire (for hand-edited files).

bug_009 (normal) The embedded scheduler's stop closure tore down shared MCP
connections and the lock without waiting for in-flight jobs to drain, causing
broken-pipe errors persisted as bogus failure state. The closure now waits on a
done channel (20s bound) before cleanup.

bug_013 (normal) gracefulRestart calls os.Exit(0), which skips deferred
stopScheduler → mcpCleanup never ran → MCP child processes (Playwright/Chromium)
leaked on every /restart. Added mcpCleanupRef, invoked before os.Exit like
scheduleUnlockRef.

bug_007 (normal) reconcile reseeded s.runs from disk in the unchanged branch,
clobbering an in-flight fire's increment → lost Runs counts. It now skips the
reseed for unchanged/running jobs. Also moved the missed-fire SaveState out of
the s.mu critical section and stopped swallowing its error.

bug_004 (normal) runTaskHeadless used RunWithMessages with a bare system
message, so RuntimeContext (host/cwd/date) never reached the LLM — date-aware
jobs ("summarize today's calendar") had no notion of "today". Switched to
agent.Run, which prepends the engine's runtime-context-inclusive system message.

bug_014 (nit) Deliverer.Deliver took no context, so a stuck Telegram send blocked
the drain. Added ctx to the interface + bot.SendMessageContext; the scheduler
passes the run ctx through.

bug_015 (nit) Concurrent CLI mutations could lose writes (read-modify-write with
only an in-process mutex). Added an flock on ~/.odek/schedules.lock around the
store's write methods.

bug_006 (nit) scheduleNext swallowed store errors → misleading "bad cron" on a
corrupt store. It now returns the store error.

bug_002 (nit) docker/README + SCHEDULES.md misdescribed the lock as symmetric;
reworded to note the bot defers silently while the daemon refuses to start.

Tests: impossible-cron rejected (Validate) + skipped (reconcile); unchanged
reconcile preserves in-memory Runs. Full suite green under -race, vet + fmt clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 cmd/odek/schedule.go                | 59 ++++++++++++++++++++---------
 cmd/odek/schedule_test.go           | 17 +++++----
 cmd/odek/telegram.go                |  6 +++
 docker/README.md                    |  5 ++-
 docs/SCHEDULES.md                   |  7 +++-
 internal/schedule/scheduler.go      | 50 +++++++++++++++++++-----
 internal/schedule/scheduler_test.go | 49 +++++++++++++++++++++++-
 internal/schedule/store.go          | 38 ++++++++++++++++++-
 internal/schedule/store_test.go     |  2 +
 internal/telegram/bot.go            |  9 ++++-
 10 files changed, 201 insertions(+), 41 deletions(-)

diff --git a/cmd/odek/schedule.go b/cmd/odek/schedule.go
index 5be5654..d2f179b 100644
--- a/cmd/odek/schedule.go
+++ b/cmd/odek/schedule.go
@@ -16,7 +16,6 @@ import (
 
 	"github.com/BackendStack21/odek"
 	"github.com/BackendStack21/odek/internal/config"
-	"github.com/BackendStack21/odek/internal/llm"
 	"github.com/BackendStack21/odek/internal/loop"
 	"github.com/BackendStack21/odek/internal/render"
 	"github.com/BackendStack21/odek/internal/schedule"
@@ -211,8 +210,12 @@ func scheduleNext(st *schedule.Store, args []string) error {
 	if len(args) < 1 {
 		return fmt.Errorf(`usage: odek schedule next <id|"cron-expr">`)
 	}
+	job, ok, err := st.Get(args[0])
+	if err != nil {
+		return err // surface a corrupt/unreadable store, not a misleading cron error
+	}
 	var s *schedule.Schedule
-	if job, ok, _ := st.Get(args[0]); ok && len(args) == 1 {
+	if ok && len(args) == 1 {
 		sc, err := jobSchedule(job)
 		if err != nil {
 			return err
@@ -274,7 +277,7 @@ func scheduleRunNow(args []string) error {
 	if err != nil {
 		return fmt.Errorf("run: %w", err)
 	}
-	if err := (cliDeliverer{resolved: resolved}).Deliver(job, result); err != nil {
+	if err := (cliDeliverer{resolved: resolved}).Deliver(ctx, job, result); err != nil {
 		return fmt.Errorf("deliver: %w", err)
 	}
 	return nil
@@ -349,7 +352,7 @@ type cliDeliverer struct {
 	resolved config.ResolvedConfig
 }
 
-func (d cliDeliverer) Deliver(job schedule.Job, result string) error {
+func (d cliDeliverer) Deliver(ctx context.Context, job schedule.Job, result string) error {
 	switch job.Deliver.Kind {
 	case schedule.DeliverStdout:
 		fmt.Printf("\n── %s · %s ──\n%s\n", job.Name, time.Now().Format(time.RFC1123), result)
@@ -357,13 +360,13 @@ func (d cliDeliverer) Deliver(job schedule.Job, result string) error {
 	case schedule.DeliverLog:
 		return appendScheduleLog(job, result)
 	case schedule.DeliverTelegram:
-		return d.deliverTelegram(job, result)
+		return d.deliverTelegram(ctx, job, result)
 	default:
 		return fmt.Errorf("unknown delivery kind %q", job.Deliver.Kind)
 	}
 }
 
-func (d cliDeliverer) deliverTelegram(job schedule.Job, result string) error {
+func (d cliDeliverer) deliverTelegram(ctx context.Context, job schedule.Job, result string) error {
 	if d.resolved.Telegram.Token == "" {
 		return fmt.Errorf("telegram bot_token not configured")
 	}
@@ -375,7 +378,7 @@ func (d cliDeliverer) deliverTelegram(job schedule.Job, result string) error {
 		return fmt.Errorf("no chat id (set the job's telegram:<chatID> or telegram.default_chat_id)")
 	}
 	bot := telegram.NewBot(d.resolved.Telegram.Token)
-	_, err := bot.SendMessage(chatID, result, nil)
+	_, err := bot.SendMessageContext(ctx, chatID, result, nil)
 	return err
 }
 
@@ -387,6 +390,12 @@ func (d cliDeliverer) deliverTelegram(job schedule.Job, result string) error {
 // skip starting its scheduler.
 var scheduleUnlockRef func()
 
+// mcpCleanupRef holds the embedded scheduler's MCP-connection cleanup so the
+// graceful-restart path can run it before os.Exit(0). os.Exit skips deferred
+// functions, so without this the MCP child processes (e.g. Playwright/Chromium)
+// would leak across every /restart.
+var mcpCleanupRef func()
+
 // telegramRunner runs a job's task headlessly and accounts its token usage
 // against the bot's daily budget.
 type telegramRunner struct {
@@ -420,9 +429,9 @@ type telegramDeliverer struct {
 	fallback cliDeliverer
 }
 
-func (d telegramDeliverer) Deliver(job schedule.Job, result string) error {
+func (d telegramDeliverer) Deliver(ctx context.Context, job schedule.Job, result string) error {
 	if job.Deliver.Kind != schedule.DeliverTelegram {
-		return d.fallback.Deliver(job, result)
+		return d.fallback.Deliver(ctx, job, result)
 	}
 	chatID := job.Deliver.ChatID
 	if chatID == 0 {
@@ -431,7 +440,7 @@ func (d telegramDeliverer) Deliver(job schedule.Job, result string) error {
 	if chatID == 0 {
 		return fmt.Errorf("no chat id (set the job's telegram:<chatID> or telegram.default_chat_id)")
 	}
-	_, err := d.bot.SendMessage(chatID, result, nil)
+	_, err := d.bot.SendMessageContext(ctx, chatID, result, nil)
 	return err
 }
 
@@ -468,7 +477,12 @@ func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved confi
 		schedulerOptions(resolved.Schedules, log),
 	)
 	scheduleUnlockRef = unlock
-	go func() { _ = sched.Run(ctx) }()
+	mcpCleanupRef = mcpCleanup
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		_ = sched.Run(ctx)
+	}()
 
 	enabled := 0
 	if jobs, err := st.List(); err == nil {
@@ -482,6 +496,16 @@ func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved confi
 
 	return func() {
 		scheduleUnlockRef = nil
+		mcpCleanupRef = nil
+		// Wait for the scheduler to drain in-flight jobs before tearing down the
+		// shared MCP connections (otherwise a draining run sees broken pipes and
+		// persists a misleading error state) and releasing the lock. Bounded so a
+		// stuck job can't block shutdown indefinitely.
+		select {
+		case <-done:
+		case <-time.After(20 * time.Second):
+			log.Error("schedule: drain timed out, proceeding with cleanup")
+		}
 		mcpCleanup()
 		unlock()
 	}
@@ -539,13 +563,12 @@ func runTaskHeadless(ctx context.Context, resolved config.ResolvedConfig, system
 	}
 	defer agent.Close()
 
-	var messages []llm.Message
-	if system != "" {
-		messages = append(messages, llm.Message{Role: "system", Content: system})
-	}
-	messages = append(messages, llm.Message{Role: "user", Content: task})
-
-	result, _, err := agent.RunWithMessages(ctx, messages)
+	// Use agent.Run (not RunWithMessages): the engine prepends its stored system
+	// message — which odek.New built as RuntimeContext + SystemMessage — so the
+	// host/cwd/date header actually reaches the model. RunWithMessages would take
+	// our messages verbatim and silently drop that context (breaking date-aware
+	// tasks like "summarize today's calendar").
+	result, err := agent.Run(ctx, task)
 	tokens := int64(lastInfo.InputTokens + lastInfo.OutputTokens)
 	return result, tokens, err
 }
diff --git a/cmd/odek/schedule_test.go b/cmd/odek/schedule_test.go
index e40ba7f..a03db83 100644
--- a/cmd/odek/schedule_test.go
+++ b/cmd/odek/schedule_test.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"context"
 	"os"
 	"path/filepath"
 	"strings"
@@ -100,7 +101,7 @@ func TestCliDeliverer_Log(t *testing.T) {
 	t.Setenv("HOME", home)
 	d := cliDeliverer{resolved: config.ResolvedConfig{}}
 	job := schedule.Job{ID: "jb-1", Name: "logjob", Deliver: schedule.Delivery{Kind: schedule.DeliverLog}}
-	if err := d.Deliver(job, "hello from cron"); err != nil {
+	if err := d.Deliver(context.Background(), job, "hello from cron"); err != nil {
 		t.Fatalf("Deliver(log): %v", err)
 	}
 	data, err := os.ReadFile(filepath.Join(home, ".odek", "schedule.log"))
@@ -116,13 +117,13 @@ func TestCliDeliverer_TelegramErrors(t *testing.T) {
 	// No token configured → error.
 	d := cliDeliverer{resolved: config.ResolvedConfig{}}
 	job := schedule.Job{Deliver: schedule.Delivery{Kind: schedule.DeliverTelegram}}
-	if err := d.Deliver(job, "x"); err == nil {
+	if err := d.Deliver(context.Background(), job, "x"); err == nil {
 		t.Error("expected error when telegram token is unset")
 	}
 
 	// Token set but no chat id anywhere → error.
 	d = cliDeliverer{resolved: config.ResolvedConfig{Telegram: telegram.TelegramConfig{Token: "t"}}}
-	if err := d.Deliver(job, "x"); err == nil {
+	if err := d.Deliver(context.Background(), job, "x"); err == nil {
 		t.Error("expected error when no chat id is resolvable")
 	}
 }
@@ -130,7 +131,7 @@ func TestCliDeliverer_TelegramErrors(t *testing.T) {
 func TestCliDeliverer_UnknownKind(t *testing.T) {
 	d := cliDeliverer{resolved: config.ResolvedConfig{}}
 	job := schedule.Job{Deliver: schedule.Delivery{Kind: "pigeon"}}
-	if err := d.Deliver(job, "x"); err == nil {
+	if err := d.Deliver(context.Background(), job, "x"); err == nil {
 		t.Error("unknown delivery kind should error")
 	}
 }
@@ -141,7 +142,7 @@ func TestTelegramDeliverer_SendsViaLiveBot(t *testing.T) {
 	bot, msgCh := newRecordingTestBot(t)
 	d := telegramDeliverer{bot: bot, fallback: cliDeliverer{resolved: config.ResolvedConfig{}}}
 	job := schedule.Job{Deliver: schedule.Delivery{Kind: schedule.DeliverTelegram, ChatID: 555}}
-	if err := d.Deliver(job, "scheduled hello"); err != nil {
+	if err := d.Deliver(context.Background(), job, "scheduled hello"); err != nil {
 		t.Fatalf("Deliver: %v", err)
 	}
 	select {
@@ -162,7 +163,7 @@ func TestTelegramDeliverer_UsesDefaultChatID(t *testing.T) {
 	}
 	// No per-job chat ID → falls back to default_chat_id.
 	job := schedule.Job{Deliver: schedule.Delivery{Kind: schedule.DeliverTelegram}}
-	if err := d.Deliver(job, "to default"); err != nil {
+	if err := d.Deliver(context.Background(), job, "to default"); err != nil {
 		t.Fatalf("Deliver: %v", err)
 	}
 	select {
@@ -176,7 +177,7 @@ func TestTelegramDeliverer_NoChatErrors(t *testing.T) {
 	bot, _ := newRecordingTestBot(t)
 	d := telegramDeliverer{bot: bot, fallback: cliDeliverer{resolved: config.ResolvedConfig{}}}
 	job := schedule.Job{Deliver: schedule.Delivery{Kind: schedule.DeliverTelegram}}
-	if err := d.Deliver(job, "x"); err == nil {
+	if err := d.Deliver(context.Background(), job, "x"); err == nil {
 		t.Error("telegram delivery with no chat id should error")
 	}
 }
@@ -187,7 +188,7 @@ func TestTelegramDeliverer_FallsBackForLog(t *testing.T) {
 	// Non-telegram kinds route to the CLI deliverer; the bot is untouched.
 	d := telegramDeliverer{bot: nil, fallback: cliDeliverer{resolved: config.ResolvedConfig{}}}
 	job := schedule.Job{ID: "jb-x", Name: "logjob", Deliver: schedule.Delivery{Kind: schedule.DeliverLog}}
-	if err := d.Deliver(job, "logged via fallback"); err != nil {
+	if err := d.Deliver(context.Background(), job, "logged via fallback"); err != nil {
 		t.Fatalf("Deliver(log): %v", err)
 	}
 	data, err := os.ReadFile(filepath.Join(home, ".odek", "schedule.log"))
diff --git a/cmd/odek/telegram.go b/cmd/odek/telegram.go
index 5d3ef0e..e0cc266 100644
--- a/cmd/odek/telegram.go
+++ b/cmd/odek/telegram.go
@@ -897,6 +897,12 @@ func gracefulRestart(bot *telegram.Bot) {
 	if instanceLockRef != nil {
 		instanceLockRef.release()
 	}
+	// Close the embedded scheduler's MCP connections before exiting — os.Exit
+	// skips deferred cleanup, so without this the MCP child processes (e.g.
+	// Playwright/Chromium) would leak across every restart.
+	if mcpCleanupRef != nil {
+		mcpCleanupRef()
+	}
 	// Release the schedule lock too, so the restarted child's embedded
 	// scheduler can re-acquire it instead of finding a (briefly) live owner.
 	if scheduleUnlockRef != nil {
diff --git a/docker/README.md b/docker/README.md
index 5c9f35c..7ec948b 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -120,7 +120,10 @@ Full guide: [../docs/SCHEDULES.md](../docs/SCHEDULES.md).
 3. Inspect with `odek schedule list` / `odek schedule next <id>`.
 
 Don't run a separate `odek schedule daemon` against the same `./.odek` while the
-bot is up — a shared lock makes the second one defer, so jobs never double-fire.
+bot is up — a shared lock prevents double-firing, but the daemon will refuse to
+start (non-zero exit, "another schedule daemon is already running") when the bot
+holds it. In the reverse order (daemon up first), the bot's embedded scheduler
+just defers silently.
 
 ## Verify the profiles differ
 
diff --git a/docs/SCHEDULES.md b/docs/SCHEDULES.md
index 6da83f3..08cba83 100644
--- a/docs/SCHEDULES.md
+++ b/docs/SCHEDULES.md
@@ -27,8 +27,11 @@ The same engine runs in two places; pick whichever fits your deployment:
 | **Inside `odek telegram`** | You already run the bot. The scheduler starts automatically as part of the bot process — one process for chat + reminders. |
 | **`odek schedule daemon`** | You don't run the bot (headless server, CI box). A dedicated foreground process that only schedules. |
 
-A shared lock (`~/.odek/schedule.pid`) coordinates the two: whichever starts
-first owns scheduling; the other defers, so jobs never fire twice. (Disable the
+A shared lock (`~/.odek/schedule.pid`) coordinates the two so jobs never fire
+twice — but the two sides handle contention differently: if a daemon already
+holds the lock, the bot's embedded scheduler **defers silently** (the bot keeps
+running, just without scheduling); if the bot holds it, a standalone
+`odek schedule daemon` **refuses to start** and exits non-zero. (Disable the
 bot's embedded scheduler with `schedules.enabled = false` if you prefer to run
 the daemon separately.)
 
diff --git a/internal/schedule/scheduler.go b/internal/schedule/scheduler.go
index 07df56c..8eba077 100644
--- a/internal/schedule/scheduler.go
+++ b/internal/schedule/scheduler.go
@@ -16,9 +16,11 @@ type Runner interface {
 }
 
 // Deliverer routes a successful job result to its destination (Telegram chat,
-// stdout, a log file). It is called only when Run succeeded.
+// stdout, a log file). It is called only when Run succeeded. The context lets a
+// slow delivery (e.g. an unreachable Telegram endpoint) be cancelled on
+// shutdown instead of blocking the drain.
 type Deliverer interface {
-	Deliver(job Job, result string) error
+	Deliver(ctx context.Context, job Job, result string) error
 }
 
 // Logger is the minimal logging surface the engine needs, satisfied by the
@@ -165,7 +167,10 @@ func (s *Scheduler) reconcile(now time.Time) {
 	}
 
 	s.mu.Lock()
-	defer s.mu.Unlock()
+
+	// Skip records to persist are collected here and written AFTER the lock is
+	// released — SaveState does disk I/O and must not run under s.mu.
+	var skips []RunState
 
 	seen := make(map[string]bool, len(jobs))
 	for _, job := range jobs {
@@ -176,11 +181,12 @@ func (s *Scheduler) reconcile(now time.Time) {
 
 		// Unchanged and already scheduled — leave its next-fire intact (so an
 		// unrelated edit doesn't shift this job) and skip the relatively
-		// expensive re-parse + timezone load entirely.
+		// expensive re-parse + timezone load entirely. The in-memory Runs
+		// counter is authoritative here (a concurrent execute() may have already
+		// incremented it past the on-disk value), so it is NOT reseeded.
 		if _, tracked := s.next[job.ID]; tracked && s.sig[job.ID] == newSig {
 			seen[job.ID] = true
 			s.jobs[job.ID] = job
-			s.runs[job.ID] = state[job.ID].Runs
 			continue
 		}
 
@@ -192,11 +198,22 @@ func (s *Scheduler) reconcile(now time.Time) {
 			s.log.Error("scheduler: skipping job with invalid schedule", "id", job.ID, "name", job.Name, "error", err)
 			continue
 		}
+		// Reject expressions that parse but never match a real date (e.g. Feb 30,
+		// hand-edited past Validate). Their next-fire would be the zero time,
+		// which the engine would treat as perpetually due.
+		if sched.Next(now).IsZero() {
+			s.log.Error("scheduler: skipping job whose cron never matches a real date", "id", job.ID, "name", job.Name, "cron", job.Cron)
+			continue
+		}
 		seen[job.ID] = true
 		s.jobs[job.ID] = job
 		s.compiled[job.ID] = sched
 		s.sig[job.ID] = newSig
-		s.runs[job.ID] = state[job.ID].Runs
+		// Seed the run counter from disk only when this job isn't currently
+		// executing; an in-flight run owns the authoritative count.
+		if !s.running[job.ID] {
+			s.runs[job.ID] = state[job.ID].Runs
+		}
 
 		// Determine the first fire for a newly-seen or changed job, applying the
 		// missed-run policy. Only trust the persisted NextRun if it was produced
@@ -212,10 +229,10 @@ func (s *Scheduler) reconcile(now time.Time) {
 			// A fire was missed while we were down and catchup is on → run asap.
 			s.next[job.ID] = now
 		case !prevNext.IsZero() && prevNext.Before(now):
-			// Missed but no catchup → record the skip and move on.
+			// Missed but no catchup → record the skip (persisted after unlock).
 			s.next[job.ID] = sched.Next(now)
 			s.log.Info("scheduler: skipping missed fire", "id", job.ID, "name", job.Name)
-			_ = s.store.SaveState(RunState{
+			skips = append(skips, RunState{
 				JobID: job.ID, LastStatus: StatusSkipped, LastRun: now,
 				NextRun: s.next[job.ID], Runs: s.runs[job.ID], Sig: newSig,
 			})
@@ -234,6 +251,14 @@ func (s *Scheduler) reconcile(now time.Time) {
 			delete(s.runs, id)
 		}
 	}
+	s.mu.Unlock()
+
+	// Persist skip records outside the lock; log failures (don't swallow them).
+	for _, st := range skips {
+		if err := s.store.SaveState(st); err != nil {
+			s.log.Error("scheduler: save skip state failed", "id", st.JobID, "error", err)
+		}
+	}
 }
 
 // fireDue launches every job whose next-fire time is at or before now, then
@@ -244,6 +269,13 @@ func (s *Scheduler) fireDue(ctx context.Context, now time.Time) {
 	s.mu.Lock()
 	var toFire []Job
 	for id, nt := range s.next {
+		// A zero next-fire means the cron never matches (reconcile/Validate
+		// normally prevent this); never treat it as due (zero is before any
+		// real instant) and drop it so it can't spin.
+		if nt.IsZero() {
+			delete(s.next, id)
+			continue
+		}
 		if nt.After(now) {
 			continue
 		}
@@ -309,7 +341,7 @@ func (s *Scheduler) execute(ctx context.Context, job Job, firedAt time.Time) {
 		st.LastError = err.Error()
 		s.log.Error("scheduler: job run failed", "id", job.ID, "name", job.Name, "error", err)
 	default:
-		if derr := s.deliverer.Deliver(job, result); derr != nil {
+		if derr := s.deliverer.Deliver(runCtx, job, result); derr != nil {
 			st.LastStatus = StatusError
 			st.LastError = "delivery: " + derr.Error()
 			s.log.Error("scheduler: delivery failed", "id", job.ID, "name", job.Name, "error", derr)
diff --git a/internal/schedule/scheduler_test.go b/internal/schedule/scheduler_test.go
index b8408b7..61c21ab 100644
--- a/internal/schedule/scheduler_test.go
+++ b/internal/schedule/scheduler_test.go
@@ -44,7 +44,7 @@ type fakeDeliverer struct {
 	err       error
 }
 
-func (f *fakeDeliverer) Deliver(_ Job, result string) error {
+func (f *fakeDeliverer) Deliver(_ context.Context, _ Job, result string) error {
 	f.mu.Lock()
 	defer f.mu.Unlock()
 	if f.err != nil {
@@ -222,6 +222,53 @@ func TestReconcile_MissedSkip(t *testing.T) {
 	}
 }
 
+func TestReconcile_SkipsImpossibleCron(t *testing.T) {
+	// Seed an impossible cron via saveDoc (skipping Validate, like a hand edit):
+	// it must never be tracked, or its zero next-fire would look perpetually due.
+	st := newTestStore(t)
+	doc := &scheduleDoc{Jobs: []Job{{ID: "jb-feb30", Name: "feb30", Cron: "0 0 30 2 *",
+		Task: "x", Deliver: Delivery{Kind: DeliverStdout}, Enabled: true}}}
+	if err := st.saveDoc(doc); err != nil {
+		t.Fatalf("seed impossible job: %v", err) // else the checks below pass vacuously
+	}
+	runner := &fakeRunner{}
+	s := New(st, runner, &fakeDeliverer{}, Options{})
+	now := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(now)
+	if !s.peekNext("jb-feb30").IsZero() {
+		t.Error("impossible cron should not be tracked")
+	}
+	s.fireDue(context.Background(), now)
+	s.Wait()
+	if runner.callCount() != 0 {
+		t.Error("impossible cron must never fire")
+	}
+}
+
+func TestReconcile_UnchangedDoesNotReseedRuns(t *testing.T) {
+	// An in-flight fire increments the in-memory Runs counter; a reload tick
+	// (unchanged job) must NOT clobber it back to the on-disk value.
+	st := newTestStore(t)
+	job := addJob(t, st, Job{Name: "j", Cron: "* * * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	s := New(st, &fakeRunner{}, &fakeDeliverer{}, Options{})
+	now := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(now)
+
+	// Simulate fireDue having incremented the counter (disk still at 0).
+	s.mu.Lock()
+	s.runs[job.ID] = 5
+	s.mu.Unlock()
+
+	s.reconcile(now.Add(time.Minute)) // unchanged job, reload
+	s.mu.Lock()
+	got := s.runs[job.ID]
+	s.mu.Unlock()
+	if got != 5 {
+		t.Errorf("reconcile clobbered in-memory Runs: got %d, want 5", got)
+	}
+}
+
 func TestReconcile_CronChangedWhileDownNotMissed(t *testing.T) {
 	// Persisted NextRun was produced by an OLD cron (different sig). On restart
 	// the job's cron has changed; the stale slot must NOT trigger a catchup.
diff --git a/internal/schedule/store.go b/internal/schedule/store.go
index 9f792c4..7f3f607 100644
--- a/internal/schedule/store.go
+++ b/internal/schedule/store.go
@@ -9,6 +9,7 @@ import (
 	"path/filepath"
 	"sort"
 	"sync"
+	"syscall"
 	"time"
 )
 
@@ -76,9 +77,16 @@ func (j Job) Validate() error {
 		}
 		loc = l
 	}
-	if _, err := ParseInLocation(j.Cron, loc); err != nil {
+	sched, err := ParseInLocation(j.Cron, loc)
+	if err != nil {
 		return fmt.Errorf("schedule: job %q: %w", j.Name, err)
 	}
+	// Reject expressions that parse but can never match a real date (e.g.
+	// "0 0 30 2 *" — Feb 30). Such a job would otherwise have a zero
+	// next-fire time and the engine would treat it as perpetually due.
+	if sched.Next(time.Now()).IsZero() {
+		return fmt.Errorf("schedule: job %q: cron %q never matches a real date", j.Name, j.Cron)
+	}
 	switch j.Deliver.Kind {
 	case DeliverTelegram, DeliverStdout, DeliverLog:
 	case "":
@@ -100,6 +108,7 @@ func (s *Store) Add(job Job) (Job, error) {
 	}
 	s.mu.Lock()
 	defer s.mu.Unlock()
+	defer s.fileLock()()
 
 	doc, err := s.loadDoc()
 	if err != nil {
@@ -167,6 +176,7 @@ func (s *Store) Put(job Job) error {
 	}
 	s.mu.Lock()
 	defer s.mu.Unlock()
+	defer s.fileLock()()
 	doc, err := s.loadDoc()
 	if err != nil {
 		return err
@@ -192,6 +202,7 @@ func (s *Store) Put(job Job) error {
 func (s *Store) Remove(id string) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
+	defer s.fileLock()()
 	doc, err := s.loadDoc()
 	if err != nil {
 		return err
@@ -225,6 +236,7 @@ func (s *Store) Remove(id string) error {
 func (s *Store) SetEnabled(id string, enabled bool) error {
 	s.mu.Lock()
 	defer s.mu.Unlock()
+	defer s.fileLock()()
 	doc, err := s.loadDoc()
 	if err != nil {
 		return err
@@ -271,6 +283,7 @@ func (s *Store) SaveState(st RunState) error {
 	}
 	s.mu.Lock()
 	defer s.mu.Unlock()
+	defer s.fileLock()()
 	sd, err := s.loadState()
 	if err != nil {
 		return err
@@ -354,6 +367,29 @@ func writeJSONAtomic(path string, v any) error {
 	return nil
 }
 
+// fileLock takes an exclusive OS lock (flock) on "schedules.lock" in the store
+// directory (s.dir) and returns a release func. The in-process mutex only
+// serialises one process; this serialises the read-modify-write cycle ACROSS
+// processes, so two concurrent `odek schedule add` invocations can't both load
+// the same baseline and clobber each other's write. Best-effort: if the lock
+// file can't be opened or locked, the caller still proceeds with only s.mu.
+// Callers hold s.mu, so there is a single lock order (mu → flock) and no
+// deadlock with the read-only methods, which take only s.mu.
+func (s *Store) fileLock() func() {
+	f, err := os.OpenFile(filepath.Join(s.dir, "schedules.lock"), os.O_CREATE|os.O_RDWR, 0600)
+	if err != nil {
+		return func() {} // no lock file: no-op release, caller runs unlocked
+	}
+	if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX); err != nil { // no LOCK_NB, so this waits; NOTE(review): syscall.Flock looks Unix-only — confirm build targets
+		f.Close()
+		return func() {} // lock failed: no-op release, caller runs unlocked
+	}
+	return func() {
+		syscall.Flock(int(f.Fd()), syscall.LOCK_UN) // return value ignored
+		f.Close()
+	}
+}
+
 // newJobID returns a stable, collision-resistant job ID like "jb-1a2b3c4d".
 func newJobID() string {
 	buf := make([]byte, 4)
diff --git a/internal/schedule/store_test.go b/internal/schedule/store_test.go
index 31c60fc..159e149 100644
--- a/internal/schedule/store_test.go
+++ b/internal/schedule/store_test.go
@@ -38,6 +38,8 @@ func TestValidate(t *testing.T) {
 		{"valid", func(*Job) {}, false},
 		{"empty task", func(j *Job) { j.Task = "" }, true},
 		{"bad cron", func(j *Job) { j.Cron = "nope" }, true},
+		{"impossible cron (Feb 30)", func(j *Job) { j.Cron = "0 0 30 2 *" }, true},
+		{"impossible cron (Apr 31)", func(j *Job) { j.Cron = "0 0 31 4 *" }, true},
 		{"bad timezone", func(j *Job) { j.Timezone = "Mars/Phobos" }, true},
 		{"good timezone", func(j *Job) { j.Timezone = "Europe/Berlin" }, false},
 		{"empty deliver kind", func(j *Job) { j.Deliver.Kind = "" }, true},
diff --git a/internal/telegram/bot.go b/internal/telegram/bot.go
index 27fc06b..a4e7ae0 100644
--- a/internal/telegram/bot.go
+++ b/internal/telegram/bot.go
@@ -426,6 +426,13 @@ func isRetryableNetworkError(err error) bool {
 
 // SendMessage sends a text message to the specified chat.
 func (b *Bot) SendMessage(chatID int64, text string, opts *SendOpts) (*Message, error) {
+	return b.SendMessageContext(context.Background(), chatID, text, opts)
+}
+
+// SendMessageContext is like SendMessage but aborts the request (and its retry
+// backoff) when ctx is cancelled — used by the scheduler so a stuck delivery
+// doesn't block graceful shutdown.
+func (b *Bot) SendMessageContext(ctx context.Context, chatID int64, text string, opts *SendOpts) (*Message, error) {
 	params := map[string]any{
 		"chat_id": chatID,
 		"text":    text,
@@ -446,7 +453,7 @@ func (b *Bot) SendMessage(chatID int64, text string, opts *SendOpts) (*Message,
 	}
 
 	var msg Message
-	if err := b.doJSON("sendMessage", params, &msg); err != nil {
+	if err := b.doJSONContext(ctx, "sendMessage", params, &msg); err != nil {
 		return nil, err
 	}
 	return &msg, nil

From 5f674ede8051312dc2646d77dbf589e0650371ee Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 5 Jun 2026 06:52:29 +0000
Subject: [PATCH 08/11] test(schedule): cover error paths and edge cases to
 99.6%

Add targeted tests for the native scheduler package and its CLI glue,
raising internal/schedule statement coverage from 87.8% to 99.6% (the
only remaining gap is the best-effort flock syscall-error fallback).

internal/schedule/coverage_test.go exercises:
- store error paths: NewStore HOME failure, NewStoreAt mkdir failure,
  corrupt-file loadDoc/loadState propagation across all CRUD methods,
  writeJSONAtomic marshal/write/rename failures, version defaulting,
  null states map, fileLock open failure, and the List ID tiebreak.
- scheduler branches: reload-on-mtime-change, reconcile List/LoadState
  errors, skip- and execute-time SaveState failures, zero next-fire
  drop, timeToNext empty/past/near cases, compile bad-timezone, and
  preview truncation.
- cronexpr branches: nil-location default, empty field, range/empty
  value parse errors, and a month mismatch in Matches.

cmd/odek/schedule_cli_test.go covers the non-LLM CLI surface: list,
add, rm, enable/disable, next, command dispatch, scheduler options,
MCP no-op, schedule lock acquire/release, embedded-scheduler lifecycle,
and the telegram budget gate.
---
 cmd/odek/schedule_cli_test.go      | 335 +++++++++++++++++++++
 internal/schedule/coverage_test.go | 467 +++++++++++++++++++++++++++++
 2 files changed, 802 insertions(+)
 create mode 100644 cmd/odek/schedule_cli_test.go
 create mode 100644 internal/schedule/coverage_test.go

diff --git a/cmd/odek/schedule_cli_test.go b/cmd/odek/schedule_cli_test.go
new file mode 100644
index 0000000..6afe2ec
--- /dev/null
+++ b/cmd/odek/schedule_cli_test.go
@@ -0,0 +1,335 @@
+package main
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/BackendStack21/odek/internal/config"
+	"github.com/BackendStack21/odek/internal/schedule"
+	"github.com/BackendStack21/odek/internal/telegram"
+)
+
+// newCLITestStore returns a schedule store rooted at a temp dir.
+func newCLITestStore(t *testing.T) *schedule.Store {
+	t.Helper()
+	st, err := schedule.NewStoreAt(t.TempDir())
+	if err != nil {
+		t.Fatalf("NewStoreAt: %v", err)
+	}
+	return st
+}
+
+func validJob() schedule.Job {
+	return schedule.Job{Name: "j", Cron: "0 9 * * *", Task: "do it",
+		Deliver: schedule.Delivery{Kind: schedule.DeliverStdout}, Enabled: true}
+}
+
+// ── list ──────────────────────────────────────────────────────────────────
+
+func TestScheduleList(t *testing.T) {
+	st := newCLITestStore(t)
+	// Empty store path.
+	if err := scheduleList(st); err != nil {
+		t.Fatalf("scheduleList(empty): %v", err)
+	}
+	// Enabled + disabled jobs exercise both the "yes"/"no" and last-status paths.
+	if _, err := st.Add(validJob()); err != nil {
+		t.Fatal(err)
+	}
+	dis := validJob()
+	dis.Name, dis.Enabled = "off", false
+	if _, err := st.Add(dis); err != nil {
+		t.Fatal(err)
+	}
+	if err := scheduleList(st); err != nil {
+		t.Fatalf("scheduleList(populated): %v", err)
+	}
+}
+
+func TestScheduleList_StoreError(t *testing.T) {
+	dir := t.TempDir()
+	st, _ := schedule.NewStoreAt(dir)
+	if err := os.WriteFile(filepath.Join(dir, "schedules.json"), []byte("{bad"), 0600); err != nil {
+		t.Fatal(err)
+	}
+	if err := scheduleList(st); err == nil {
+		t.Error("scheduleList should surface a corrupt store")
+	}
+}
+
+// ── add ───────────────────────────────────────────────────────────────────
+
+func TestScheduleAdd(t *testing.T) {
+	st := newCLITestStore(t)
+	if err := scheduleAdd(st, []string{"--cron", "0 9 * * *", "--name", "morning", "summarize", "calendar"}); err != nil {
+		t.Fatalf("scheduleAdd: %v", err)
+	}
+	jobs, _ := st.List()
+	if len(jobs) != 1 || jobs[0].Name != "morning" {
+		t.Fatalf("job not added correctly: %+v", jobs)
+	}
+
+	// Default name derived from the task, disabled flag honoured.
+	if err := scheduleAdd(st, []string{"--cron", "* * * * *", "--disabled", "auto", "named", "job", "here"}); err != nil {
+		t.Fatalf("scheduleAdd(disabled): %v", err)
+	}
+}
+
+func TestScheduleAdd_Errors(t *testing.T) {
+	st := newCLITestStore(t)
+	cases := map[string][]string{
+		"missing cron and task": {},
+		"bad flag":              {"--nope"},
+		"unknown deliver":       {"--cron", "* * * * *", "--deliver", "pigeon", "x"},
+		"invalid cron rejected": {"--cron", "garbage", "x"},
+	}
+	for name, args := range cases {
+		if err := scheduleAdd(st, args); err == nil {
+			t.Errorf("%s: expected error", name)
+		}
+	}
+}
+
+// ── rm / enable / disable ──────────────────────────────────────────────────
+
+func TestScheduleRemove(t *testing.T) {
+	st := newCLITestStore(t)
+	a, _ := st.Add(validJob())
+	if err := scheduleRemove(st, []string{a.ID}); err != nil {
+		t.Fatalf("scheduleRemove: %v", err)
+	}
+	if err := scheduleRemove(st, nil); err == nil {
+		t.Error("scheduleRemove with no args should error")
+	}
+	if err := scheduleRemove(st, []string{"jb-missing"}); err == nil {
+		t.Error("scheduleRemove of a missing id should error")
+	}
+}
+
+func TestScheduleSetEnabled(t *testing.T) {
+	st := newCLITestStore(t)
+	a, _ := st.Add(validJob())
+	if err := scheduleSetEnabled(st, []string{a.ID}, false); err != nil {
+		t.Fatalf("disable: %v", err)
+	}
+	if err := scheduleSetEnabled(st, []string{a.ID}, true); err != nil {
+		t.Fatalf("enable: %v", err)
+	}
+	if err := scheduleSetEnabled(st, nil, true); err == nil {
+		t.Error("enable with no args should error")
+	}
+	if err := scheduleSetEnabled(st, []string{"jb-missing"}, false); err == nil {
+		t.Error("disable of a missing id should error")
+	}
+}
+
+// ── next ───────────────────────────────────────────────────────────────────
+
+func TestScheduleNext(t *testing.T) {
+	st := newCLITestStore(t)
+	a, _ := st.Add(validJob())
+
+	if err := scheduleNext(st, []string{a.ID}); err != nil {
+		t.Fatalf("next by id: %v", err)
+	}
+	if err := scheduleNext(st, []string{"*/15", "*", "*", "*", "*"}); err != nil {
+		t.Fatalf("next by expression: %v", err)
+	}
+	// An impossible cron prints the "no further fires" line without erroring.
+	if err := scheduleNext(st, []string{"0", "0", "30", "2", "*"}); err != nil {
+		t.Fatalf("next impossible cron: %v", err)
+	}
+	if err := scheduleNext(st, nil); err == nil {
+		t.Error("next with no args should error")
+	}
+	if err := scheduleNext(st, []string{"not-a-cron"}); err == nil {
+		t.Error("next with a bad expression should error")
+	}
+}
+
+func TestScheduleNext_StoreError(t *testing.T) {
+	dir := t.TempDir()
+	st, _ := schedule.NewStoreAt(dir)
+	if err := os.WriteFile(filepath.Join(dir, "schedules.json"), []byte("{bad"), 0600); err != nil {
+		t.Fatal(err)
+	}
+	if err := scheduleNext(st, []string{"jb-x"}); err == nil {
+		t.Error("scheduleNext should surface a corrupt store on Get")
+	}
+}
+
+// ── dispatch ────────────────────────────────────────────────────────────────
+
+func TestScheduleCmd_Dispatch(t *testing.T) {
+	t.Setenv("HOME", t.TempDir())
+	// No args → usage, no error.
+	if err := scheduleCmd(nil); err != nil {
+		t.Errorf("scheduleCmd(nil): %v", err)
+	}
+	// Unknown subcommand → error.
+	if err := scheduleCmd([]string{"bogus"}); err == nil {
+		t.Error("scheduleCmd(bogus) should error")
+	}
+	// Store-backed subcommands route correctly.
+	if err := scheduleCmd([]string{"list"}); err != nil {
+		t.Errorf("scheduleCmd(list): %v", err)
+	}
+	if err := scheduleCmd([]string{"add", "--cron", "0 9 * * *", "morning task"}); err != nil {
+		t.Errorf("scheduleCmd(add): %v", err)
+	}
+	if err := scheduleCmd([]string{"ls"}); err != nil {
+		t.Errorf("scheduleCmd(ls): %v", err)
+	}
+	// rm/enable/disable/next with missing args still route (and return usage errors).
+	for _, sub := range [][]string{{"rm"}, {"enable"}, {"disable"}, {"next"}} {
+		if err := scheduleCmd(sub); err == nil {
+			t.Errorf("scheduleCmd(%v) should error on missing args", sub)
+		}
+	}
+}
+
+func TestPrintScheduleUsage(t *testing.T) {
+	printScheduleUsage() // smoke: must not panic
+}
+
+// ── cliDeliverer stdout ─────────────────────────────────────────────────────
+
+func TestCliDeliverer_Stdout(t *testing.T) {
+	d := cliDeliverer{resolved: config.ResolvedConfig{}}
+	job := schedule.Job{Name: "j", Deliver: schedule.Delivery{Kind: schedule.DeliverStdout}}
+	if err := d.Deliver(context.Background(), job, "hello stdout"); err != nil {
+		t.Errorf("stdout deliver: %v", err)
+	}
+}
+
+// ── helpers ─────────────────────────────────────────────────────────────────
+
+func TestSchedulerOptions(t *testing.T) {
+	log := schedule.NopLogger{}
+	// Valid timezone is loaded.
+	opts := schedulerOptions(config.ScheduleConfig{Timezone: "Europe/Berlin", MaxConcurrent: 3, Catchup: true}, log)
+	if opts.DefaultTZ == nil || opts.DefaultTZ.String() != "Europe/Berlin" {
+		t.Errorf("DefaultTZ = %v, want Europe/Berlin", opts.DefaultTZ)
+	}
+	if opts.MaxConcurrent != 3 || !opts.Catchup {
+		t.Errorf("options not carried through: %+v", opts)
+	}
+	// Invalid timezone falls back to UTC (and logs).
+	opts = schedulerOptions(config.ScheduleConfig{Timezone: "Mars/Phobos"}, log)
+	if opts.DefaultTZ != time.UTC {
+		t.Errorf("invalid tz should fall back to UTC, got %v", opts.DefaultTZ)
+	}
+	// Empty timezone → UTC.
+	opts = schedulerOptions(config.ScheduleConfig{}, log)
+	if opts.DefaultTZ != time.UTC {
+		t.Errorf("empty tz should be UTC, got %v", opts.DefaultTZ)
+	}
+}
+
+func TestBuildScheduledMCPTools_NoServers(t *testing.T) {
+	tools, cleanup, err := buildScheduledMCPTools(config.ResolvedConfig{})
+	if err != nil {
+		t.Fatalf("buildScheduledMCPTools: %v", err)
+	}
+	if tools != nil {
+		t.Errorf("expected no tools, got %d", len(tools))
+	}
+	cleanup() // no-op, must not panic
+}
+
+func TestAppendScheduleLog_HomeError(t *testing.T) {
+	t.Setenv("HOME", "")
+	if err := appendScheduleLog(schedule.Job{ID: "jb-1", Name: "j"}, "x"); err == nil {
+		t.Error("appendScheduleLog should error when HOME is unresolvable")
+	}
+}
+
+func TestAcquireScheduleLock(t *testing.T) {
+	home := t.TempDir()
+	t.Setenv("HOME", home)
+	unlock, err := acquireScheduleLock()
+	if err != nil {
+		t.Fatalf("acquireScheduleLock: %v", err)
+	}
+	pidPath := filepath.Join(home, ".odek", "schedule.pid")
+	if _, err := os.Stat(pidPath); err != nil {
+		t.Errorf("pid file not written: %v", err)
+	}
+
+	// A second acquire while this (live, odek-owned) process holds the lock must
+	// be refused — but only when /proc reports an odek cmdline for our PID.
+	if cmdline, err := os.ReadFile("/proc/self/cmdline"); err == nil && strings.Contains(string(cmdline), "odek") {
+		if _, err := acquireScheduleLock(); err == nil {
+			t.Error("expected refusal while a live owned daemon holds the lock")
+		}
+	}
+
+	unlock()
+	if _, err := os.Stat(pidPath); !os.IsNotExist(err) {
+		t.Error("unlock did not remove the pid file")
+	}
+	// After release, re-acquiring succeeds.
+	u2, err := acquireScheduleLock()
+	if err != nil {
+		t.Fatalf("re-acquire after release: %v", err)
+	}
+	u2()
+}
+
+func TestAcquireScheduleLock_HomeError(t *testing.T) {
+	t.Setenv("HOME", "")
+	if _, err := acquireScheduleLock(); err == nil {
+		t.Error("acquireScheduleLock should error when HOME is unresolvable")
+	}
+}
+
+// ── embedded scheduler (bot) lifecycle ──────────────────────────────────────
+
+func TestStartSchedulerForBot_Disabled(t *testing.T) {
+	stop := startSchedulerForBot(context.Background(), nil, config.ResolvedConfig{}, "system",
+		telegram.NewFileLogger(telegram.LogInfo, ""))
+	stop() // disabled → no-op stop, must not panic
+}
+
+func TestStartSchedulerForBot_StartAndStop(t *testing.T) {
+	t.Setenv("HOME", t.TempDir())
+	bot, _ := newRecordingTestBot(t)
+	resolved := config.ResolvedConfig{
+		Schedules: config.ScheduleConfig{Enabled: true, MaxConcurrent: 2, Timezone: "UTC"},
+	}
+	ctx, cancel := context.WithCancel(context.Background())
+	stop := startSchedulerForBot(ctx, bot, resolved, "system", telegram.NewFileLogger(telegram.LogInfo, ""))
+	cancel()
+	stop() // drains the scheduler goroutine, cleans up MCP, releases the lock
+}
+
+// ── telegramRunner budget gate ──────────────────────────────────────────────
+
+func TestTelegramRunner_BudgetExhausted(t *testing.T) {
+	home := t.TempDir()
+	t.Setenv("HOME", home)
+	if err := os.MkdirAll(filepath.Join(home, ".odek"), 0755); err != nil {
+		t.Fatal(err)
+	}
+	// Seed today's usage file above the limit so the pre-flight check trips
+	// before any (LLM-dependent) task execution.
+	date := time.Now().Format("2006-01-02")
+	usageFile := filepath.Join(home, ".odek", "telegram_token_usage_"+date)
+	if err := os.WriteFile(usageFile, []byte("1000"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	bot := telegram.NewBot("test:token")
+	bot.SetDailyTokenBudget(10)
+	r := telegramRunner{
+		resolved: config.ResolvedConfig{Telegram: telegram.TelegramConfig{DailyTokenBudget: 10}},
+		bot:      bot,
+	}
+	_, _, err := r.Run(context.Background(), schedule.Job{Task: "x"})
+	if err == nil {
+		t.Error("expected a budget-exhausted error before task execution")
+	}
+}
diff --git a/internal/schedule/coverage_test.go b/internal/schedule/coverage_test.go
new file mode 100644
index 0000000..fc5e03c
--- /dev/null
+++ b/internal/schedule/coverage_test.go
@@ -0,0 +1,467 @@
+package schedule
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+)
+
+// This file targets the error-handling and edge-case branches that the main
+// test files leave uncovered, pushing the package close to full statement coverage.
+
+// writeFile is a tiny helper for seeding (possibly corrupt) on-disk state.
+func writeFile(t *testing.T, path, content string) {
+	t.Helper()
+	if err := os.WriteFile(path, []byte(content), 0600); err != nil {
+		t.Fatalf("write %s: %v", path, err)
+	}
+}
+
+// ── NewStore / NewStoreAt ─────────────────────────────────────────────────
+
+func TestNewStore_HomeAndError(t *testing.T) {
+	// Happy path: HOME points at a writable temp dir.
+	t.Setenv("HOME", t.TempDir())
+	if _, err := NewStore(); err != nil {
+		t.Fatalf("NewStore with valid HOME: %v", err)
+	}
+	// Error path: an empty HOME makes os.UserHomeDir fail on Linux.
+	t.Setenv("HOME", "")
+	if _, err := NewStore(); err == nil {
+		t.Error("NewStore with empty HOME should error")
+	}
+}
+
+func TestNewStoreAt_MkdirError(t *testing.T) {
+	// A path component that is a file makes MkdirAll fail with ENOTDIR.
+	f := filepath.Join(t.TempDir(), "afile")
+	writeFile(t, f, "x")
+	if _, err := NewStoreAt(filepath.Join(f, "sub")); err == nil {
+		t.Error("NewStoreAt under a file should error")
+	}
+}
+
+// ── loadDoc error propagation across CRUD ─────────────────────────────────
+
+func TestCRUD_LoadDocError(t *testing.T) {
+	dir := t.TempDir()
+	st, _ := NewStoreAt(dir)
+	// Corrupt the definitions file so every read fails to parse.
+	writeFile(t, filepath.Join(dir, schedulesFile), "{not json")
+
+	if _, err := st.Add(sampleJob()); err == nil {
+		t.Error("Add should surface a corrupt store")
+	}
+	if _, err := st.List(); err == nil {
+		t.Error("List should surface a corrupt store")
+	}
+	if _, _, err := st.Get("jb-x"); err == nil {
+		t.Error("Get should surface a corrupt store")
+	}
+	put := sampleJob()
+	put.ID = "jb-x"
+	if err := st.Put(put); err == nil {
+		t.Error("Put should surface a corrupt store")
+	}
+	if err := st.Remove("jb-x"); err == nil {
+		t.Error("Remove should surface a corrupt store")
+	}
+	if err := st.SetEnabled("jb-x", true); err == nil {
+		t.Error("SetEnabled should surface a corrupt store")
+	}
+}
+
+func TestLoadState_Error(t *testing.T) {
+	dir := t.TempDir()
+	st, _ := NewStoreAt(dir)
+	writeFile(t, filepath.Join(dir, stateFile), "{bad")
+	if _, err := st.LoadState(); err == nil {
+		t.Error("LoadState should surface a corrupt state file")
+	}
+	if err := st.SaveState(RunState{JobID: "jb-1"}); err == nil {
+		t.Error("SaveState should surface a corrupt state file on its load step")
+	}
+}
+
+// ── saveDoc / writeJSONAtomic error paths ─────────────────────────────────
+
+// makeTmpDir creates a directory at "<file>.tmp" so writeJSONAtomic's WriteFile
+// to that temp path fails (it can't write a file over a directory).
+func makeTmpDir(t *testing.T, path string) {
+	t.Helper()
+	if err := os.MkdirAll(path+".tmp", 0755); err != nil {
+		t.Fatalf("mkdir %s.tmp: %v", path, err)
+	}
+}
+
+func TestAdd_SaveDocError(t *testing.T) {
+	dir := t.TempDir()
+	st, _ := NewStoreAt(dir)
+	makeTmpDir(t, filepath.Join(dir, schedulesFile))
+	if _, err := st.Add(sampleJob()); err == nil {
+		t.Error("Add should fail when the definitions file can't be written")
+	}
+}
+
+func TestRemove_SaveDocError(t *testing.T) {
+	dir := t.TempDir()
+	st, _ := NewStoreAt(dir)
+	a, err := st.Add(sampleJob())
+	if err != nil {
+		t.Fatalf("Add: %v", err)
+	}
+	makeTmpDir(t, filepath.Join(dir, schedulesFile))
+	if err := st.Remove(a.ID); err == nil {
+		t.Error("Remove should fail when the definitions file can't be rewritten")
+	}
+}
+
+func TestPut_ValidateAndEmptyID(t *testing.T) {
+	st := newTestStore(t)
+	bad := sampleJob()
+	bad.ID = "jb-1"
+	bad.Cron = "garbage"
+	if err := st.Put(bad); err == nil {
+		t.Error("Put should reject an invalid job")
+	}
+	noID := sampleJob()
+	if err := st.Put(noID); err == nil {
+		t.Error("Put should require an ID")
+	}
+}
+
+// ── internal IO helpers (same-package direct calls) ───────────────────────
+
+func TestReadJSON_ReadAndParseErrors(t *testing.T) {
+	dir := t.TempDir()
+
+	// A directory in place of the file makes os.ReadFile fail (not IsNotExist).
+	asDir := filepath.Join(dir, "isdir.json")
+	if err := os.Mkdir(asDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := readJSON(asDir, &scheduleDoc{}); err == nil {
+		t.Error("readJSON should error when the path is a directory")
+	}
+
+	// An empty file is treated as an empty document (no error).
+	empty := filepath.Join(dir, "empty.json")
+	writeFile(t, empty, "")
+	if err := readJSON(empty, &scheduleDoc{}); err != nil {
+		t.Errorf("readJSON of an empty file should be nil, got %v", err)
+	}
+
+	// Invalid JSON is a parse error.
+	bad := filepath.Join(dir, "bad.json")
+	writeFile(t, bad, "{nope}")
+	if err := readJSON(bad, &scheduleDoc{}); err == nil {
+		t.Error("readJSON should error on invalid JSON")
+	}
+}
+
+func TestWriteJSONAtomic_Errors(t *testing.T) {
+	dir := t.TempDir()
+
+	// Marshal error: a channel cannot be encoded to JSON.
+	if err := writeJSONAtomic(filepath.Join(dir, "x.json"), make(chan int)); err == nil {
+		t.Error("writeJSONAtomic should fail to marshal a channel")
+	}
+
+	// Write error: the temp path is a directory.
+	wpath := filepath.Join(dir, "w.json")
+	makeTmpDir(t, wpath)
+	if err := writeJSONAtomic(wpath, map[string]int{"a": 1}); err == nil {
+		t.Error("writeJSONAtomic should fail when the temp path is a directory")
+	}
+
+	// Rename error: the destination is a non-empty directory, so rename of the
+	// temp file onto it fails after a successful temp write.
+	rpath := filepath.Join(dir, "r.json")
+	if err := os.Mkdir(rpath, 0755); err != nil {
+		t.Fatal(err)
+	}
+	writeFile(t, filepath.Join(rpath, "child"), "x") // make it non-empty
+	if err := writeJSONAtomic(rpath, map[string]int{"a": 1}); err == nil {
+		t.Error("writeJSONAtomic should fail to rename onto a directory")
+	}
+}
+
+func TestSaveDocSaveState_VersionDefaulting(t *testing.T) {
+	st := newTestStore(t)
+	// A zero-version document must be stamped to 1 on save.
+	if err := st.saveDoc(&scheduleDoc{}); err != nil {
+		t.Fatalf("saveDoc: %v", err)
+	}
+	doc, err := st.loadDoc()
+	if err != nil || doc.Version != 1 {
+		t.Errorf("saveDoc did not default Version: %+v err=%v", doc, err)
+	}
+	if err := st.saveState(&stateDoc{States: map[string]RunState{}}); err != nil {
+		t.Fatalf("saveState: %v", err)
+	}
+	sd, err := st.loadState()
+	if err != nil || sd.Version != 1 {
+		t.Errorf("saveState did not default Version: %+v err=%v", sd, err)
+	}
+}
+
+func TestLoadState_NullStatesMap(t *testing.T) {
+	dir := t.TempDir()
+	st, _ := NewStoreAt(dir)
+	// A persisted null states map must be normalised to an empty (non-nil) map.
+	writeFile(t, filepath.Join(dir, stateFile), `{"version":1,"states":null}`)
+	states, err := st.LoadState()
+	if err != nil {
+		t.Fatalf("LoadState: %v", err)
+	}
+	if states == nil {
+		t.Error("LoadState should return a non-nil map for a null states field")
+	}
+}
+
+func TestFileLock_OpenError(t *testing.T) {
+	dir := t.TempDir()
+	st, _ := NewStoreAt(dir)
+	// A directory where the lock file should be makes OpenFile fail; the store
+	// must still proceed (best-effort lock), so Add succeeds.
+	if err := os.Mkdir(filepath.Join(dir, "schedules.lock"), 0755); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := st.Add(sampleJob()); err != nil {
+		t.Errorf("Add should still succeed when the lock can't be opened: %v", err)
+	}
+}
+
+// ── List sort tiebreak ────────────────────────────────────────────────────
+
+// TestList_SortByIDOnEqualCreatedAt checks that List orders jobs sharing a CreatedAt by ID.
+func TestList_SortByIDOnEqualCreatedAt(t *testing.T) {
+	st := newTestStore(t)
+	at := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	j1, j2 := sampleJob(), sampleJob()
+	j1.ID, j1.CreatedAt = "jb-bbb", at
+	j2.ID, j2.CreatedAt = "jb-aaa", at
+	if _, err := st.Add(j1); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := st.Add(j2); err != nil {
+		t.Fatal(err)
+	}
+	jobs, err := st.List() // print the slice itself on failure: indexing a short result would panic
+	if err != nil || len(jobs) != 2 || jobs[0].ID != "jb-aaa" {
+		t.Errorf("equal CreatedAt should tiebreak by ID; got jobs %+v (err=%v)", jobs, err)
+	}
+}
+
+// ── cronexpr edge branches ────────────────────────────────────────────────
+
+func TestParseInLocation_NilLocDefaultsUTC(t *testing.T) {
+	s, err := ParseInLocation("0 9 * * *", nil)
+	if err != nil {
+		t.Fatalf("ParseInLocation(nil loc): %v", err)
+	}
+	if s.loc != time.UTC {
+		t.Errorf("nil loc should default to UTC, got %v", s.loc)
+	}
+}
+
+func TestParseField_EmptyField(t *testing.T) {
+	if _, _, err := parseField("", 0, 59, nil); err == nil {
+		t.Error("parseField should reject an empty field")
+	}
+}
+
+func TestParse_RangeAndEmptyValueErrors(t *testing.T) {
+	// Every expression carries a malformed range in its first field.
+	for _, tc := range []struct{ expr, why string }{
+		{"zz-5 * * * *", "non-numeric range low"},
+		{"5-zz * * * *", "non-numeric range high"},
+		{"-5 * * * *", "empty range low"},
+		{"5- * * * *", "empty range high"},
+	} {
+		if _, err := Parse(tc.expr); err == nil {
+			t.Errorf("Parse(%q): expected error", tc.expr)
+		}
+	}
+}
+
+func TestMatches_MonthMismatch(t *testing.T) {
+	s := mustParse(t, "0 0 1 6 *") // only June 1
+	if s.Matches(time.Date(2026, 7, 1, 0, 0, 0, 0, time.UTC)) {
+		t.Error("July must not match a June-only schedule")
+	}
+	if !s.Matches(time.Date(2026, 6, 1, 0, 0, 0, 0, time.UTC)) {
+		t.Error("June 1 should match")
+	}
+}
+
+// ── scheduler edge branches ───────────────────────────────────────────────
+
+func TestReconcile_ListAndLoadStateErrors(t *testing.T) {
+	// List error: a corrupt definitions file makes reconcile bail early.
+	dir := t.TempDir()
+	st, _ := NewStoreAt(dir)
+	writeFile(t, filepath.Join(dir, schedulesFile), "{bad")
+	s := New(st, &fakeRunner{}, &fakeDeliverer{}, Options{})
+	s.reconcile(time.Now()) // must not panic; logs and returns
+	if len(s.next) != 0 {
+		t.Error("reconcile should track nothing when List fails")
+	}
+
+	// LoadState error: valid definitions, corrupt state file → reconcile
+	// continues with empty state.
+	dir2 := t.TempDir()
+	st2, _ := NewStoreAt(dir2)
+	job := addJob(t, st2, Job{Name: "j", Cron: "* * * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	writeFile(t, filepath.Join(dir2, stateFile), "{bad")
+	s2 := New(st2, &fakeRunner{}, &fakeDeliverer{}, Options{})
+	s2.reconcile(time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC))
+	if s2.peekNext(job.ID).IsZero() {
+		t.Error("reconcile should still schedule the job despite a corrupt state file")
+	}
+}
+
+func TestReconcile_SkipSaveStateError(t *testing.T) {
+	dir := t.TempDir()
+	st, _ := NewStoreAt(dir)
+	job := addJob(t, st, Job{Name: "j", Cron: "0 9 * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true, Catchup: false})
+	past := time.Date(2026, 6, 3, 9, 0, 0, 0, time.UTC)
+	if err := st.SaveState(RunState{JobID: job.ID, NextRun: past, Sig: jobSig(job)}); err != nil {
+		t.Fatal(err)
+	}
+	// Break state writes so the skip-record persistence fails (logged, not fatal).
+	makeTmpDir(t, filepath.Join(dir, stateFile))
+	s := New(st, &fakeRunner{}, &fakeDeliverer{}, Options{})
+	now := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(now) // exercises the SaveState-error log branch
+	if !s.peekNext(job.ID).After(now) {
+		t.Error("missed fire should still be forward-scheduled")
+	}
+}
+
+func TestExecute_SaveStateError(t *testing.T) {
+	dir := t.TempDir()
+	st, _ := NewStoreAt(dir)
+	job := addJob(t, st, Job{Name: "j", Cron: "* * * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	s := New(st, &fakeRunner{result: "ok"}, &fakeDeliverer{}, Options{})
+	t0 := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(t0)
+	// Break state writes; the run still completes, the SaveState error is logged.
+	makeTmpDir(t, filepath.Join(dir, stateFile))
+	s.fireDue(context.Background(), s.peekNext(job.ID))
+	s.Wait()
+}
+
+func TestFireDue_DropsZeroNextFire(t *testing.T) {
+	st := newTestStore(t)
+	job := addJob(t, st, Job{Name: "j", Cron: "* * * * *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true})
+	runner := &fakeRunner{}
+	s := New(st, runner, &fakeDeliverer{}, Options{})
+	now := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.reconcile(now)
+	// Force a zero next-fire; fireDue must drop it and never run it.
+	s.mu.Lock()
+	s.next[job.ID] = time.Time{}
+	s.mu.Unlock()
+	s.fireDue(context.Background(), now)
+	s.Wait()
+	s.mu.Lock()
+	_, tracked := s.next[job.ID]
+	s.mu.Unlock()
+	if tracked {
+		t.Error("a zero next-fire should be dropped")
+	}
+	if runner.callCount() != 0 {
+		t.Error("a zero next-fire must never run")
+	}
+}
+
+func TestTimeToNext_EmptyAndPast(t *testing.T) {
+	s := New(newTestStore(t), &fakeRunner{}, &fakeDeliverer{}, Options{})
+	// No jobs → cap at maxSleep.
+	if d := s.timeToNext(time.Now()); d != maxSleep {
+		t.Errorf("timeToNext with no jobs = %v, want %v", d, maxSleep)
+	}
+	// A past next-fire → zero (fire immediately).
+	now := time.Date(2026, 6, 4, 10, 0, 0, 0, time.UTC)
+	s.mu.Lock()
+	s.next["x"] = now.Add(-time.Hour)
+	s.mu.Unlock()
+	if d := s.timeToNext(now); d != 0 {
+		t.Errorf("timeToNext with a past fire = %v, want 0", d)
+	}
+	// A near-future fire (within maxSleep) → returns the exact remaining delay.
+	s.mu.Lock()
+	s.next["x"] = now.Add(5 * time.Minute)
+	s.mu.Unlock()
+	if d := s.timeToNext(now); d != 5*time.Minute {
+		t.Errorf("timeToNext with a near fire = %v, want 5m", d)
+	}
+}
+
+func TestCompile_BadTimezone(t *testing.T) {
+	if _, err := compile(Job{Cron: "* * * * *", Timezone: "Mars/Phobos"}, time.UTC); err == nil {
+		t.Error("compile should fail for an invalid timezone")
+	}
+}
+
+func TestPreview_Truncates(t *testing.T) {
+	short := "hello"
+	if preview(short) != short {
+		t.Error("preview should leave short text unchanged")
+	}
+	long := strings.Repeat("x", resultPreviewRunes+50)
+	got := preview(long)
+	if !strings.HasSuffix(got, "…") {
+		t.Error("preview should append an ellipsis when truncating")
+	}
+	if len([]rune(got)) != resultPreviewRunes+1 {
+		t.Errorf("preview length = %d runes, want %d", len([]rune(got)), resultPreviewRunes+1)
+	}
+}
+
+func TestRun_ReloadsOnFileChange(t *testing.T) {
+	st := newTestStore(t)
+	runner := &fakeRunner{result: "ok", started: make(chan string, 1)}
+	s := New(st, runner, &fakeDeliverer{}, Options{ReloadEvery: 10 * time.Millisecond})
+
+	ctx, cancel := context.WithCancel(context.Background())
+	done := make(chan error, 1)
+	go func() { done <- s.Run(ctx) }()
+
+	// Let the initial reconcile run against the empty store first, so the new
+	// job is only picked up via the reload (mtime-change) path.
+	time.Sleep(60 * time.Millisecond)
+
+	job := Job{ID: "jb-reload", Name: "r", Cron: "0 0 1 1 *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true, Catchup: true}
+	// Seed a missed past fire with a matching signature so catchup fires it now.
+	past := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)
+	if err := st.SaveState(RunState{JobID: job.ID, NextRun: past, Sig: jobSig(job)}); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := st.Add(job); err != nil { // bumps schedules.json mtime → reload
+		t.Fatal(err)
+	}
+
+	select {
+	case <-runner.started:
+		// reload picked up the new job and the catchup fire ran — good.
+	case <-time.After(3 * time.Second):
+		cancel()
+		t.Fatal("scheduler did not reload and fire the new job")
+	}
+	cancel()
+	select {
+	case <-done:
+	case <-time.After(2 * time.Second):
+		t.Fatal("Run did not return after cancel")
+	}
+}

From 248dac1586ea426b7df684bc1b3f1f25597067e3 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 5 Jun 2026 06:57:28 +0000
Subject: [PATCH 09/11] docs: make docs consistent with the native scheduler
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The native scheduler landed with its own docs (SCHEDULES.md, CONFIG.md,
TELEGRAM.md, docker/*) but left a few cross-references stale or missing.
Bring the rest of the docs in line:

- README.md: add the missing Scheduled Tasks row to the docs index
  (the feature section already linked SCHEDULES.md).
- docs/index.html: add a Scheduled Tasks feature card mirroring README.
- docs/CLI.md: list 'odek schedule' (and the previously-omitted
  'odek telegram') in the command table; point '--deliver' at the
  native scheduler for recurring tasks.
- docs/CHEATSHEET.md: add a schedule quick-reference (and telegram).
- docs/DAILY-WORKER.md: correct the comparison table — odek now has
  native, in-process scheduling rather than 'None'.
---
 README.md            | 1 +
 docs/CHEATSHEET.md   | 7 +++++++
 docs/CLI.md          | 4 +++-
 docs/DAILY-WORKER.md | 2 +-
 docs/index.html      | 6 ++++++
 5 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index de95344..3821217 100644
--- a/README.md
+++ b/README.md
@@ -167,6 +167,7 @@ odek run "@README.md what does this project do?"
 | [Memory](docs/MEMORY.md) | Three-tier design, go-vector merge-on-write, `memory` tool |
 | [Sessions](docs/SESSIONS.md) | Multi-turn conversations, save/resume/trim/cleanup |
 | [Telegram Bot](docs/TELEGRAM.md) | Telegram integration: bot client, slash commands, session management, plans, media downloads |
+| [Scheduled Tasks](docs/SCHEDULES.md) | Native in-process cron: `odek schedule`, Vixie cron syntax, delivery, missed-run catchup, daemon vs embedded |
 | [Sandboxing](docs/SANDBOXING.md) | Docker isolation model, config, security hardening |
 | [Security](docs/SECURITY.md) | Threat model, prompt injection defense, sandbox model |
 | [Sub-Agents](docs/SUBAGENTS.md) | Task decomposition, delegation tool, subagent protocol |
diff --git a/docs/CHEATSHEET.md b/docs/CHEATSHEET.md
index a7642ad..939b678 100644
--- a/docs/CHEATSHEET.md
+++ b/docs/CHEATSHEET.md
@@ -12,6 +12,13 @@ odek serve                           # Web UI (http://127.0.0.1:8080)
 odek serve --open                    # Web UI + auto-open browser
 odek subagent --goal "review auth"   # Spawn subagent
 odek mcp                             # Expose tools via MCP stdio
+odek telegram                        # Telegram bot (also hosts the scheduler)
+
+# Scheduled tasks (native cron — see docs/SCHEDULES.md)
+odek schedule add --cron "0 9 * * 1-5" --deliver telegram "stand-up nudge"
+odek schedule list                   # List jobs (id, next fire, last status)
+odek schedule next "*/15 * * * *"    # Preview upcoming fire times
+odek schedule daemon                 # Run the scheduler headless
 
 # Sandbox flags (apply to run/repl/serve)
 odek run --sandbox "build safely"
diff --git a/docs/CLI.md b/docs/CLI.md
index b5fa9ee..8e05214 100644
--- a/docs/CLI.md
+++ b/docs/CLI.md
@@ -29,6 +29,8 @@
 || `odek subagent --goal <string> [flags]` | Run a focused sub-task; outputs JSON on stdout. Spawned by `delegate_tasks` tool |
 | `odek init [--global] [--force]` | Create a config file template |
 | `odek mcp [--sandbox]` | Start MCP server (expose tools to Claude Code) or connect to external MCP servers (via `mcp_servers` config) |
+| `odek telegram` | Start the Telegram bot (long-polling). Hosts the embedded scheduler unless `schedules.enabled=false` |
+| `odek schedule <subcommand>` | Manage native in-process scheduled tasks (cron): `list`, `add`, `rm`, `enable`, `disable`, `run`, `next`, `daemon`. See [Schedules](SCHEDULES.md) |
 | `odek version` | Print version and exit |
 
 ## Run flags
@@ -41,7 +43,7 @@
 | `--thinking <level>` | string | profile default | Reasoning depth: `enabled`/`disabled`/`low`/`medium`/`high`. Requires a model that supports extended thinking. |
 | `--thinking-budget <n>` | int | `5000` | Max thinking tokens for extended thinking (Anthropic budget_tokens). Only applied when `--thinking` is set. |
 | `--sandbox` | bool | false | Execute shell commands inside Docker container |
-| `--deliver` | bool | false | Deliver the agent's final response to the configured Telegram `default_chat_id`. Requires `telegram.bot_token` + `telegram.default_chat_id` in config. Use with cron for scheduled agent tasks. |
+| `--deliver` | bool | false | Deliver the agent's final response to the configured Telegram `default_chat_id`. Requires `telegram.bot_token` + `telegram.default_chat_id` in config. Handy for host-cron one-shots; for recurring tasks prefer the native scheduler (`odek schedule`, see [Schedules](SCHEDULES.md)). |
 | `--interaction-mode <mode>` | string | `engaging` | Tool-call rendering: `engaging` (emoji narration) or `verbose` (raw tool output) |
 | `--no-color` | bool | false | Disable colored terminal output |
 | `--prompt-caching` | bool | false | Enable Anthropic/OpenAI/DeepSeek prompt caching markers |
diff --git a/docs/DAILY-WORKER.md b/docs/DAILY-WORKER.md
index 6b2f157..2dc96bb 100644
--- a/docs/DAILY-WORKER.md
+++ b/docs/DAILY-WORKER.md
@@ -11,7 +11,7 @@
 | **Memory** | Per-session + persistent | 🏆 3-tier (facts, buffer, episodes) + vector |
 | **Skills auto-learning** | Manual skills | 🏆 Auto-detect patterns, trie-based trigger |
 | **Docker sandbox** | Manual | 🏆 Wired into agent loop |
-| **Cron / scheduling** | 🏆 Native | ❌ None |
+| **Cron / scheduling** | 🏆 Native | ✅ Native, in-process (`odek schedule`) |
 | **Web UI** | ❌ | 🏆 `odek serve` |
 | **MCP bidirectional** | Client only (native-mcp) | 🏆 Server + Client |
 
diff --git a/docs/index.html b/docs/index.html
index 508a7c4..d49a514 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -556,6 +556,12 @@ <h4>Telegram Bot</h4>
       <p>Full-featured Telegram bot with <code>odek telegram</code>. MarkdownV2 messages, inline keyboards, file attachments, <code>send_message</code> tool, and <code>--deliver</code> flag for cron integration. Interaction mode-aware with per-tool progress traces.</p>
     </div>
 
+    <div class="card">
+      <div class="icon">⏰</div>
+      <h4>Scheduled Tasks</h4>
+      <p>Native, in-process cron with <code>odek schedule</code> — no external cron daemon. Runs inside <code>odek telegram</code> or a standalone <code>odek schedule daemon</code>, so a scheduled task sees the same resolved config an interactive run does. Stdlib cron parser with Vixie day-of-month/day-of-week semantics, per-job timezones, missed-run catchup, and a singleton lock so jobs never double-fire.</p>
+    </div>
+
   </div>
 </section>
 

From a92400ae0ad0a23749daea7f5029d5473c74a5f0 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 5 Jun 2026 07:33:11 +0000
Subject: [PATCH 10/11] feat(schedule): manage schedules from Telegram
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add /schedules and /schedule slash commands so an authorized Telegram
user can list, view, preview, add, enable/disable, remove, and test-run
scheduled tasks without leaving the chat — closing the gap where the
native scheduler was CLI/file-only.

Command layer (cmd/odek/schedule_telegram.go):
- /schedules lists jobs; /schedule <sub> dispatches add|view|next|run|
  enable|disable|rm|help.
- add uses cron's fixed arity (an @macro or 5 fields) so no quoting is
  needed; options follow a literal '|' (deliver=, tz=, name=, catchup,
  disabled). Telegram delivery defaults to the originating chat.
- run returns the job's task for the bot to dispatch through the normal
  agent pipeline (progress + approvals visible), test-running it in chat.
- Replies use the existing MarkdownV2 pipeline; cron/IDs are wrapped in
  code spans to stay literal.

Wiring:
- Scheduler gains Reload() (buffered, coalescing) and a select case so
  in-chat edits reconcile immediately instead of waiting for the mtime
  poll; startSchedulerForBot now takes the shared store and publishes
  its Reload via scheduleReloadRef.
- telegram.go creates one schedule.Store, shares it with the embedded
  scheduler, and intercepts the two commands in OnCommand.

Safety/config:
- New schedules.allow_telegram_management (default true, env
  ODEK_SCHEDULES_ALLOW_TELEGRAM_MANAGEMENT) gates the mutating verbs;
  read-only listing/preview always works. Access is already bounded by
  the bot's allowed_chats/allowed_users.

Docs: SCHEDULES.md gains a 'Managing from Telegram' section; TELEGRAM.md,
CONFIG.md, docker/README.md and .env.example updated. Tests cover the
parser, every subcommand, the management gate, and the Reload trigger.
---
 cmd/odek/schedule.go               |  21 +-
 cmd/odek/schedule_cli_test.go      |   8 +-
 cmd/odek/schedule_telegram.go      | 376 +++++++++++++++++++++++++++++
 cmd/odek/schedule_telegram_test.go | 236 ++++++++++++++++++
 cmd/odek/telegram.go               |  30 ++-
 docker/.env.example                |   6 +-
 docker/README.md                   |  19 +-
 docs/CONFIG.md                     |   4 +-
 docs/SCHEDULES.md                  |  53 +++-
 docs/TELEGRAM.md                   |   2 +
 internal/config/loader.go          |  28 ++-
 internal/config/schedules_test.go  |  23 ++
 internal/schedule/coverage_test.go |  36 +++
 internal/schedule/scheduler.go     |  22 ++
 internal/telegram/commands.go      |  18 ++
 internal/telegram/commands_test.go |   1 +
 16 files changed, 856 insertions(+), 27 deletions(-)
 create mode 100644 cmd/odek/schedule_telegram.go
 create mode 100644 cmd/odek/schedule_telegram_test.go

diff --git a/cmd/odek/schedule.go b/cmd/odek/schedule.go
index d2f179b..446349e 100644
--- a/cmd/odek/schedule.go
+++ b/cmd/odek/schedule.go
@@ -396,6 +396,13 @@ var scheduleUnlockRef func()
 // would leak across every /restart.
 var mcpCleanupRef func()
 
+// scheduleReloadRef points at the running embedded scheduler's Reload method so
+// the Telegram `/schedule` commands can force an immediate reconcile after an
+// edit instead of waiting for the mtime poll. nil when no embedded scheduler is
+// running (disabled, or an external daemon holds the lock) — callers treat a nil
+// ref as a no-op and rely on the file write being picked up by whoever schedules.
+var scheduleReloadRef func()
+
 // telegramRunner runs a job's task headlessly and accounts its token usage
 // against the bot's daily budget.
 type telegramRunner struct {
@@ -448,20 +455,18 @@ func (d telegramDeliverer) Deliver(ctx context.Context, job schedule.Job, result
 // `odek schedule daemon` already holds the lock (in which case the bot defers
 // to it, to avoid double-firing). It returns a stop func that releases the
 // lock; the scheduler goroutine itself stops when ctx is cancelled.
-func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved config.ResolvedConfig, system string, log telegram.Logger) func() {
+func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved config.ResolvedConfig, system string, log telegram.Logger, st *schedule.Store) func() {
 	if !resolved.Schedules.Enabled {
 		log.Info("schedule: embedded scheduler disabled by config")
 		return func() {}
 	}
-	unlock, err := acquireScheduleLock()
-	if err != nil {
-		log.Info("schedule: embedded scheduler not started", "reason", err.Error())
+	if st == nil {
+		log.Error("schedule: store unavailable, embedded scheduler not started")
 		return func() {}
 	}
-	st, err := schedule.NewStore()
+	unlock, err := acquireScheduleLock()
 	if err != nil {
-		log.Error("schedule: store init failed", "error", err)
-		unlock()
+		log.Info("schedule: embedded scheduler not started", "reason", err.Error())
 		return func() {}
 	}
 	// Connect MCP servers once and share across fires. A failure here must not
@@ -478,6 +483,7 @@ func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved confi
 	)
 	scheduleUnlockRef = unlock
 	mcpCleanupRef = mcpCleanup
+	scheduleReloadRef = sched.Reload
 	done := make(chan struct{})
 	go func() {
 		defer close(done)
@@ -497,6 +503,7 @@ func startSchedulerForBot(ctx context.Context, bot *telegram.Bot, resolved confi
 	return func() {
 		scheduleUnlockRef = nil
 		mcpCleanupRef = nil
+		scheduleReloadRef = nil
 		// Wait for the scheduler to drain in-flight jobs before tearing down the
 		// shared MCP connections (otherwise a draining run sees broken pipes and
 		// persists a misleading error state) and releasing the lock. Bounded so a
diff --git a/cmd/odek/schedule_cli_test.go b/cmd/odek/schedule_cli_test.go
index 6afe2ec..2ab9fdf 100644
--- a/cmd/odek/schedule_cli_test.go
+++ b/cmd/odek/schedule_cli_test.go
@@ -291,18 +291,22 @@ func TestAcquireScheduleLock_HomeError(t *testing.T) {
 
 func TestStartSchedulerForBot_Disabled(t *testing.T) {
 	stop := startSchedulerForBot(context.Background(), nil, config.ResolvedConfig{}, "system",
-		telegram.NewFileLogger(telegram.LogInfo, ""))
+		telegram.NewFileLogger(telegram.LogInfo, ""), nil)
 	stop() // disabled → no-op stop, must not panic
 }
 
 func TestStartSchedulerForBot_StartAndStop(t *testing.T) {
 	t.Setenv("HOME", t.TempDir())
 	bot, _ := newRecordingTestBot(t)
+	st, err := schedule.NewStore()
+	if err != nil {
+		t.Fatalf("NewStore: %v", err)
+	}
 	resolved := config.ResolvedConfig{
 		Schedules: config.ScheduleConfig{Enabled: true, MaxConcurrent: 2, Timezone: "UTC"},
 	}
 	ctx, cancel := context.WithCancel(context.Background())
-	stop := startSchedulerForBot(ctx, bot, resolved, "system", telegram.NewFileLogger(telegram.LogInfo, ""))
+	stop := startSchedulerForBot(ctx, bot, resolved, "system", telegram.NewFileLogger(telegram.LogInfo, ""), st)
 	cancel()
 	stop() // drains the scheduler goroutine, cleans up MCP, releases the lock
 }
diff --git a/cmd/odek/schedule_telegram.go b/cmd/odek/schedule_telegram.go
new file mode 100644
index 0000000..4912dec
--- /dev/null
+++ b/cmd/odek/schedule_telegram.go
@@ -0,0 +1,376 @@
+package main
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/BackendStack21/odek/internal/schedule"
+)
+
+// This file implements the Telegram `/schedule` and `/schedules` slash commands,
+// letting an authorized user manage scheduled tasks from inside the chat. The
+// parsing/formatting lives here (store-backed, unit-testable); the `/schedule run`
+// dispatch — which needs the live agent pipeline — stays in telegram.go and is
+// driven by the runTask value that telegramScheduleReply returns.
+//
+// Replies use the same "odek markdown" dialect as the other handlers
+// (`*bold*`, `_italic_`, `` `code` ``); the bot's FormatResponse escapes
+// reserved MarkdownV2 characters outside code spans and falls back to plain
+// text on a parse error, so cron expressions and IDs are wrapped in backticks
+// to stay literal.
+
+const scheduleTelegramMaxRows = 20
+
+// telegramScheduleReply handles a `/schedule <sub> …` command and returns the
+// reply to send. When the subcommand is `run` and the job exists, runTask holds
+// the job's task for the caller to dispatch through the normal chat pipeline
+// (this helper has no agent access); it is empty otherwise.
+//
+// chatID is the originating chat — telegram-delivered jobs added here default to
+// delivering back to it. reload, if non-nil, is invoked after a mutation so the
+// embedded scheduler reconciles immediately. allowManage gates the mutating
+// verbs (add/rm/enable/disable/run); read-only verbs and typos are never gated.
+func telegramScheduleReply(chatID int64, argsStr string, st *schedule.Store, reload func(), allowManage bool) (reply, runTask string) {
+	if st == nil {
+		return "❌ Schedule store is unavailable.", ""
+	}
+	sub, rest, _ := strings.Cut(strings.TrimSpace(argsStr), " ")
+	sub = strings.ToLower(strings.TrimSpace(sub))
+	rest = strings.TrimSpace(rest)
+
+	// Read-only verbs answer immediately; known mutating verbs must pass the
+	// config gate; anything else is reported as unknown even when the gate is shut.
+	switch sub {
+	case "", "help":
+		return scheduleTelegramUsage(), ""
+	case "list", "ls":
+		return scheduleTelegramList(st), ""
+	case "view", "show":
+		return scheduleTelegramView(st, rest), ""
+	case "next":
+		return scheduleTelegramNext(st, rest), ""
+	case "add", "rm", "remove", "delete", "enable", "disable", "run":
+		if !allowManage {
+			return "🔒 Managing schedules from Telegram is disabled (`schedules.allow_telegram_management = false`). Use `odek schedule` on the host.", ""
+		}
+	default:
+		return fmt.Sprintf("❓ Unknown subcommand `%s`.\n\n%s", sub, scheduleTelegramUsage()), ""
+	}
+
+	// Only permitted, known mutating verbs reach this point.
+	switch sub {
+	case "add":
+		return scheduleTelegramAdd(chatID, rest, st, reload), ""
+	case "rm", "remove", "delete":
+		return scheduleTelegramRemove(st, rest, reload), ""
+	case "enable", "disable":
+		return scheduleTelegramSetEnabled(st, rest, sub == "enable", reload), ""
+	}
+	// "run" is the only verb left: hand the task back for the caller to dispatch.
+	return scheduleTelegramRun(st, rest)
+}
+
+func scheduleTelegramUsage() string {
+	return "⏰ *Schedule commands*\n\n" +
+		"`/schedules` — list jobs\n" +
+		"`/schedule add <cron> <task> [| opts]` — add a job (delivered to this chat)\n" +
+		"`/schedule view <id>` — job detail\n" +
+		"`/schedule next <id|cron>` — preview fire times\n" +
+		"`/schedule run <id>` — run once now, here\n" +
+		"`/schedule enable|disable <id>` — toggle a job\n" +
+		"`/schedule rm <id>` — remove a job\n\n" +
+		"*opts* (after ` | `): `deliver=stdout|log|telegram|telegram:<id>` `tz=<IANA>` `name=<label>` `catchup` `disabled`\n\n" +
+		"Example:\n`/schedule add 0 9 * * 1-5 Summarize my unread emails | tz=Europe/Berlin`"
+}
+
+func scheduleTelegramList(st *schedule.Store) string {
+	jobs, err := st.List()
+	if err != nil {
+		return "❌ " + err.Error()
+	}
+	if len(jobs) == 0 {
+		return "⏰ *No scheduled jobs.*\n\nAdd one: `/schedule add 0 9 * * 1-5 your task`"
+	}
+	state, _ := st.LoadState()
+	now := time.Now()
+	var b strings.Builder
+	b.WriteString("⏰ *Scheduled jobs*\n\n")
+	for i, j := range jobs {
+		if i >= scheduleTelegramMaxRows {
+			fmt.Fprintf(&b, "\n_…and %d more — use `odek schedule list` on the host._", len(jobs)-scheduleTelegramMaxRows)
+			break
+		}
+		onOff := "🟢"
+		if !j.Enabled {
+			onOff = "⚪️"
+		}
+		next := "—"
+		if s, err := jobSchedule(j); err != nil {
+			next = "invalid"
+		} else if nt := s.Next(now); !nt.IsZero() {
+			next = nt.Local().Format("Mon 02 Jan 15:04")
+		}
+		last := ""
+		if rs, ok := state[j.ID]; ok && rs.LastStatus != "" {
+			last = " · " + rs.LastStatus
+		}
+		fmt.Fprintf(&b, "%s `%s` `%s`%s\n   next %s\n", onOff, j.ID, j.Cron, last, next)
+		if j.Name != "" {
+			fmt.Fprintf(&b, "   _%s_\n", j.Name)
+		}
+	}
+	return strings.TrimRight(b.String(), "\n")
+}
+
+func scheduleTelegramView(st *schedule.Store, id string) string {
+	id = strings.TrimSpace(id)
+	if id == "" {
+		return "❗ Usage: `/schedule view <id>`"
+	}
+	job, ok, err := st.Get(id)
+	if err != nil {
+		return "❌ " + err.Error()
+	}
+	if !ok {
+		return fmt.Sprintf("❌ No job with ID `%s`.", id)
+	}
+	state, _ := st.LoadState()
+	status := "enabled"
+	if !job.Enabled {
+		status = "disabled"
+	}
+	var b strings.Builder
+	fmt.Fprintf(&b, "⏰ *Job* `%s` (%s)\n", job.ID, status)
+	if job.Name != "" {
+		fmt.Fprintf(&b, "*Name:* %s\n", job.Name)
+	}
+	fmt.Fprintf(&b, "*Cron:* `%s`\n", job.Cron)
+	if job.Timezone != "" {
+		fmt.Fprintf(&b, "*TZ:* %s\n", job.Timezone)
+	}
+	fmt.Fprintf(&b, "*Deliver:* %s\n", deliverString(job.Deliver))
+	fmt.Fprintf(&b, "*Task:* %s\n", job.Task)
+	if rs, ok := state[job.ID]; ok {
+		if rs.LastStatus != "" {
+			fmt.Fprintf(&b, "*Last:* %s", rs.LastStatus)
+			if !rs.LastRun.IsZero() {
+				fmt.Fprintf(&b, " (%s)", rs.LastRun.Local().Format("Mon 02 Jan 15:04"))
+			}
+			b.WriteString("\n")
+		}
+		if rs.LastError != "" {
+			fmt.Fprintf(&b, "*Error:* %s\n", rs.LastError)
+		}
+	}
+	if s, err := jobSchedule(job); err == nil {
+		b.WriteString("*Next fires:*\n")
+		t := time.Now()
+		for range 3 {
+			t = s.Next(t)
+			if t.IsZero() {
+				break
+			}
+			fmt.Fprintf(&b, "  %s\n", t.Local().Format("Mon 02 Jan 15:04"))
+		}
+	}
+	return strings.TrimRight(b.String(), "\n")
+}
+
+func scheduleTelegramNext(st *schedule.Store, arg string) string {
+	arg = strings.TrimSpace(arg)
+	if arg == "" {
+		return "❗ Usage: `/schedule next <id|cron>`"
+	}
+	var sc *schedule.Schedule
+	var header string
+	// A bare token with no spaces or cron metacharacters may be a job ID.
+	if !strings.ContainsAny(arg, " *") {
+		if job, ok, err := st.Get(arg); err == nil && ok {
+			s, jerr := jobSchedule(job)
+			if jerr != nil {
+				return "❌ " + jerr.Error()
+			}
+			sc, header = s, fmt.Sprintf("⏰ Job `%s` (`%s`)", job.ID, job.Cron)
+		}
+	}
+	if sc == nil {
+		s, err := schedule.Parse(arg)
+		if err != nil {
+			return "❌ " + err.Error()
+		}
+		sc, header = s, fmt.Sprintf("⏰ `%s` (UTC)", arg)
+	}
+	var b strings.Builder
+	b.WriteString(header + "\n")
+	t := time.Now()
+	for range 5 {
+		t = sc.Next(t)
+		if t.IsZero() {
+			b.WriteString("  _(no further fires within the horizon)_\n")
+			break
+		}
+		fmt.Fprintf(&b, "  %s\n", t.Local().Format("Mon 02 Jan 15:04 MST"))
+	}
+	return strings.TrimRight(b.String(), "\n")
+}
+
+func scheduleTelegramAdd(chatID int64, args string, st *schedule.Store, reload func()) string {
+	job, errMsg := parseTelegramScheduleAdd(chatID, args)
+	if errMsg != "" {
+		return errMsg
+	}
+	saved, err := st.Add(job)
+	if err != nil {
+		return "❌ " + err.Error()
+	}
+	if reload != nil {
+		reload()
+	}
+	next := "—"
+	if s, err := jobSchedule(saved); err == nil {
+		if nt := s.Next(time.Now()); !nt.IsZero() {
+			next = nt.Local().Format("Mon 02 Jan 15:04")
+		}
+	}
+	status := "enabled"
+	if !saved.Enabled {
+		status = "disabled"
+	}
+	return fmt.Sprintf("✅ *Added* `%s` (%s)\n*Name:* %s\n*Cron:* `%s`\n*Deliver:* %s\n*Next:* %s",
+		saved.ID, status, saved.Name, saved.Cron, deliverString(saved.Deliver), next)
+}
+
+// parseTelegramScheduleAdd turns a chat-friendly add string into a Job. It
+// returns either a populated Job or a user-facing error reply (never both).
+//
+// Grammar: <cron|@macro> <task…> [| key=value … flag …]. Cron's fixed arity
+// resolves the cron/task boundary without quoting — an @macro is one token, a
+// classic expression is exactly five whitespace fields, and the remainder is the
+// task. Options come after a literal "|"; a flag written `flag=false` is off.
+func parseTelegramScheduleAdd(chatID int64, args string) (schedule.Job, string) {
+	args = strings.TrimSpace(args)
+	if args == "" {
+		return schedule.Job{}, "❗ Usage: `/schedule add <cron> <task> [| opts]`\n\nExample: `/schedule add 0 9 * * 1-5 Stand-up reminder`"
+	}
+	head, optStr, _ := strings.Cut(args, "|") // named head, not main, to avoid shadowing func main
+	cron, task, ok := splitCronTask(head)
+	if !ok {
+		return schedule.Job{}, "❗ Could not read the cron and task. Provide 5 cron fields (or an `@macro`) then the task:\n`/schedule add 0 9 * * 1-5 your task`"
+	}
+	opts := parseScheduleOpts(optStr)
+
+	delStr := opts["deliver"]
+	if delStr == "" {
+		delStr = "telegram"
+	}
+	del, err := parseDeliver(delStr)
+	if err != nil {
+		return schedule.Job{}, "❌ " + err.Error()
+	}
+	// A telegram delivery with no explicit chat defaults to THIS chat — the
+	// natural expectation when adding from a conversation.
+	if del.Kind == schedule.DeliverTelegram && del.ChatID == 0 {
+		del.ChatID = chatID
+	}
+
+	name := opts["name"]
+	if name == "" {
+		name = firstWords(task, 6)
+	}
+	return schedule.Job{
+		Name:     name,
+		Cron:     cron,
+		Task:     task,
+		Deliver:  del,
+		Timezone: opts["tz"],
+		Catchup:  opts["catchup"] != "" && !strings.EqualFold(opts["catchup"], "false"),
+		Enabled:  opts["disabled"] == "" || strings.EqualFold(opts["disabled"], "false"),
+	}, ""
+}
+
+// splitCronTask separates a cron expression (a single @macro or exactly five
+// whitespace fields) from the trailing task text; any whitespace ends a macro.
+func splitCronTask(s string) (cron, task string, ok bool) {
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return "", "", false
+	}
+	if strings.HasPrefix(s, "@") {
+		macro := strings.Fields(s)[0] // s is trimmed and non-empty, so this is its leading token
+		rest := strings.TrimSpace(s[len(macro):])
+		if rest == "" {
+			return "", "", false
+		}
+		return macro, rest, true
+	}
+	fields := strings.Fields(s)
+	if len(fields) < 6 { // 5 cron fields + at least one task word
+		return "", "", false
+	}
+	return strings.Join(fields[:5], " "), strings.Join(fields[5:], " "), true
+}
+
+// parseScheduleOpts turns the whitespace-separated option tail into a lookup
+// table: `key=value` stores value under the lower-cased key, a bare flag stores "true".
+func parseScheduleOpts(s string) map[string]string {
+	opts := make(map[string]string)
+	for _, field := range strings.Fields(s) {
+		key, val, hasVal := strings.Cut(field, "=")
+		if !hasVal {
+			val = "true" // bare flag such as `catchup`
+		}
+		opts[strings.ToLower(strings.TrimSpace(key))] = strings.TrimSpace(val)
+	}
+	return opts
+}
+
+// scheduleTelegramRemove deletes job id from st and returns the chat reply;
+// reload, when supplied, runs only after the removal succeeded.
+func scheduleTelegramRemove(st *schedule.Store, id string, reload func()) string {
+	if id = strings.TrimSpace(id); id == "" {
+		return "❗ Usage: `/schedule rm <id>`"
+	}
+	if rmErr := st.Remove(id); rmErr != nil {
+		return "❌ " + rmErr.Error()
+	}
+	if reload != nil {
+		reload()
+	}
+	return "🗑️ Removed `" + id + "`."
+}
+
+// scheduleTelegramSetEnabled switches job id on or off via st.SetEnabled and
+// returns the chat reply. An empty id yields the usage hint for the matching
+// verb; reload, when supplied, runs only after a successful store update.
+func scheduleTelegramSetEnabled(st *schedule.Store, id string, enabled bool, reload func()) string {
+	// Pick the verb and the success reply up front so the flow below is linear.
+	verb, done := "disable", "⚪️ Disabled `%s` (kept, won't fire)."
+	if enabled {
+		verb, done = "enable", "🟢 Enabled `%s`."
+	}
+	if id = strings.TrimSpace(id); id == "" {
+		return fmt.Sprintf("❗ Usage: `/schedule %s <id>`", verb)
+	}
+	if err := st.SetEnabled(id, enabled); err != nil {
+		return "❌ " + err.Error()
+	}
+	if reload != nil {
+		reload()
+	}
+	return fmt.Sprintf(done, id)
+}
+
+// scheduleTelegramRun looks up job id; on success runTask carries the job's task, else it is empty.
+func scheduleTelegramRun(st *schedule.Store, id string) (reply, runTask string) {
+	if id = strings.TrimSpace(id); id == "" {
+		return "❗ Usage: `/schedule run <id>`", ""
+	}
+	switch job, found, err := st.Get(id); {
+	case err != nil:
+		return "❌ " + err.Error(), ""
+	case !found:
+		return fmt.Sprintf("❌ No job with ID `%s`.", id), ""
+	default:
+		return fmt.Sprintf("🏃 Running `%s` (%s) now — the result will arrive here.", job.ID, job.Name), job.Task
+	}
+}
diff --git a/cmd/odek/schedule_telegram_test.go b/cmd/odek/schedule_telegram_test.go
new file mode 100644
index 0000000..dfe8120
--- /dev/null
+++ b/cmd/odek/schedule_telegram_test.go
@@ -0,0 +1,236 @@
+package main
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/BackendStack21/odek/internal/schedule"
+)
+
+func newTGStore(t *testing.T) *schedule.Store {
+	t.Helper() // attribute failures to the calling test
+	st, err := schedule.NewStoreAt(t.TempDir()) // fresh per-test temp directory
+	if err != nil {
+		t.Fatalf("NewStoreAt: %v", err)
+	}
+	return st
+}
+
+// ── splitCronTask / parseScheduleOpts ──────────────────────────────────────
+
+func TestSplitCronTask(t *testing.T) {
+	tests := []struct {
+		in       string
+		wantCron string
+		wantTask string
+		wantOK   bool
+	}{
+		{"0 9 * * 1-5 stand up now", "0 9 * * 1-5", "stand up now", true},
+		{"@daily summarize my email", "@daily", "summarize my email", true},
+		{"  @hourly   check builds ", "@hourly", "check builds", true},
+		{"0 9 * * 1-5", "", "", false}, // 5 fields, no task
+		{"0 9 task", "", "", false},    // too few cron fields
+		{"@daily", "", "", false},      // macro, no task
+		{"", "", "", false},
+	}
+	for _, tc := range tests {
+		cron, task, ok := splitCronTask(tc.in)
+		if ok != tc.wantOK || (ok && (cron != tc.wantCron || task != tc.wantTask)) {
+			t.Errorf("splitCronTask(%q) = (%q,%q,%v), want (%q,%q,%v)",
+				tc.in, cron, task, ok, tc.wantCron, tc.wantTask, tc.wantOK)
+		}
+	}
+}
+
+func TestParseScheduleOpts(t *testing.T) {
+	got := parseScheduleOpts(" tz=Europe/Berlin deliver=log catchup name=standup ")
+	if got["tz"] != "Europe/Berlin" || got["deliver"] != "log" || got["name"] != "standup" {
+		t.Errorf("key=value parse wrong: %+v", got)
+	}
+	if got["catchup"] != "true" {
+		t.Errorf("bare flag should map to true: %+v", got)
+	}
+	if len(parseScheduleOpts("")) != 0 {
+		t.Error("empty opts should be empty map")
+	}
+}
+
+// ── add ─────────────────────────────────────────────────────────────────────
+
+func TestTelegramScheduleAdd_DefaultsToThisChat(t *testing.T) {
+	st := newTGStore(t)
+	reloaded := false
+	reply, run := telegramScheduleReply(555, "add 0 9 * * 1-5 Summarize my unread email",
+		st, func() { reloaded = true }, true)
+	if run != "" {
+		t.Error("add should not produce a runTask")
+	}
+	if !strings.Contains(reply, "Added") {
+		t.Fatalf("unexpected reply: %q", reply)
+	}
+	if !reloaded {
+		t.Error("add should trigger a reload")
+	}
+	jobs, _ := st.List()
+	if len(jobs) != 1 {
+		t.Fatalf("want 1 job, got %d", len(jobs))
+	}
+	j := jobs[0]
+	if j.Deliver.Kind != schedule.DeliverTelegram || j.Deliver.ChatID != 555 {
+		t.Errorf("delivery should default to telegram:555, got %+v", j.Deliver)
+	}
+	if j.Name == "" || j.Cron != "0 9 * * 1-5" || !j.Enabled {
+		t.Errorf("job fields wrong: %+v", j)
+	}
+}
+
+func TestTelegramScheduleAdd_Options(t *testing.T) {
+	st := newTGStore(t)
+	reply, _ := telegramScheduleReply(7, "add @daily Daily digest | tz=Europe/Berlin name=digest deliver=log catchup disabled",
+		st, nil, true)
+	if !strings.Contains(reply, "Added") {
+		t.Fatalf("unexpected reply: %q", reply)
+	}
+	jobs, _ := st.List()
+	j := jobs[0]
+	if j.Timezone != "Europe/Berlin" || j.Name != "digest" || !j.Catchup || j.Enabled {
+		t.Errorf("options not applied: %+v", j)
+	}
+	if j.Deliver.Kind != schedule.DeliverLog {
+		t.Errorf("deliver=log not applied: %+v", j.Deliver)
+	}
+}
+
+func TestTelegramScheduleAdd_Errors(t *testing.T) {
+	st := newTGStore(t)
+	cases := map[string]string{ // case label → /schedule argument string
+		"missing args":   "add",
+		"too few fields": "add 0 9 too-short",
+		"invalid cron":   "add nope nope nope nope nope do the thing",
+		"bad deliver":    "add 0 9 * * * a task | deliver=pigeon",
+	}
+	for name, args := range cases {
+		reply, _ := telegramScheduleReply(1, args, st, nil, true)
+		if !strings.HasPrefix(reply, "❗") && !strings.HasPrefix(reply, "❌") { // either marker counts as a rejection
+			t.Errorf("%s: expected an error reply, got %q", name, reply)
+		}
+	}
+	if jobs, _ := st.List(); len(jobs) != 0 { // all cases share st, so none may have saved a job
+		t.Errorf("no job should persist from failed adds, got %d", len(jobs))
+	}
+}
+
+// ── list / view / next ──────────────────────────────────────────────────────
+
+func TestTelegramScheduleListViewNext(t *testing.T) {
+	st := newTGStore(t)
+	if reply, _ := telegramScheduleReply(1, "list", st, nil, true); !strings.Contains(reply, "No scheduled jobs") {
+		t.Errorf("empty list reply: %q", reply)
+	}
+	a, _ := st.Add(schedule.Job{Name: "morning", Cron: "0 9 * * *", Task: "x",
+		Deliver: schedule.Delivery{Kind: schedule.DeliverStdout}, Enabled: true})
+
+	if reply, _ := telegramScheduleReply(1, "list", st, nil, true); !strings.Contains(reply, a.ID) {
+		t.Errorf("list should include the job id: %q", reply)
+	}
+	if reply, _ := telegramScheduleReply(1, "view "+a.ID, st, nil, true); !strings.Contains(reply, "morning") {
+		t.Errorf("view reply: %q", reply)
+	}
+	if reply, _ := telegramScheduleReply(1, "view jb-missing", st, nil, true); !strings.Contains(reply, "No job") {
+		t.Errorf("view-missing reply: %q", reply)
+	}
+	if reply, _ := telegramScheduleReply(1, "next "+a.ID, st, nil, true); !strings.Contains(reply, a.ID) {
+		t.Errorf("next-by-id reply: %q", reply)
+	}
+	if reply, _ := telegramScheduleReply(1, "next */15 * * * *", st, nil, true); !strings.Contains(reply, "UTC") {
+		t.Errorf("next-by-cron reply: %q", reply)
+	}
+	if reply, _ := telegramScheduleReply(1, "next not-a-cron", st, nil, true); !strings.HasPrefix(reply, "❌") {
+		t.Errorf("next-bad reply: %q", reply)
+	}
+}
+
+// ── rm / enable / disable / run ─────────────────────────────────────────────
+
+func TestTelegramScheduleMutations(t *testing.T) {
+	st := newTGStore(t)
+	a, _ := st.Add(schedule.Job{Name: "j", Cron: "0 9 * * *", Task: "do it",
+		Deliver: schedule.Delivery{Kind: schedule.DeliverStdout}, Enabled: true})
+
+	// disable / enable
+	if reply, _ := telegramScheduleReply(1, "disable "+a.ID, st, nil, true); !strings.Contains(reply, "Disabled") {
+		t.Errorf("disable reply: %q", reply)
+	}
+	if j, _, _ := st.Get(a.ID); j.Enabled {
+		t.Error("job should be disabled")
+	}
+	if reply, _ := telegramScheduleReply(1, "enable "+a.ID, st, nil, true); !strings.Contains(reply, "Enabled") {
+		t.Errorf("enable reply: %q", reply)
+	}
+
+	// run → returns the job task for the caller to dispatch
+	reply, run := telegramScheduleReply(1, "run "+a.ID, st, nil, true)
+	if run != "do it" || !strings.Contains(reply, "Running") {
+		t.Errorf("run should return the task: reply=%q run=%q", reply, run)
+	}
+	if _, miss := telegramScheduleReply(1, "run jb-missing", st, nil, true); miss != "" {
+		t.Error("run of a missing job should not produce a runTask")
+	}
+
+	// rm
+	if reply, _ := telegramScheduleReply(1, "rm "+a.ID, st, nil, true); !strings.Contains(reply, "Removed") {
+		t.Errorf("rm reply: %q", reply)
+	}
+	if jobs, _ := st.List(); len(jobs) != 0 {
+		t.Errorf("job not removed, %d remain", len(jobs))
+	}
+
+	// usage errors for missing ids
+	for _, args := range []string{"rm", "enable", "disable", "run", "view"} {
+		if reply, _ := telegramScheduleReply(1, args, st, nil, true); !strings.HasPrefix(reply, "❗") {
+			t.Errorf("%q with no id should return usage, got %q", args, reply)
+		}
+	}
+}
+
+// ── gating / help / nil store ───────────────────────────────────────────────
+
+func TestTelegramSchedule_ManagementGate(t *testing.T) {
+	st := newTGStore(t)
+	a, _ := st.Add(schedule.Job{Name: "j", Cron: "0 9 * * *", Task: "x",
+		Deliver: schedule.Delivery{Kind: schedule.DeliverStdout}, Enabled: true})
+
+	// Mutating verbs are refused when management is disabled.
+	for _, args := range []string{"add 0 9 * * * t", "rm " + a.ID, "enable " + a.ID, "disable " + a.ID, "run " + a.ID} {
+		reply, run := telegramScheduleReply(1, args, st, nil, false)
+		if !strings.Contains(reply, "disabled") || run != "" {
+			t.Errorf("gated %q should be refused, got %q", args, reply)
+		}
+	}
+	// Read-only verbs still work.
+	if reply, _ := telegramScheduleReply(1, "list", st, nil, false); !strings.Contains(reply, a.ID) {
+		t.Errorf("list should work even when management is disabled: %q", reply)
+	}
+	if reply, _ := telegramScheduleReply(1, "view "+a.ID, st, nil, false); strings.Contains(reply, "disabled (`schedules") {
+		t.Error("view should not be gated")
+	}
+	// The job must be untouched.
+	if jobs, _ := st.List(); len(jobs) != 1 {
+		t.Errorf("gated mutations must not change the store, %d jobs", len(jobs))
+	}
+}
+
+func TestTelegramSchedule_HelpAndUnknownAndNilStore(t *testing.T) {
+	st := newTGStore(t)
+	for _, args := range []string{"", "help"} {
+		if reply, _ := telegramScheduleReply(1, args, st, nil, true); !strings.Contains(reply, "Schedule commands") {
+			t.Errorf("%q should return usage, got %q", args, reply)
+		}
+	}
+	if reply, _ := telegramScheduleReply(1, "bogus", st, nil, true); !strings.Contains(reply, "Unknown subcommand") {
+		t.Errorf("unknown subcommand reply: %q", reply)
+	}
+	if reply, _ := telegramScheduleReply(1, "list", nil, nil, true); !strings.Contains(reply, "unavailable") {
+		t.Errorf("nil store should report unavailable, got %q", reply)
+	}
+}
diff --git a/cmd/odek/telegram.go b/cmd/odek/telegram.go
index e0cc266..b3e5d2b 100644
--- a/cmd/odek/telegram.go
+++ b/cmd/odek/telegram.go
@@ -22,6 +22,7 @@ import (
 	"github.com/BackendStack21/odek/internal/llm"
 	"github.com/BackendStack21/odek/internal/loop"
 	"github.com/BackendStack21/odek/internal/render"
+	"github.com/BackendStack21/odek/internal/schedule"
 	"github.com/BackendStack21/odek/internal/session"
 	"github.com/BackendStack21/odek/internal/skills"
 	"github.com/BackendStack21/odek/internal/telegram"
@@ -216,12 +217,39 @@ func telegramCmd(args []string) error {
 		return "", nil
 	}
 
+	// Shared schedule store for the in-chat /schedule commands. Created once and
+	// also handed to the embedded scheduler below so both sides agree. A nil
+	// store (rare init failure) degrades to a friendly error from the commands.
+	scheduleStore, err := schedule.NewStore()
+	if err != nil {
+		handlerLog.Warn("schedule: store unavailable; /schedule commands degraded", "error", err)
+		scheduleStore = nil
+	}
+
 	handler.OnCommand = func(chatID int64, messageID int, cmdName string, argsStr string) (string, error) {
 		cmd := telegram.FindCommand(cmdName)
 		if cmd == nil {
 			return fmt.Sprintf("Unknown command: /%s", cmdName), nil
 		}
 
+		// Handle /schedules and /schedule — manage scheduled tasks. scheduleReloadRef
+		// is set by the embedded scheduler (nil if none runs here); the parsing and
+		// formatting live in schedule_telegram.go. A /schedule run dispatches the
+		// job's task through the normal chat pipeline.
+		if cmdName == "schedules" || cmdName == "schedule" {
+			sub := argsStr
+			if cmdName == "schedules" {
+				sub = "list"
+			}
+			reply, runTask := telegramScheduleReply(chatID, sub, scheduleStore,
+				scheduleReloadRef, resolved.Schedules.AllowTelegramManagement)
+			if runTask != "" {
+				go handleChatMessage(chatID, messageID, runTask, bot, handler, sessionManager,
+					resolved, systemMessage, handlerLog)
+			}
+			return reply, nil
+		}
+
 		// Handle /restart — return confirmation message, then signal SIGHUP.
 		// The message is sent through the standard response pipeline (MarkdownV2 +
 		// retry logic). SIGHUP fires asynchronously after a short delay so the
@@ -628,7 +656,7 @@ func telegramCmd(args []string) error {
 	// process's resolved config — so no environment-inheritance problem and no
 	// separate cron daemon. If an external `odek schedule daemon` already holds
 	// the lock, this defers to it instead of double-firing.
-	stopScheduler := startSchedulerForBot(ctx, bot, resolved, systemMessage, handlerLog)
+	stopScheduler := startSchedulerForBot(ctx, bot, resolved, systemMessage, handlerLog, scheduleStore)
 	defer stopScheduler()
 
 	// 17. Process updates until the channel is closed (ctx cancelled).
diff --git a/docker/.env.example b/docker/.env.example
index 011fb68..dc561e4 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -52,9 +52,11 @@ GIT_COMMITTER_EMAIL=you@example.com
 # ODEK_TELEGRAM_HEALTH_ADDR=0.0.0.0:9090      # optional GET /health endpoint
 
 # ── Scheduled tasks (native cron; see docs/SCHEDULES.md) ─────────────────
-# The Telegram bot runs the scheduler in-process. Manage jobs with
-# `odek schedule add|list|...`; they persist in ./.odek/schedules.json.
+# The Telegram bot runs the scheduler in-process. Manage jobs from the chat
+# with /schedules and /schedule add|view|next|run|enable|disable|rm, or via
+# `odek schedule …` on the host; jobs persist in ./.odek/schedules.json.
 # ODEK_SCHEDULES_ENABLED=true                 # set false to disable the embedded scheduler
 # ODEK_SCHEDULES_MAX_CONCURRENT=2             # max jobs running at once
 # ODEK_SCHEDULES_TIMEZONE=UTC                 # default tz for jobs without their own
 # ODEK_SCHEDULES_CATCHUP=false                # run a missed fire once on startup
+# ODEK_SCHEDULES_ALLOW_TELEGRAM_MANAGEMENT=true  # set false to make /schedule read-only (manage via the CLI)
diff --git a/docker/README.md b/docker/README.md
index 7ec948b..2202143 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -108,17 +108,26 @@ Full guide: [../docs/SCHEDULES.md](../docs/SCHEDULES.md).
 
 1. In `.env`, set **`ODEK_TELEGRAM_DEFAULT_CHAT_ID`** — the chat reminders are sent to
    (usually your own ID, the same as `ODEK_TELEGRAM_ALLOWED_CHATS`).
-2. Add a job. Either run the CLI inside the container:
+2. Add a job. The easiest way is **from the chat itself** — message the bot:
+
+   ```text
+   /schedule add 0 9 * * 1-5 Stand-up in 15 minutes
+   ```
+
+   Jobs added this way deliver back to that chat by default. Use `/schedules`
+   to list and `/schedule view|next|run|enable|disable|rm` to manage them. To keep
+   management host-only, set `ODEK_SCHEDULES_ALLOW_TELEGRAM_MANAGEMENT=false`
+   (the chat can still list and preview).
+
+   You can also run the CLI inside the container, or edit
+   `./.odek/schedules.json` on the host directly — jobs persist in the `./.odek`
+   volume and the running bot picks up changes automatically:
 
    ```bash
    docker compose --profile telegram-restricted exec odek-telegram-restricted \
      odek schedule add --cron "0 9 * * 1-5" --deliver telegram "Stand-up in 15 minutes"
    ```
 
-   …or edit `./.odek/schedules.json` on the host directly. Jobs persist in the
-   `./.odek` volume and the running bot picks up changes automatically.
-3. Inspect with `odek schedule list` / `odek schedule next <id>`.
-
 Don't run a separate `odek schedule daemon` against the same `./.odek` while the
 bot is up — a shared lock prevents double-firing, but the daemon will refuse to
 start (non-zero exit, "another schedule daemon is already running") when the bot
diff --git a/docs/CONFIG.md b/docs/CONFIG.md
index 4e9383b..ec4d8ff 100644
--- a/docs/CONFIG.md
+++ b/docs/CONFIG.md
@@ -337,7 +337,8 @@ engine. Every field has an `ODEK_SCHEDULES_*` environment override.
     "enabled": true,
     "max_concurrent": 2,
     "timezone": "UTC",
-    "catchup": false
+    "catchup": false,
+    "allow_telegram_management": true
   }
 }
 ```
@@ -348,6 +349,7 @@ engine. Every field has an `ODEK_SCHEDULES_*` environment override.
 | `max_concurrent` | `ODEK_SCHEDULES_MAX_CONCURRENT` | `2` | Maximum scheduled jobs running at once. |
 | `timezone` | `ODEK_SCHEDULES_TIMEZONE` | `UTC` | Default timezone for jobs that don't set their own `--tz`. |
 | `catchup` | `ODEK_SCHEDULES_CATCHUP` | `false` | Global default for the missed-run policy: run a missed fire once on startup. |
+| `allow_telegram_management` | `ODEK_SCHEDULES_ALLOW_TELEGRAM_MANAGEMENT` | `true` | Allow the Telegram `/schedule` commands to create/remove/toggle/run jobs. When false, the bot still lists and previews jobs but mutations must go through `odek schedule`. |
 
 Full guide: [docs/SCHEDULES.md](SCHEDULES.md).
 
diff --git a/docs/SCHEDULES.md b/docs/SCHEDULES.md
index 08cba83..da6522d 100644
--- a/docs/SCHEDULES.md
+++ b/docs/SCHEDULES.md
@@ -67,6 +67,55 @@ scheduler picks up edits to the definitions file automatically (no restart).
 
 ---
 
+## Managing from Telegram
+
+When you run `odek telegram`, the same jobs can be managed from inside the chat
+— no shell access needed. Two slash commands mirror the CLI:
+
+```text
+/schedules                                  List jobs (id, on/off, cron, next fire, last status)
+/schedule add <cron> <task> [| opts]        Add a job (delivered to this chat by default)
+/schedule view <id>                         Show a job's full detail + recent status
+/schedule next <id|cron>                    Preview the next few fire times
+/schedule run <id>                          Run a job once now, in this chat
+/schedule enable | disable <id>             Toggle a job
+/schedule rm <id>                           Remove a job
+```
+
+Because a cron expression has a fixed shape, `add` needs no quoting: an
+`@macro` is one token and a classic expression is exactly **five** fields; the
+rest of the line is the task. Options come after a literal `|`:
+
+```text
+/schedule add 0 9 * * 1-5 Summarize my unread email
+/schedule add @daily Daily standup digest | tz=Europe/Berlin name=standup
+/schedule add */15 9-17 * * 1-5 Check the build | deliver=telegram catchup
+```
+
+| Option (after `|`) | Meaning |
+|---|---|
+| `deliver=<dest>` | `stdout`, `log`, `telegram`, or `telegram:<chatID>`. **Default: this chat.** |
+| `tz=<IANA>` | Per-job timezone, e.g. `Europe/Berlin` |
+| `name=<label>` | Human label (single token; default: first words of the task) |
+| `catchup` | Run a missed fire once on startup |
+| `disabled` | Add without enabling |
+
+Notes:
+
+- **Delivery defaults to the current chat** (unlike the CLI, which defaults to
+  `stdout`) — adding a job from a conversation sends its results back there.
+- `/schedule run` executes the task **now, in this chat**, through the normal
+  agent pipeline (you see progress and answer any approval prompts) — a safe way
+  to test a job. It does not change the job's own schedule or delivery.
+- Edits made from Telegram take effect **immediately** (the embedded scheduler
+  reconciles on the spot, not on the ~30 s poll).
+- Only chats/users on the bot's allowlist (`ODEK_TELEGRAM_ALLOWED_CHATS` /
+  `ALLOWED_USERS`) reach these commands. To keep schedule **management**
+  CLI-only while still allowing in-chat listing/preview, set
+  `schedules.allow_telegram_management` to `false` (read-only verbs still work).
+
+---
+
 ## Cron syntax
 
 Standard 5-field Vixie cron:
@@ -140,7 +189,8 @@ the engine. Every field also has an `ODEK_SCHEDULES_*` environment override.
     "enabled": true,
     "max_concurrent": 2,
     "timezone": "UTC",
-    "catchup": false
+    "catchup": false,
+    "allow_telegram_management": true
   }
 }
 ```
@@ -151,6 +201,7 @@ the engine. Every field also has an `ODEK_SCHEDULES_*` environment override.
 | `max_concurrent` | `ODEK_SCHEDULES_MAX_CONCURRENT` | `2` | Max jobs running at once |
 | `timezone` | `ODEK_SCHEDULES_TIMEZONE` | `UTC` | Default timezone for jobs without `--tz` |
 | `catchup` | `ODEK_SCHEDULES_CATCHUP` | `false` | Global default for the missed-run policy |
+| `allow_telegram_management` | `ODEK_SCHEDULES_ALLOW_TELEGRAM_MANAGEMENT` | `true` | Allow the in-chat `/schedule` commands to add/remove/toggle/run jobs (read-only listing always works) |
 
 ---
 
diff --git a/docs/TELEGRAM.md b/docs/TELEGRAM.md
index 27ecdad..3720fb4 100644
--- a/docs/TELEGRAM.md
+++ b/docs/TELEGRAM.md
@@ -209,6 +209,8 @@ The handler uses `sync.Map` for `TelegramApprover` instances, keyed by `chatID`.
 | `/sessions` | List recent conversation sessions |
 | `/resume <session_id>` | Resume a previous session by ID |
 | `/prune [days]` | Clean up old sessions (default: 30 days) |
+| `/schedules` | List scheduled tasks (id, on/off, cron, next fire, last status) |
+| `/schedule <subcommand>` | Manage scheduled tasks — `add`, `rm`, `enable`, `disable`, `run`, `next`, `view`. See [Managing schedules from Telegram](SCHEDULES.md#managing-from-telegram) |
 
 ### Architecture
 
diff --git a/internal/config/loader.go b/internal/config/loader.go
index 899ebc0..4194325 100644
--- a/internal/config/loader.go
+++ b/internal/config/loader.go
@@ -609,6 +609,9 @@ func LoadConfig(cli CLIFlags) ResolvedConfig {
 	if v := envBool("SCHEDULES_CATCHUP"); v != nil {
 		cfg.Schedules.Catchup = v
 	}
+	if v := envBool("SCHEDULES_ALLOW_TELEGRAM_MANAGEMENT"); v != nil {
+		cfg.Schedules.AllowTelegramManagement = v
+	}
 
 	// Telegram env overrides: merge env vars on top of file config.
 	baseTelegram := telegram.DefaultConfig()
@@ -966,23 +969,29 @@ type SchedulesConfig struct {
 	MaxConcurrent int    `json:"max_concurrent,omitempty"` // max jobs running at once (default 2)
 	Timezone      string `json:"timezone,omitempty"`       // default timezone for jobs with none (default UTC)
 	Catchup       *bool  `json:"catchup,omitempty"`        // global default: run a missed fire once on startup (default false)
+	// AllowTelegramManagement gates the in-chat `/schedule` management commands.
+	// When false, the Telegram bot still lists/previews jobs but refuses to
+	// add/remove/enable/disable/run them — manage from the host CLI instead.
+	AllowTelegramManagement *bool `json:"allow_telegram_management,omitempty"` // default true
 }
 
 // ScheduleConfig is the resolved scheduler config (all fields concrete).
 type ScheduleConfig struct {
-	Enabled       bool
-	MaxConcurrent int
-	Timezone      string
-	Catchup       bool
+	Enabled                 bool
+	MaxConcurrent           int
+	Timezone                string
+	Catchup                 bool
+	AllowTelegramManagement bool
 }
 
 // resolveSchedules merges file-level scheduler config with defaults.
 func resolveSchedules(cfg *SchedulesConfig) ScheduleConfig {
 	out := ScheduleConfig{
-		Enabled:       true,
-		MaxConcurrent: 2,
-		Timezone:      "UTC",
-		Catchup:       false,
+		Enabled:                 true,
+		MaxConcurrent:           2,
+		Timezone:                "UTC",
+		Catchup:                 false,
+		AllowTelegramManagement: true,
 	}
 	if cfg == nil {
 		return out
@@ -999,6 +1008,9 @@ func resolveSchedules(cfg *SchedulesConfig) ScheduleConfig {
 	if cfg.Catchup != nil {
 		out.Catchup = *cfg.Catchup
 	}
+	if cfg.AllowTelegramManagement != nil {
+		out.AllowTelegramManagement = *cfg.AllowTelegramManagement
+	}
 	return out
 }
 
diff --git a/internal/config/schedules_test.go b/internal/config/schedules_test.go
index 6f1db70..a0b2032 100644
--- a/internal/config/schedules_test.go
+++ b/internal/config/schedules_test.go
@@ -16,6 +16,29 @@ func TestResolveSchedules_Defaults(t *testing.T) {
 	if got.Catchup {
 		t.Error("Catchup should default to false")
 	}
+	if !got.AllowTelegramManagement {
+		t.Error("AllowTelegramManagement should default to true")
+	}
+}
+
+func TestResolveSchedules_AllowTelegramManagementOverride(t *testing.T) {
+	got := resolveSchedules(&SchedulesConfig{AllowTelegramManagement: boolPtr(false)})
+	if got.AllowTelegramManagement {
+		t.Error("AllowTelegramManagement should be overridable to false")
+	}
+	// Unrelated defaults are preserved.
+	if !got.Enabled || got.MaxConcurrent != 2 {
+		t.Errorf("override disturbed defaults: %+v", got)
+	}
+}
+
+func TestLoadConfig_SchedulesAllowTelegramManagementEnv(t *testing.T) {
+	t.Setenv("HOME", t.TempDir())
+	t.Setenv("ODEK_SCHEDULES_ALLOW_TELEGRAM_MANAGEMENT", "false")
+	cfg := LoadConfig(CLIFlags{})
+	if cfg.Schedules.AllowTelegramManagement {
+		t.Error("ODEK_SCHEDULES_ALLOW_TELEGRAM_MANAGEMENT=false should disable in-chat management")
+	}
 }
 
 func TestResolveSchedules_Overrides(t *testing.T) {
diff --git a/internal/schedule/coverage_test.go b/internal/schedule/coverage_test.go
index fc5e03c..05157ff 100644
--- a/internal/schedule/coverage_test.go
+++ b/internal/schedule/coverage_test.go
@@ -427,6 +427,42 @@ func TestPreview_Truncates(t *testing.T) {
 	}
 }
 
+func TestRun_ReloadTrigger(t *testing.T) {
+	st := newTestStore(t)
+	runner := &fakeRunner{result: "ok", started: make(chan string, 1)}
+	// Long reload poll so the only way the new job fires promptly is via Reload().
+	s := New(st, runner, &fakeDeliverer{}, Options{ReloadEvery: time.Hour})
+
+	ctx, cancel := context.WithCancel(context.Background())
+	done := make(chan error, 1)
+	go func() { done <- s.Run(ctx) }()
+	time.Sleep(40 * time.Millisecond) // let the initial reconcile run (empty store)
+
+	job := Job{ID: "jb-reload", Name: "r", Cron: "0 0 1 1 *", Task: "x",
+		Deliver: Delivery{Kind: DeliverStdout}, Enabled: true, Catchup: true}
+	past := time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)
+	if err := st.SaveState(RunState{JobID: job.ID, NextRun: past, Sig: jobSig(job)}); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := st.Add(job); err != nil {
+		t.Fatal(err)
+	}
+	s.Reload() // force an immediate reconcile instead of waiting an hour
+
+	select {
+	case <-runner.started:
+	case <-time.After(3 * time.Second):
+		cancel()
+		t.Fatal("Reload() did not trigger a reconcile + catchup fire")
+	}
+	cancel()
+	<-done
+
+	// A Reload with no active Run loop must not block (buffered, coalescing).
+	s.Reload()
+	s.Reload()
+}
+
 func TestRun_ReloadsOnFileChange(t *testing.T) {
 	st := newTestStore(t)
 	runner := &fakeRunner{result: "ok", started: make(chan string, 1)}
diff --git a/internal/schedule/scheduler.go b/internal/schedule/scheduler.go
index 8eba077..930c8b8 100644
--- a/internal/schedule/scheduler.go
+++ b/internal/schedule/scheduler.go
@@ -75,6 +75,8 @@ type Scheduler struct {
 
 	sem chan struct{}  // bounds concurrent executions
 	wg  sync.WaitGroup // tracks in-flight executions for graceful drain
+
+	reloadCh chan struct{} // manual reconcile trigger (Reload); buffered, coalescing
 }
 
 // New builds a Scheduler. The store, runner, and deliverer are required.
@@ -110,6 +112,19 @@ func New(store *Store, runner Runner, deliverer Deliverer, opts Options) *Schedu
 		runs:      map[string]int{},
 		running:   map[string]bool{},
 		sem:       make(chan struct{}, opts.MaxConcurrent),
+		reloadCh:  make(chan struct{}, 1),
+	}
+}
+
+// Reload asks a running Run loop to re-read job definitions immediately instead
+// of waiting for the next mtime poll — used after an out-of-band edit (e.g. the
+// Telegram `/schedule` commands) so changes take effect at once. Safe to call
+// from any goroutine and never blocks: reloadCh buffers one signal, so repeated
+// calls coalesce and a signal sent before Run receives simply stays queued.
+func (s *Scheduler) Reload() {
+	select {
+	case s.reloadCh <- struct{}{}:
+	default: // a reload is already pending; drop this one
+	}
+}
 
@@ -143,6 +158,13 @@ func (s *Scheduler) Run(ctx context.Context) error {
 				s.log.Info("scheduler: schedules changed, reloading")
 				s.reconcile(s.opts.Now())
 			}
+		case <-s.reloadCh:
+			// Explicit Reload() — reconcile now and resync lastMod so the mtime
+			// poll doesn't redundantly reconcile the same write on its next tick.
+			timer.Stop()
+			lastMod = s.store.ModTime()
+			s.log.Info("scheduler: manual reload")
+			s.reconcile(s.opts.Now())
 		}
 	}
 }
diff --git a/internal/telegram/commands.go b/internal/telegram/commands.go
index e00da97..66b23e2 100644
--- a/internal/telegram/commands.go
+++ b/internal/telegram/commands.go
@@ -95,6 +95,16 @@ func init() {
 			Description: "Resume the most recent plan",
 			Handler:     planResumeHandler,
 		},
+		{
+			Command:     "schedules",
+			Description: "List scheduled tasks",
+			Handler:     schedulesHandler,
+		},
+		{
+			Command:     "schedule",
+			Description: "Manage scheduled tasks (add, rm, enable, disable, run, next, view)",
+			Handler:     scheduleHandler,
+		},
 	}
 }
 
@@ -108,6 +118,8 @@ func startHandler(args string) (string, error) {
 		"/sessions — List recent sessions\n" +
 		"/resume <id> — Resume a previous session\n" +
 		"/prune [days] — Clean up old sessions\n" +
+		"/schedules — List scheduled tasks\n" +
+		"/schedule add <cron> <task> — Schedule a recurring task\n" +
 		"/stop — Cancel running task\n\n" +
 		"Send me a message and I will help!", nil
 }
@@ -222,6 +234,12 @@ func planDeleteHandler(args string) (string, error) {
 
 func planResumeHandler(args string) (string, error) { return "", nil }
 
+// Schedule command handlers are intercepted in the bot's OnCommand callback
+// (they need the chat ID and the schedule store), so the descriptor handlers
+// are no-ops — like /sessions and /resume.
+func schedulesHandler(args string) (string, error) { return "", nil }
+func scheduleHandler(args string) (string, error)  { return "", nil }
+
 // FindCommand returns the command descriptor with the matching name, or nil.
 func FindCommand(name string) *CommandDescriptor {
 	for i := range DefaultCommands {
diff --git a/internal/telegram/commands_test.go b/internal/telegram/commands_test.go
index 418011d..3860b4e 100644
--- a/internal/telegram/commands_test.go
+++ b/internal/telegram/commands_test.go
@@ -193,6 +193,7 @@ func TestAllHandlers_ReturnNoError(t *testing.T) {
 	inlineOnly := map[string]bool{
 		"sessions": true, "resume": true, "prune": true,
 		"plan": true, "plan_resume": true,
+		"schedule": true, "schedules": true,
 	}
 
 	for _, cmd := range DefaultCommands {

From b20b9497e7f5cecdc615f0bf54f0bca90141b6cb Mon Sep 17 00:00:00 2001
From: Rolando Santamaria Maso <kyberneees@gmail.com>
Date: Fri, 5 Jun 2026 23:01:47 +0200
Subject: [PATCH 11/11] fix(schedule): format scheduled results for Telegram
 MarkdownV2

Scheduled task results were sent raw, so odek markdown like **bold**
arrived as literal asterisks. Route both the CLI and embedded-bot
deliverers through sendTelegramResult, which mirrors the live bot's
SendResponse pipeline: convert to MarkdownV2, chunk via FormatResponse,
and retry each chunk as plain text if Telegram rejects the formatting.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 cmd/odek/schedule.go                    |  38 ++++-
 cmd/odek/schedule_telegram_send_test.go | 213 ++++++++++++++++++++++++
 2 files changed, 247 insertions(+), 4 deletions(-)
 create mode 100644 cmd/odek/schedule_telegram_send_test.go

diff --git a/cmd/odek/schedule.go b/cmd/odek/schedule.go
index 446349e..0e21111 100644
--- a/cmd/odek/schedule.go
+++ b/cmd/odek/schedule.go
@@ -378,8 +378,39 @@ func (d cliDeliverer) deliverTelegram(ctx context.Context, job schedule.Job, res
 		return fmt.Errorf("no chat id (set the job's telegram:<chatID> or telegram.default_chat_id)")
 	}
 	bot := telegram.NewBot(d.resolved.Telegram.Token)
-	_, err := bot.SendMessageContext(ctx, chatID, result, nil)
-	return err
+	return sendTelegramResult(ctx, bot, chatID, result)
+}
+
+// sendTelegramResult delivers a scheduled task's result to chatID, mirroring
+// the live bot's Handler.SendResponse pipeline: FormatResponse converts the
+// odek markdown to Telegram MarkdownV2 chunks, and each non-empty chunk is sent
+// with ParseModeMarkdownV2, then retried once without a parse mode if that send
+// fails (otherwise "**bold**" would arrive as literal asterisks). It returns the
+// first FormatResponse or retry error, skipping any later chunks; a send that
+// fails once ctx is cancelled is not retried, so shutdown isn't held up.
+func sendTelegramResult(ctx context.Context, bot *telegram.Bot, chatID int64, result string) error {
+	chunks, err := telegram.FormatResponse(result)
+	if err != nil {
+		return fmt.Errorf("format response: %w", err)
+	}
+	for _, chunk := range chunks {
+		if chunk == "" {
+			continue // nothing to send for an empty chunk
+		}
+		_, err := bot.SendMessageContext(ctx, chatID, chunk, &telegram.SendOpts{ParseMode: telegram.ParseModeMarkdownV2})
+		if err == nil {
+			continue
+		}
+		// Retry without a parse mode unless ctx is done. NOTE(review): the retry
+		// resends the already-converted chunk — confirm visible escapes are OK.
+		if ctx.Err() != nil {
+			return err
+		}
+		if _, perr := bot.SendMessageContext(ctx, chatID, chunk, nil); perr != nil {
+			return perr
+		}
+	}
+	return nil
+}
 
 // ── embedded scheduler (inside `odek telegram`) ─────────────────────────
@@ -447,8 +478,7 @@ func (d telegramDeliverer) Deliver(ctx context.Context, job schedule.Job, result
 	if chatID == 0 {
 		return fmt.Errorf("no chat id (set the job's telegram:<chatID> or telegram.default_chat_id)")
 	}
-	_, err := d.bot.SendMessageContext(ctx, chatID, result, nil)
-	return err
+	return sendTelegramResult(ctx, d.bot, chatID, result)
 }
 
 // startSchedulerForBot starts the embedded scheduler unless an external
diff --git a/cmd/odek/schedule_telegram_send_test.go b/cmd/odek/schedule_telegram_send_test.go
new file mode 100644
index 0000000..3d3bb13
--- /dev/null
+++ b/cmd/odek/schedule_telegram_send_test.go
@@ -0,0 +1,213 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync"
+	"testing"
+
+	"github.com/BackendStack21/odek/internal/telegram"
+)
+
+// capturedSend records one sendMessage request the test server received.
+type capturedSend struct {
+	text      string // decoded "text" field of the request body
+	parseMode string // decoded "parse_mode" field; "" when absent or empty
+}
+
+// sendRecorder is a Telegram mock server that records every sendMessage call
+// and lets the test decide, per call index, whether to succeed or fail.
+type sendRecorder struct {
+	mu    sync.Mutex     // guards calls
+	calls []capturedSend // requests in the order the handler recorded them
+	// reply is consulted per call (0-based index); nil means always succeed.
+	// Returning ok=false makes the server answer with an error_code 400 "can't
+	// parse entities" body, which the Bot treats as a non-retryable client error.
+	reply func(index int) (ok bool)
+}
+
+func (s *sendRecorder) handler(w http.ResponseWriter, r *http.Request) {
+	body, _ := io.ReadAll(r.Body)
+	var payload struct {
+		Text      string `json:"text"`
+		ParseMode string `json:"parse_mode"`
+	}
+	_ = json.Unmarshal(body, &payload) // best-effort: a bad body is recorded with empty fields
+
+	s.mu.Lock()
+	idx := len(s.calls) // 0-based index of this call, taken under the lock
+	s.calls = append(s.calls, capturedSend{text: payload.Text, parseMode: payload.ParseMode})
+	s.mu.Unlock()
+
+	ok := true
+	if s.reply != nil {
+		ok = s.reply(idx) // invoked after the lock is released
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	if !ok {
+		// The HTTP status stays 200; the failure is signalled in the JSON body.
+		_ = json.NewEncoder(w).Encode(map[string]any{
+			"ok":          false,
+			"error_code":  400,
+			"description": "Bad Request: can't parse entities",
+		})
+		return
+	}
+	_ = json.NewEncoder(w).Encode(map[string]any{
+		"ok":     true,
+		"result": map[string]any{"message_id": 1, "chat": map[string]any{"id": 1}},
+	})
+}
+
+func (s *sendRecorder) snapshot() []capturedSend {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	out := make([]capturedSend, len(s.calls))
+	copy(out, s.calls) // hand back a copy so callers never share the recorder's slice
+	return out
+}
+
+// newRecorderBot spins up a mock server and a Bot pointed at it.
+func newRecorderBot(t *testing.T, rec *sendRecorder) *telegram.Bot {
+	t.Helper()
+	ts := httptest.NewServer(http.HandlerFunc(rec.handler))
+	t.Cleanup(ts.Close) // the server is closed when the calling test finishes
+	bot := telegram.NewBot("testtoken")
+	bot.BaseURL = ts.URL
+	return bot
+}
+
+// A scheduled task's result is odek markdown; sendTelegramResult must convert
+// it to Telegram MarkdownV2 (italic `*x*` → `_x_`, reserved chars escaped) and
+// set parse_mode, instead of shipping the raw text as plain.
+func TestSendTelegramResult_ConvertsAndSetsParseMode(t *testing.T) {
+	rec := &sendRecorder{} // nil reply: every send succeeds
+	bot := newRecorderBot(t, rec)
+
+	result := "The temperature in **Berlin** is *mild* at +20°C."
+	if err := sendTelegramResult(context.Background(), bot, 555, result); err != nil {
+		t.Fatalf("sendTelegramResult: %v", err)
+	}
+
+	calls := rec.snapshot()
+	if len(calls) != 1 { // a short result must go out as a single message
+		t.Fatalf("want 1 send, got %d", len(calls))
+	}
+	got := calls[0]
+	if got.parseMode != telegram.ParseModeMarkdownV2 {
+		t.Errorf("parse_mode = %q, want %q", got.parseMode, telegram.ParseModeMarkdownV2)
+	}
+	// Italic single-asterisk is converted to underscore form.
+	if !strings.Contains(got.text, "_mild_") {
+		t.Errorf("italic not converted, text = %q", got.text)
+	}
+	// The reserved '+' is escaped so MarkdownV2 won't reject the message.
+	if !strings.Contains(got.text, `\+20`) {
+		t.Errorf("reserved '+' not escaped, text = %q", got.text)
+	}
+	// The text must differ from the raw input (i.e. it was actually formatted).
+	if got.text == result {
+		t.Errorf("text was sent unformatted: %q", got.text)
+	}
+}
+
+// If Telegram rejects the MarkdownV2 formatting, the message is re-sent as plain
+// text (no parse_mode) so the user still receives the content.
+func TestSendTelegramResult_FallsBackToPlainText(t *testing.T) {
+	rec := &sendRecorder{
+		reply: func(index int) bool { return index != 0 }, // first (MarkdownV2) fails
+	}
+	bot := newRecorderBot(t, rec)
+
+	if err := sendTelegramResult(context.Background(), bot, 7, "**hi** there"); err != nil {
+		t.Fatalf("sendTelegramResult should recover via plain text: %v", err)
+	}
+
+	calls := rec.snapshot()
+	if len(calls) != 2 {
+		t.Fatalf("want 2 sends (markdown + plain retry), got %d", len(calls))
+	}
+	if calls[0].parseMode != telegram.ParseModeMarkdownV2 {
+		t.Errorf("first send parse_mode = %q, want MarkdownV2", calls[0].parseMode)
+	}
+	if calls[1].parseMode != "" { // the recorder stores "" when parse_mode is absent or empty
+		t.Errorf("retry should be plain text, got parse_mode = %q", calls[1].parseMode)
+	}
+}
+
+// When both the MarkdownV2 send and the plain-text retry fail, the error is
+// surfaced to the scheduler (which records it as a failed run).
+func TestSendTelegramResult_ReturnsErrorWhenBothFail(t *testing.T) {
+	rec := &sendRecorder{
+		reply: func(int) bool { return false }, // every send fails
+	}
+	bot := newRecorderBot(t, rec)
+
+	err := sendTelegramResult(context.Background(), bot, 7, "**boom**")
+	if err == nil {
+		t.Fatal("expected an error when both sends fail")
+	}
+	if n := len(rec.snapshot()); n != 2 { // one MarkdownV2 attempt + one plain retry, nothing more
+		t.Errorf("want 2 attempts, got %d", n)
+	}
+}
+
+// A result larger than Telegram's 4096-byte limit is split into multiple chunks,
+// each sent as its own message.
+func TestSendTelegramResult_ChunksLargeResult(t *testing.T) {
+	rec := &sendRecorder{}
+	bot := newRecorderBot(t, rec)
+
+	// Two paragraphs, each ~3000 bytes, separated by a blank line so they split
+	// at the paragraph boundary into two chunks under the 4096 limit.
+	para := strings.Repeat("word ", 600) // 600 * 5 bytes = 3000 bytes
+	result := para + "\n\n" + para
+	if err := sendTelegramResult(context.Background(), bot, 1, result); err != nil {
+		t.Fatalf("sendTelegramResult: %v", err)
+	}
+	if n := len(rec.snapshot()); n != 2 {
+		t.Fatalf("want 2 chunked sends, got %d", n)
+	}
+}
+
+// An empty result produces no sends at all (no empty messages are sent).
+func TestSendTelegramResult_SkipsEmpty(t *testing.T) {
+	rec := &sendRecorder{}
+	bot := newRecorderBot(t, rec)
+
+	if err := sendTelegramResult(context.Background(), bot, 1, ""); err != nil {
+		t.Fatalf("sendTelegramResult: %v", err)
+	}
+	if n := len(rec.snapshot()); n != 0 {
+		t.Errorf("empty result should send nothing, got %d sends", n)
+	}
+}
+
+// If the context is cancelled, a failed MarkdownV2 send must NOT trigger a
+// plain-text retry — the scheduler is shutting down and the error propagates.
+func TestSendTelegramResult_NoRetryAfterCancel(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	rec := &sendRecorder{
+		reply: func(index int) bool {
+			if index == 0 {
+				cancel() // cancel before the fallback decision
+				return false // and fail the MarkdownV2 attempt
+			}
+			return true // a retry would succeed, but it must never be attempted
+		},
+	}
+	bot := newRecorderBot(t, rec)
+
+	err := sendTelegramResult(ctx, bot, 7, "**hi**")
+	if err == nil {
+		t.Fatal("expected an error when the context is cancelled")
+	}
+	if n := len(rec.snapshot()); n != 1 {
+		t.Errorf("cancelled context must skip the plain-text retry, got %d sends", n)
+	}
+}