Skip to content

Commit 410f930

Browse files
authored
Reduce CI cost (#19919)
Currently every push to main runs all macOS jobs unconditionally. This PR path-filters macOS jobs on push as well as PR, but samples 25% of push commits (deterministic: every 4th commit by first-parent depth) to still run the full suite for HUD/bisection signal. A new viable-strict-gate workflow fails on the 75% non-sampled commits and is added to viable/strict's requires list, so viable/strict only advances on commits where every job ran. Estimated ~75% macOS runner savings. CI behavior changes: * macOS jobs in pull.yml / trunk.yml now skip on pushes that don't touch their paths and aren't in the sample * A new viable-strict-gate workflow runs on every push to main/release/* and fails when the commit isn't a sampled full-run. * update-viablestrict now requires the gate workflow → viable/strict advances ~every 4 commits instead of every commit. * Maintainers can force a full run on any main/release commit by running the new "Promote commit to viable/strict" workflow from the Actions tab * Sampling rule lives in one place: _ci-run-decision.yml; change the rate or rule there.
1 parent 175dc6a commit 410f930

8 files changed

Lines changed: 472 additions & 44 deletions

File tree

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
name: CI Run Decision

# Single source of truth for "should this commit force-run all CI jobs
# regardless of path filter?". Used by per-job ``if:`` gates in the
# calling workflows (pull.yml, trunk.yml, mlx.yml, ...) so the sampling
# logic isn't repeated per job.
#
# Returns ``is-full-run = 'true'`` for:
#   - workflow_dispatch (manual run)
#   - ciflow/* tag pushes (maintainer-forced full run)
#   - push events whose first-parent commit depth is a multiple of 4
#     (deterministic 25% sample; at most 3 non-sampled commits between
#     two consecutive samples)
#
# Returns ``is-full-run = 'false'`` for:
#   - pull_request / pull_request_target (use path filter instead)
#   - push events not matching any of the above (path-filtered runs)
#
# See ``viable-strict-gate.yml``: viable/strict only advances on
# commits where this is true, so the path-filtered fast path doesn't
# silently advance partial signal.

on:
  workflow_call:
    outputs:
      is-full-run:
        description: "'true' if this commit should run all CI jobs regardless of path filter; 'false' otherwise."
        value: ${{ jobs.decide.outputs.is-full-run }}

permissions:
  contents: read

jobs:
  decide:
    runs-on: ubuntu-latest
    outputs:
      is-full-run: ${{ steps.compute.outputs.is-full-run }}
    steps:
      # Full history needed to compute commit depth via
      # `git rev-list --first-parent --count`. The --first-parent flag
      # follows only the linear main-branch history through merge
      # commits, so the count maps 1:1 to pushes on main regardless of
      # how many commits were in any merged PR.
      #
      # The clone is only needed on the one path that calls git below
      # (push events that are not ciflow/* tags); every other event is
      # decided from the event name / ref alone, so skip the
      # full-history fetch there.
      - name: Checkout
        if: ${{ github.event_name == 'push' && !startsWith(github.ref, 'refs/tags/ciflow/') }}
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Compute is-full-run
        id: compute
        env:
          EVENT_NAME: ${{ github.event_name }}
          REF: ${{ github.ref }}
          SHA: ${{ github.sha }}
        run: |
          set -eu

          IS_FULL=false

          # Manual runs always get the full suite.
          case "$EVENT_NAME" in
            workflow_dispatch)
              IS_FULL=true
              ;;
          esac

          # ciflow/* tags are a maintainer-forced full run.
          case "$REF" in
            refs/tags/ciflow/*)
              IS_FULL=true
              ;;
          esac

          # Depth-based 25% sample on push: every 4th commit on the
          # linear main-branch history (depth % 4 == 0). --first-parent
          # is required — plain `git rev-list --count` would walk all
          # merge parents, so the count would jump by (1 + PR_size) at
          # each merge commit and the sample rate would be unpredictable.
          # Guarantees with --first-parent, as long as each push adds
          # exactly one first-parent commit:
          #   - Exactly 25% of pushes are sampled.
          #   - At most 3 non-sampled commits between any two samples.
          # Re-runs of the same commit always have the same outcome.
          if [ "$IS_FULL" = "false" ] && [ "$EVENT_NAME" = "push" ]; then
            DEPTH=$(git rev-list --first-parent --count "$SHA")
            REMAINDER=$((DEPTH % 4))
            if [ "$REMAINDER" -eq 0 ]; then
              IS_FULL=true
            fi
            # Plain `%` here: this is echo, not printf, so `%%` would be
            # printed literally.
            echo "Depth: $DEPTH (first-parent; depth % 4 = $REMAINDER)"
          fi

          echo "Event: $EVENT_NAME"
          echo "Ref: $REF"
          echo "SHA: $SHA"
          echo "is-full-run: $IS_FULL"
          echo "is-full-run=$IS_FULL" >> "$GITHUB_OUTPUT"

.github/workflows/_get-changed-files.yml

Lines changed: 64 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,24 @@ name: Get Changed Files
22

33
on:
44
workflow_call:
5+
inputs:
6+
include-push-diff:
7+
description: |
8+
When true, on push events the output is the diff between
9+
`github.event.before` and `github.sha` (computed via the
10+
GitHub Compare API). Default is false: push events emit '*',
11+
matching the historical behavior.
12+
type: boolean
13+
required: false
14+
default: false
515
outputs:
616
changed-files:
7-
description: "List of changed files (space-separated) or '*' if not in a PR"
17+
description: "Space-separated list of changed files for PR events (and push events when include-push-diff=true); '*' otherwise."
818
value: ${{ jobs.get-changed-files.outputs.changed-files }}
919

20+
permissions:
21+
contents: read
22+
1023
jobs:
1124
get-changed-files:
1225
runs-on: ubuntu-latest
@@ -18,26 +31,65 @@ jobs:
1831
id: get-files
env:
  GH_TOKEN: ${{ github.token }}
  INCLUDE_PUSH_DIFF: ${{ inputs.include-push-diff }}
  # Context values are handed to the script through the environment
  # rather than expanded into the script text, so the shell only ever
  # sees them as data.
  EVENT_NAME: ${{ github.event_name }}
  REPO: ${{ github.repository }}
  PR_NUMBER: ${{ github.event.number }}
  BEFORE: ${{ github.event.before }}
  AFTER: ${{ github.sha }}
run: |
  set -eu

  # Writes the `changed-files` step output. The value is either a
  # space-separated list of paths or '*' meaning "no usable diff,
  # treat everything as changed".

  # PR context: list files modified by the PR.
  if [ "$EVENT_NAME" = "pull_request" ] || [ "$EVENT_NAME" = "pull_request_target" ]; then
    echo "Running in PR context"
    CHANGED_FILES=$(gh api "repos/$REPO/pulls/$PR_NUMBER/files" --paginate \
      --jq '.[] | select(.status != "removed") | .filename' | tr '\n' ' ' | sed 's/ $//')

    if [ -z "$CHANGED_FILES" ]; then
      echo "No changed files found, setting to '*'"
      CHANGED_FILES="*"
    fi
    echo "Changed files: $CHANGED_FILES"
    echo "changed-files=$CHANGED_FILES" >> "$GITHUB_OUTPUT"
    exit 0
  fi

  # Push context with opt-in: diff between previous tip and new
  # tip via the GitHub Compare API. This is what lets path-
  # filtered jobs skip on push commits that don't touch their
  # relevant paths. Callers must explicitly request this with
  # `include-push-diff: true` because some workflows (e.g.
  # lint.yml) historically rely on the '*' value to take a
  # broader code path.
  if [ "$EVENT_NAME" = "push" ] && [ "$INCLUDE_PUSH_DIFF" = "true" ]; then
    ZERO_SHA="0000000000000000000000000000000000000000"
    # The Compare API caps the number of files it reports for a
    # comparison; a list this long may be truncated.
    MAX_COMPARE_FILES=300

    if [ -z "${BEFORE:-}" ] || [ "$BEFORE" = "$ZERO_SHA" ]; then
      echo "No 'before' SHA on push event (tag/branch creation or initial push); setting changed files to '*'"
      echo "changed-files=*" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    echo "Running in push context: comparing $BEFORE..$AFTER"
    # One "<status>\t<filename>" row per reported file. Removed files
    # are kept at this stage so the row count reflects what the API
    # returned. A failed call is reported in the log and falls back
    # to '*' instead of being silently treated as an empty diff.
    if ! COMPARE_ROWS=$(gh api "repos/$REPO/compare/$BEFORE...$AFTER" --paginate \
      --jq '.files[]? | "\(.status)\t\(.filename)"'); then
      echo "Compare API call failed; setting changed files to '*'"
      echo "changed-files=*" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    ROW_COUNT=$(printf '%s\n' "$COMPARE_ROWS" | awk 'NF { n++ } END { print n + 0 }')
    if [ "$ROW_COUNT" -ge "$MAX_COMPARE_FILES" ]; then
      # A possibly truncated list must not be used for path
      # filtering: a job could be skipped for a file that was
      # changed but not reported.
      echo "Compare returned $ROW_COUNT files (list may be truncated); setting changed files to '*'"
      echo "changed-files=*" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    CHANGED_FILES=$(printf '%s\n' "$COMPARE_ROWS" \
      | awk -F '\t' 'NF && $1 != "removed" { print $2 }' \
      | tr '\n' ' ' | sed 's/ $//')

    if [ -z "$CHANGED_FILES" ]; then
      echo "Compare returned empty; setting changed files to '*'"
      echo "changed-files=*" >> "$GITHUB_OUTPUT"
      exit 0
    fi

    echo "Changed files: $CHANGED_FILES"
    echo "changed-files=$CHANGED_FILES" >> "$GITHUB_OUTPUT"
    exit 0
  fi

  # Default for non-PR events (push without opt-in,
  # workflow_dispatch, schedule, etc.): no diff. Emit '*' to
  # preserve the historical behavior.
  echo "Event '$EVENT_NAME' (or include-push-diff=false): emitting '*'"
  echo "changed-files=*" >> "$GITHUB_OUTPUT"

.github/workflows/mlx.yml

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,19 @@ concurrency:
2525
permissions: {}
2626

2727
jobs:
28+
# Emits is-full-run='true' for workflow_dispatch / ciflow tag /
29+
# sampled-push commits (every 4th main/release commit by depth).
30+
# Returns 'false' for pull_request events — PR jobs use the workflow-
31+
# level `paths:` filter (above) for path-based gating instead.
32+
run-decision:
33+
name: CI run decision
34+
uses: ./.github/workflows/_ci-run-decision.yml
35+
2836
test-mlx:
37+
needs: run-decision
38+
if: |
39+
github.event_name == 'pull_request' ||
40+
needs.run-decision.outputs.is-full-run == 'true'
2941
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
3042
with:
3143
default-packages: ""
@@ -93,6 +105,10 @@ jobs:
93105
echo "::endgroup::"
94106
95107
test-mlx-qwen35-moe:
108+
needs: run-decision
109+
if: |
110+
github.event_name == 'pull_request' ||
111+
needs.run-decision.outputs.is-full-run == 'true'
96112
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
97113
with:
98114
default-packages: ""
@@ -145,6 +161,10 @@ jobs:
145161
echo "::endgroup::"
146162
147163
backend-tester:
164+
needs: run-decision
165+
if: |
166+
github.event_name == 'pull_request' ||
167+
needs.run-decision.outputs.is-full-run == 'true'
148168
strategy:
149169
fail-fast: false
150170
matrix:
@@ -191,6 +211,10 @@ jobs:
191211
fi
192212
193213
test-mlx-parakeet:
214+
needs: run-decision
215+
if: |
216+
github.event_name == 'pull_request' ||
217+
needs.run-decision.outputs.is-full-run == 'true'
194218
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
195219
with:
196220
default-packages: ""
@@ -248,7 +272,10 @@ jobs:
248272
# Requires HuggingFace secrets — skip on fork PRs.
249273
# Maintainers can opt-in by applying the ciflow/mlx label, which
250274
# pushes a ciflow/mlx/<PR> tag that re-runs this workflow with secrets.
251-
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
275+
needs: run-decision
276+
if: |
277+
(github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request') &&
278+
(github.event_name == 'pull_request' || needs.run-decision.outputs.is-full-run == 'true')
252279
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
253280
secrets: inherit
254281
with:
@@ -309,7 +336,10 @@ jobs:
309336
test-mlx-voxtral-realtime:
310337
# Requires HuggingFace secrets — skip on fork PRs.
311338
# Maintainers can opt-in by applying the ciflow/mlx label.
312-
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
339+
needs: run-decision
340+
if: |
341+
(github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request') &&
342+
(github.event_name == 'pull_request' || needs.run-decision.outputs.is-full-run == 'true')
313343
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
314344
secrets: inherit
315345
with:
@@ -387,7 +417,10 @@ jobs:
387417
test-mlx-whisper:
388418
# Requires HuggingFace secrets — skip on fork PRs.
389419
# Maintainers can opt-in by applying the ciflow/mlx label.
390-
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
420+
needs: run-decision
421+
if: |
422+
(github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request') &&
423+
(github.event_name == 'pull_request' || needs.run-decision.outputs.is-full-run == 'true')
391424
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
392425
secrets: inherit
393426
with:
@@ -439,6 +472,10 @@ jobs:
439472
440473
441474
test-mlx-stories110m:
475+
needs: run-decision
476+
if: |
477+
github.event_name == 'pull_request' ||
478+
needs.run-decision.outputs.is-full-run == 'true'
442479
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
443480
with:
444481
default-packages: ""
@@ -505,7 +542,10 @@ jobs:
505542
test-mlx-llm:
506543
# Requires HuggingFace secrets — skip on fork PRs.
507544
# Maintainers can opt-in by applying the ciflow/mlx label.
508-
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
545+
needs: run-decision
546+
if: |
547+
(github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request') &&
548+
(github.event_name == 'pull_request' || needs.run-decision.outputs.is-full-run == 'true')
509549
strategy:
510550
fail-fast: false
511551
matrix:

0 commit comments

Comments
 (0)