From 9580f6cb076b300f0b67a62b8d94dbb8f738df66 Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Tue, 2 Jun 2026 10:26:36 -0700 Subject: [PATCH 1/2] up --- .github/workflows/cuda-perf.yml | 53 ++++++++++++++++++- .github/workflows/cuda.yml | 91 ++++++++++++++++++++++++++++++--- 2 files changed, 136 insertions(+), 8 deletions(-) diff --git a/.github/workflows/cuda-perf.yml b/.github/workflows/cuda-perf.yml index ada2fb9e696..7b24dcbbdde 100644 --- a/.github/workflows/cuda-perf.yml +++ b/.github/workflows/cuda-perf.yml @@ -32,8 +32,31 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} cancel-in-progress: true +permissions: + contents: read + jobs: + changed-files: + name: Get changed files + uses: ./.github/workflows/_get-changed-files.yml + with: + include-push-diff: true + + run-decision: + name: CI run decision + uses: ./.github/workflows/_ci-run-decision.yml + set-parameters: + needs: [changed-files, run-decision] + # Path-filtered: mirrors the workflow-level pull_request `paths:` + # filter so push commits that don't touch perf-relevant paths skip + # this whole workflow on non-sampled commits. Sampling preserves + # perf time-series at every 4th commit (vs every commit pre-PR). + if: | + contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') || + needs.run-decision.outputs.is-full-run == 'true' runs-on: ubuntu-22.04 outputs: benchmark_configs: ${{ steps.set-parameters.outputs.benchmark_configs }} @@ -145,9 +168,24 @@ jobs: benchmark-cuda: name: benchmark-cuda needs: + - changed-files + - run-decision - set-parameters - export-models - if: always() + # Inherit the gate from set-parameters/export-models (they cascade- + # skip when the gate evaluates false). 
`always()` keeps + # benchmark-cuda running even when some export-models matrix cells + # fail — but only if the gate itself is open. Without the explicit + # gate here, `always()` would fire benchmark-cuda even when + # set-parameters was gated out. + if: | + always() && + ( + contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') || + needs.run-decision.outputs.is-full-run == 'true' + ) uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main permissions: id-token: write @@ -316,8 +354,19 @@ jobs: upload-benchmark-results: needs: + - changed-files + - run-decision - benchmark-cuda - if: always() + # Same gate as benchmark-cuda — skip the upload when the gate + # is closed (no benchmarks ran). + if: | + always() && + ( + contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') || + needs.run-decision.outputs.is-full-run == 'true' + ) runs-on: ubuntu-22.04 environment: upload-benchmark-results permissions: diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index f19b937994f..5972d064fc4 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -26,8 +26,30 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} cancel-in-progress: false +permissions: + contents: read + jobs: + changed-files: + name: Get changed files + uses: ./.github/workflows/_get-changed-files.yml + with: + include-push-diff: true + + run-decision: + name: CI run decision + uses: 
./.github/workflows/_ci-run-decision.yml + test-cuda-builds: + needs: [changed-files, run-decision] + # Path-filtered: mirrors the workflow-level pull_request `paths:` + # filter so push commits that don't touch CUDA-relevant paths skip + # this job on non-sampled commits. + if: | + contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || + contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || + contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + needs.run-decision.outputs.is-full-run == 'true' strategy: fail-fast: false matrix: @@ -55,9 +77,19 @@ jobs: # This job will fail if any of the CUDA versions fail check-all-cuda-builds: - needs: test-cuda-builds + needs: [changed-files, run-decision, test-cuda-builds] runs-on: ubuntu-latest - if: always() + # Run only if the test-cuda-builds matrix actually ran (i.e. the same + # path/sample gate as test-cuda-builds itself). Otherwise this job + # would fire on every commit and fail because needs.test-cuda-builds.result == 'skipped'. 
+ if: | + always() && + ( + contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || + contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || + contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + needs.run-decision.outputs.is-full-run == 'true' + ) steps: - name: Check if all CUDA builds succeeded run: | @@ -71,6 +103,12 @@ jobs: test-models-cuda: name: test-models-cuda + needs: [changed-files, run-decision] + if: | + contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || + contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || + contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main permissions: id-token: write @@ -106,6 +144,12 @@ jobs: unittest-cuda: name: unittest-cuda + needs: [changed-files, run-decision] + if: | + contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || + contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || + contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main permissions: id-token: write @@ -154,8 +198,19 @@ jobs: export-model-cuda-artifact: name: export-model-cuda-artifact - # Skip this job if the pull request is from a fork (HuggingFace secrets are not available) - if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request' + # Skip this job if the pull request is from a fork (HuggingFace secrets are not available). + # Path-filtered on push: mirrors the workflow-level pull_request `paths:` + # filter so push commits that don't touch CUDA-relevant paths skip + # this job on non-sampled commits. 
+ needs: [changed-files, run-decision] + if: | + (github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request') && + ( + contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || + contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || + contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + needs.run-decision.outputs.is-full-run == 'true' + ) uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main permissions: id-token: write @@ -300,7 +355,20 @@ jobs: test-model-cuda-e2e: name: test-model-cuda-e2e - needs: export-model-cuda-artifact + # Same path filter as export-model-cuda-artifact above. Also explicitly + # gated on the export job succeeding — when needs: jobs are *skipped* + # (e.g. fork PR), GitHub still evaluates this if:, so without the + # explicit success-check this job would run and then fail trying + # to download an artifact that was never produced. + needs: [changed-files, export-model-cuda-artifact, run-decision] + if: | + needs.export-model-cuda-artifact.result == 'success' && + ( + contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || + contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || + contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + needs.run-decision.outputs.is-full-run == 'true' + ) uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main permissions: id-token: write @@ -417,8 +485,19 @@ jobs: test-cuda-pybind: name: test-cuda-pybind - needs: export-model-cuda-artifact # This job downloads models exported by export-model-cuda-artifact and runs them using pybind. + # Same gating as test-model-cuda-e2e — explicit success-check on the + # export job so a skipped export (fork PR, non-sampled push, no path + # match) auto-skips this job too. 
+ needs: [changed-files, export-model-cuda-artifact, run-decision] + if: | + needs.export-model-cuda-artifact.result == 'success' && + ( + contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || + contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || + contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + needs.run-decision.outputs.is-full-run == 'true' + ) uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main permissions: id-token: write From 3e19cd74d29ad1eafaeea08b6c5362cef60e339c Mon Sep 17 00:00:00 2001 From: Scott Roy Date: Tue, 2 Jun 2026 10:34:51 -0700 Subject: [PATCH 2/2] up --- .github/workflows/cuda-perf.yml | 8 ++++++++ .github/workflows/cuda.yml | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/.github/workflows/cuda-perf.yml b/.github/workflows/cuda-perf.yml index 7b24dcbbdde..1bb9b62be65 100644 --- a/.github/workflows/cuda-perf.yml +++ b/.github/workflows/cuda-perf.yml @@ -12,6 +12,8 @@ on: - .github/workflows/cuda-perf.yml - .ci/scripts/cuda_benchmark.py - .ci/scripts/cuda_perf_prompts/** + - .ci/scripts/export_model_artifact.sh + - .ci/scripts/test_model_e2e.sh workflow_dispatch: inputs: models: @@ -56,6 +58,8 @@ jobs: contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || needs.run-decision.outputs.is-full-run == 'true' runs-on: ubuntu-22.04 outputs: @@ -184,6 +188,8 @@ jobs: contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') || 
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || needs.run-decision.outputs.is-full-run == 'true' ) uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main @@ -365,6 +371,8 @@ jobs: contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') || contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || needs.run-decision.outputs.is-full-run == 'true' ) runs-on: ubuntu-22.04 diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 5972d064fc4..eafdc3807f7 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -20,6 +20,9 @@ on: - .github/workflows/cuda.yml - backends/cuda/** - backends/aoti/** + - .ci/scripts/test-cuda-build.sh + - .ci/scripts/export_model_artifact.sh + - .ci/scripts/test_model_e2e.sh workflow_dispatch: concurrency: @@ -49,6 +52,9 @@ jobs: contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || needs.run-decision.outputs.is-full-run == 'true' strategy: fail-fast: false @@ -88,6 +94,9 @@ jobs: 
contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || needs.run-decision.outputs.is-full-run == 'true' ) steps: @@ -108,6 +117,9 @@ jobs: contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main permissions: @@ -149,6 +161,9 @@ jobs: contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || needs.run-decision.outputs.is-full-run == 'true' uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main permissions: @@ -209,6 +224,9 @@ jobs: contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || 
contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || needs.run-decision.outputs.is-full-run == 'true' ) uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main @@ -367,6 +385,9 @@ jobs: contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || needs.run-decision.outputs.is-full-run == 'true' ) uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main @@ -496,6 +517,9 @@ jobs: contains(needs.changed-files.outputs.changed-files, 'backends/cuda') || contains(needs.changed-files.outputs.changed-files, 'backends/aoti') || contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') || + contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') || needs.run-decision.outputs.is-full-run == 'true' ) uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main