Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 59 additions & 2 deletions .github/workflows/cuda-perf.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ on:
- .github/workflows/cuda-perf.yml
- .ci/scripts/cuda_benchmark.py
- .ci/scripts/cuda_perf_prompts/**
- .ci/scripts/export_model_artifact.sh
- .ci/scripts/test_model_e2e.sh
workflow_dispatch:
inputs:
models:
Expand All @@ -32,8 +34,33 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true

permissions:
contents: read

jobs:
# Calls the local reusable workflow `_get-changed-files.yml`. The gated
# jobs below read its `changed-files` output in their `if:` conditions.
changed-files:
name: Get changed files
uses: ./.github/workflows/_get-changed-files.yml
with:
# NOTE(review): presumably makes push events report a diff as well as
# pull requests — confirm against _get-changed-files.yml.
include-push-diff: true

# Calls the local reusable workflow `_ci-run-decision.yml`. Its
# `is-full-run` output is OR-ed into the path gates of the jobs below, so
# a value of 'true' opens the gate regardless of which files changed.
run-decision:
name: CI run decision
uses: ./.github/workflows/_ci-run-decision.yml

set-parameters:
needs: [changed-files, run-decision]
# Path-filtered: mirrors the workflow-level pull_request `paths:`
# filter, so a push commit that touches no perf-relevant path skips
# the benchmark jobs unless it is a sampled full run. Sampling keeps
# the perf time series at every 4th commit (previously every commit).
if: |
contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
runs-on: ubuntu-22.04
outputs:
benchmark_configs: ${{ steps.set-parameters.outputs.benchmark_configs }}
Expand Down Expand Up @@ -145,9 +172,26 @@ jobs:
benchmark-cuda:
name: benchmark-cuda
needs:
- changed-files
- run-decision
- set-parameters
- export-models
if: always()
# Repeat the set-parameters gate here. set-parameters and
# export-models cascade-skip when the gate evaluates false, but
# `always()` bypasses that cascade: it keeps benchmark-cuda running
# even when some export-models matrix cells fail, and without the
# explicit gate it would also fire benchmark-cuda when
# set-parameters was gated out.
if: |
always() &&
(
contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
)
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
Expand Down Expand Up @@ -316,8 +360,21 @@ jobs:

upload-benchmark-results:
needs:
- changed-files
- run-decision
- benchmark-cuda
if: always()
# Same gate as benchmark-cuda — skip the upload when the gate is
# closed (no benchmarks ran).
if: |
always() &&
(
contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda-perf.yml') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_benchmark.py') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/cuda_perf_prompts') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
)
runs-on: ubuntu-22.04
environment: upload-benchmark-results
permissions:
Expand Down
115 changes: 109 additions & 6 deletions .github/workflows/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,42 @@ on:
- .github/workflows/cuda.yml
- backends/cuda/**
- backends/aoti/**
- .ci/scripts/test-cuda-build.sh
- .ci/scripts/export_model_artifact.sh
- .ci/scripts/test_model_e2e.sh
workflow_dispatch:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
cancel-in-progress: false

permissions:
contents: read

jobs:
# Calls the local reusable workflow `_get-changed-files.yml`. Every
# path-gated CUDA job below checks its `changed-files` output with
# `contains(...)`.
changed-files:
name: Get changed files
uses: ./.github/workflows/_get-changed-files.yml
with:
# NOTE(review): presumably makes push events report a diff as well as
# pull requests — confirm against _get-changed-files.yml.
include-push-diff: true

# Calls the local reusable workflow `_ci-run-decision.yml`. When its
# `is-full-run` output is 'true', the path-gated jobs below run even if
# no CUDA-relevant path changed.
run-decision:
name: CI run decision
uses: ./.github/workflows/_ci-run-decision.yml

test-cuda-builds:
needs: [changed-files, run-decision]
# Path-filtered: mirrors the workflow-level pull_request `paths:`
# filter, so a push commit that touches no CUDA-relevant path skips
# this job unless it is a sampled full run.
if: |
contains(needs.changed-files.outputs.changed-files, 'backends/cuda') ||
contains(needs.changed-files.outputs.changed-files, 'backends/aoti') ||
contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -55,9 +83,22 @@ jobs:

# This job will fail if any of the CUDA versions fail
check-all-cuda-builds:
needs: test-cuda-builds
needs: [changed-files, run-decision, test-cuda-builds]
runs-on: ubuntu-latest
if: always()
# Run only if the test-cuda-builds matrix actually ran (i.e. the same
# path/sample gate as test-cuda-builds itself). Otherwise this job
# would fire on every commit and fail because
# needs.test-cuda-builds.result == 'skipped'.
if: |
always() &&
(
contains(needs.changed-files.outputs.changed-files, 'backends/cuda') ||
contains(needs.changed-files.outputs.changed-files, 'backends/aoti') ||
contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
)
steps:
- name: Check if all CUDA builds succeeded
run: |
Expand All @@ -71,6 +112,15 @@ jobs:

test-models-cuda:
name: test-models-cuda
needs: [changed-files, run-decision]
if: |
contains(needs.changed-files.outputs.changed-files, 'backends/cuda') ||
contains(needs.changed-files.outputs.changed-files, 'backends/aoti') ||
contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
Expand Down Expand Up @@ -106,6 +156,15 @@ jobs:

unittest-cuda:
name: unittest-cuda
needs: [changed-files, run-decision]
if: |
contains(needs.changed-files.outputs.changed-files, 'backends/cuda') ||
contains(needs.changed-files.outputs.changed-files, 'backends/aoti') ||
contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
Expand Down Expand Up @@ -154,8 +213,22 @@ jobs:

export-model-cuda-artifact:
name: export-model-cuda-artifact
# Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
# Skip this job if the pull request is from a fork (HuggingFace secrets are not available).
# Path-filtered on push: mirrors the workflow-level pull_request `paths:`
# filter so push commits that don't touch CUDA-relevant paths skip
# this job on non-sampled commits.
needs: [changed-files, run-decision]
if: |
(github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request') &&
(
contains(needs.changed-files.outputs.changed-files, 'backends/cuda') ||
contains(needs.changed-files.outputs.changed-files, 'backends/aoti') ||
contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
)
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
Expand Down Expand Up @@ -300,7 +373,23 @@ jobs:

test-model-cuda-e2e:
name: test-model-cuda-e2e
needs: export-model-cuda-artifact
# Same path filter as export-model-cuda-artifact above. Also explicitly
# gated on the export job succeeding — when needs: jobs are *skipped*
# (e.g. fork PR), GitHub still evaluates this if:, so without the
# explicit success-check this job would run and then fail trying
# to download an artifact that was never produced.
needs: [changed-files, export-model-cuda-artifact, run-decision]
if: |
needs.export-model-cuda-artifact.result == 'success' &&
(
contains(needs.changed-files.outputs.changed-files, 'backends/cuda') ||
contains(needs.changed-files.outputs.changed-files, 'backends/aoti') ||
contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
)
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
Expand Down Expand Up @@ -417,8 +506,22 @@ jobs:

test-cuda-pybind:
name: test-cuda-pybind
needs: export-model-cuda-artifact
# This job downloads models exported by export-model-cuda-artifact and runs them using pybind.
# Same gating as test-model-cuda-e2e — explicit success-check on the
# export job so a skipped export (fork PR, or a non-sampled push with
# no path match) auto-skips this job too.
needs: [changed-files, export-model-cuda-artifact, run-decision]
if: |
needs.export-model-cuda-artifact.result == 'success' &&
(
contains(needs.changed-files.outputs.changed-files, 'backends/cuda') ||
contains(needs.changed-files.outputs.changed-files, 'backends/aoti') ||
contains(needs.changed-files.outputs.changed-files, '.github/workflows/cuda.yml') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test-cuda-build.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/export_model_artifact.sh') ||
contains(needs.changed-files.outputs.changed-files, '.ci/scripts/test_model_e2e.sh') ||
needs.run-decision.outputs.is-full-run == 'true'
)
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
permissions:
id-token: write
Expand Down
Loading