From d3493ae9a0c5b70c4d5aaf1d55af1a8c3c2e26ed Mon Sep 17 00:00:00 2001 From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com> Date: Sat, 7 Feb 2026 13:36:38 +0530 Subject: [PATCH 1/2] fix: add PyTorch version check for torch.accelerator in gather_size_by_comm torch.accelerator.current_accelerator() only exists in PyTorch 2.6+. Since diffusers officially supports PyTorch 2.1+, this causes AttributeError on versions 2.1-2.5. Added hasattr check with fallback to 'cuda' for older PyTorch versions, matching the pattern already used in lora_pipeline.py. Fixes #13074 --- src/diffusers/models/_modeling_parallel.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/diffusers/models/_modeling_parallel.py b/src/diffusers/models/_modeling_parallel.py index db45159adfc9..f8948897d02e 100644 --- a/src/diffusers/models/_modeling_parallel.py +++ b/src/diffusers/models/_modeling_parallel.py @@ -290,7 +290,12 @@ def gather_size_by_comm(size: int, group: dist.ProcessGroup) -> List[int]: # HACK: Use Gloo backend for all_gather to avoid H2D and D2H overhead comm_backends = str(dist.get_backend(group=group)) # NOTE: e.g., dist.init_process_group(backend="cpu:gloo,cuda:nccl") - gather_device = "cpu" if "cpu" in comm_backends else torch.accelerator.current_accelerator() + if "cpu" in comm_backends: + gather_device = "cpu" + elif hasattr(torch, "accelerator"): + gather_device = torch.accelerator.current_accelerator() + else: + gather_device = "cuda" gathered_sizes = [torch.empty((1,), device=gather_device, dtype=torch.int64) for _ in range(world_size)] dist.all_gather( gathered_sizes, From 3a12e931b75a60f758c772f7b45dad75ea0d57f3 Mon Sep 17 00:00:00 2001 From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com> Date: Tue, 2 Jun 2026 01:17:43 +0530 Subject: [PATCH 2/2] Simplify gather_device selection per @DN6 suggestion @DN6 (2026-02-13) suggested using the existing get_device() helper from torch_utils instead of the inline hasattr(torch, "accelerator") check. get_device() already returns the appropriate device string for the current environment and falls back to cpu when no accelerator is available, which makes the three-branch if/elif/else a one-liner. --- src/diffusers/models/_modeling_parallel.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/diffusers/models/_modeling_parallel.py b/src/diffusers/models/_modeling_parallel.py index f8948897d02e..e350b8df43cb 100644 --- a/src/diffusers/models/_modeling_parallel.py +++ b/src/diffusers/models/_modeling_parallel.py @@ -22,6 +22,7 @@ import torch.distributed as dist from ..utils import get_logger +from ..utils.torch_utils import get_device if TYPE_CHECKING: @@ -290,12 +291,10 @@ def gather_size_by_comm(size: int, group: dist.ProcessGroup) -> List[int]: # HACK: Use Gloo backend for all_gather to avoid H2D and D2H overhead comm_backends = str(dist.get_backend(group=group)) # NOTE: e.g., dist.init_process_group(backend="cpu:gloo,cuda:nccl") - if "cpu" in comm_backends: - gather_device = "cpu" - elif hasattr(torch, "accelerator"): - gather_device = torch.accelerator.current_accelerator() - else: - gather_device = "cuda" + # get_device() handles accelerator version compatibility internally + # (cuda/npu/xpu/mps/mlu/cpu), so we don't need the hasattr(torch, "accelerator") + # check here. + gather_device = "cpu" if "cpu" in comm_backends else get_device() gathered_sizes = [torch.empty((1,), device=gather_device, dtype=torch.int64) for _ in range(world_size)] dist.all_gather( gathered_sizes,