From d3493ae9a0c5b70c4d5aaf1d55af1a8c3c2e26ed Mon Sep 17 00:00:00 2001
From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com>
Date: Sat, 7 Feb 2026 13:36:38 +0530
Subject: [PATCH 1/2] fix: add PyTorch version check for torch.accelerator in
 gather_size_by_comm

torch.accelerator.current_accelerator() only exists in PyTorch 2.6+.
Since diffusers officially supports PyTorch 2.1+, the unguarded call in
gather_size_by_comm raises AttributeError on PyTorch 2.1-2.5 whenever
the group's backend string does not contain "cpu".

Add a hasattr(torch, "accelerator") check that falls back to 'cuda' on
older PyTorch versions, matching the pattern already used in
lora_pipeline.py.

Fixes #13074
---
 src/diffusers/models/_modeling_parallel.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/diffusers/models/_modeling_parallel.py b/src/diffusers/models/_modeling_parallel.py
index db45159adfc9..f8948897d02e 100644
--- a/src/diffusers/models/_modeling_parallel.py
+++ b/src/diffusers/models/_modeling_parallel.py
@@ -290,7 +290,12 @@ def gather_size_by_comm(size: int, group: dist.ProcessGroup) -> List[int]:
     # HACK: Use Gloo backend for all_gather to avoid H2D and D2H overhead
     comm_backends = str(dist.get_backend(group=group))
     # NOTE: e.g., dist.init_process_group(backend="cpu:gloo,cuda:nccl")
-    gather_device = "cpu" if "cpu" in comm_backends else torch.accelerator.current_accelerator()
+    if "cpu" in comm_backends:
+        gather_device = "cpu"
+    elif hasattr(torch, "accelerator"):
+        gather_device = torch.accelerator.current_accelerator()
+    else:
+        gather_device = "cuda"
     gathered_sizes = [torch.empty((1,), device=gather_device, dtype=torch.int64) for _ in range(world_size)]
     dist.all_gather(
         gathered_sizes,

From 3a12e931b75a60f758c772f7b45dad75ea0d57f3 Mon Sep 17 00:00:00 2001
From: Mr-Neutr0n <64578610+Mr-Neutr0n@users.noreply.github.com>
Date: Tue, 2 Jun 2026 01:17:43 +0530
Subject: [PATCH 2/2] Simplify gather_device selection per @DN6 suggestion

@DN6 (2026-02-13) suggested using the existing get_device() helper from
torch_utils instead of the inline hasattr(torch, "accelerator") check.
get_device() already returns the appropriate device string for the
current environment and falls back to cpu when no accelerator is
available, which makes the three-branch if/elif/else a one-liner.
---
 src/diffusers/models/_modeling_parallel.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/diffusers/models/_modeling_parallel.py b/src/diffusers/models/_modeling_parallel.py
index f8948897d02e..e350b8df43cb 100644
--- a/src/diffusers/models/_modeling_parallel.py
+++ b/src/diffusers/models/_modeling_parallel.py
@@ -22,6 +22,7 @@
 import torch.distributed as dist
 
 from ..utils import get_logger
+from ..utils.torch_utils import get_device
 
 
 if TYPE_CHECKING:
@@ -290,12 +291,10 @@ def gather_size_by_comm(size: int, group: dist.ProcessGroup) -> List[int]:
     # HACK: Use Gloo backend for all_gather to avoid H2D and D2H overhead
     comm_backends = str(dist.get_backend(group=group))
     # NOTE: e.g., dist.init_process_group(backend="cpu:gloo,cuda:nccl")
-    if "cpu" in comm_backends:
-        gather_device = "cpu"
-    elif hasattr(torch, "accelerator"):
-        gather_device = torch.accelerator.current_accelerator()
-    else:
-        gather_device = "cuda"
+    # get_device() returns the device string for the current environment
+    # (cuda/npu/xpu/mps/mlu, falling back to cpu), so no hasattr(torch, "accelerator")
+    # version guard is needed here.
+    gather_device = "cpu" if "cpu" in comm_backends else get_device()
     gathered_sizes = [torch.empty((1,), device=gather_device, dtype=torch.int64) for _ in range(world_size)]
     dist.all_gather(
         gathered_sizes,