From b10992674cc71ee3a4d9675a438618bdd01b56aa Mon Sep 17 00:00:00 2001 From: Julian Ng-Thow-Hing Date: Thu, 25 Jun 2026 10:24:17 -0700 Subject: [PATCH 1/3] [ExecuTorch][WebGPU] sigmoid op test suite (cases.py op-test framework) Pull Request resolved: https://github.com/pytorch/executorch/pull/20391 Registers `aten.sigmoid.default` in the `cases.py` op-test framework: a `_sigmoid_suite` (hard-coded shapes + a saturation case over a `linspace(-12, 12)` input) that `generate_op_tests` exports and compares to an fp64 torch golden on Dawn. Also adds `test/ops/test_sigmoid.py` (`SigmoidModule` + `N` + `_det_input` + an export-delegation smoke test) and the `aten.sigmoid.default` partitioner-allowlist entry in `tester.py`. ghstack-source-id: 397026520 @exported-using-ghexport Differential Revision: [D108793159](https://our.internmc.facebook.com/intern/diff/D108793159/) --- backends/webgpu/test/op_tests/cases.py | 31 ++++++++++++++ backends/webgpu/test/ops/test_sigmoid.py | 51 ++++++++++++++++++++++++ backends/webgpu/test/tester.py | 1 + 3 files changed, 83 insertions(+) create mode 100644 backends/webgpu/test/ops/test_sigmoid.py diff --git a/backends/webgpu/test/op_tests/cases.py b/backends/webgpu/test/op_tests/cases.py index be5276cc57a..7df3ee11f11 100644 --- a/backends/webgpu/test/op_tests/cases.py +++ b/backends/webgpu/test/op_tests/cases.py @@ -44,6 +44,11 @@ CONFIGS as _SELECT_CONFIGS, SelectModule, ) +from executorch.backends.webgpu.test.ops.test_sigmoid import ( + _det_input as _sigmoid_det_input, + N as _SIGMOID_N, + SigmoidModule, +) from executorch.backends.webgpu.test.ops.test_view_copy import ( CONFIGS as _VIEW_CONFIGS, ViewModule, @@ -153,3 +158,29 @@ def _view_copy_suite() -> WebGPUTestSuite: @register_op_test("select") def _select_suite() -> WebGPUTestSuite: return _fn_config_suite(SelectModule, _SELECT_CONFIGS) + + +def _sigmoid_full_range(_shape) -> torch.Tensor: + # Reuses the monolith's saturation-tail input (linspace(-12, 12)). 
+ return _sigmoid_det_input() + + +@register_op_test("sigmoid") +def _sigmoid_suite() -> WebGPUTestSuite: + # sigmoid has no CONFIGS table; cover unary shapes directly (tol 1e-4). + return WebGPUTestSuite( + module_factory=lambda: SigmoidModule(), + cases=[ + Case(name="vec", inputs=((M1,),)), + Case(name="mat", inputs=((M1, M2),)), + Case(name="rank3", inputs=((S1, M1, M2),)), + Case(name="rank4", inputs=((S1, S2, S2, M2),)), + # Saturation tails sigmoid(+-12) (~6e-6 / 0.999994) that randn shapes miss. + Case( + name="saturation", + inputs=(InputSpec(shape=(_SIGMOID_N,), gen=_sigmoid_full_range),), + ), + ], + atol=1e-4, + rtol=1e-4, + ) diff --git a/backends/webgpu/test/ops/test_sigmoid.py b/backends/webgpu/test/ops/test_sigmoid.py new file mode 100644 index 00000000000..0ba8c435a9a --- /dev/null +++ b/backends/webgpu/test/ops/test_sigmoid.py @@ -0,0 +1,51 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""`aten.sigmoid.default` module + input for the WebGPU op-test framework. + +`SigmoidModule`, `N`, and `_det_input` are imported by `cases.py` to drive the +declarative op-test suite. `SigmoidTest` is the export-delegation +smoke test. Sigmoid is on the Llama critical path (`F.silu` -> `sigmoid` + `mul`); the +deterministic input spans the saturation tails. +""" + +import unittest + +import torch + +from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner +from executorch.exir import to_edge_transform_and_lower + +# Input length; the deterministic input spans the saturation tails. 
+N = 64 + + +class SigmoidModule(torch.nn.Module): + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.sigmoid(x) + + +def _det_input() -> torch.Tensor: + """Deterministic fp32 input spanning negatives, zero, and large magnitudes.""" + return torch.linspace(-12.0, 12.0, N, dtype=torch.float32) + + +def _export(m: torch.nn.Module, x: torch.Tensor): + ep = torch.export.export(m, (x,)) + return to_edge_transform_and_lower( + ep, partitioner=[VulkanPartitioner()] + ).to_executorch() + + +class SigmoidTest(unittest.TestCase): + def test_export_delegates(self) -> None: + et = _export(SigmoidModule().eval(), _det_input()) + found = any( + d.id == "VulkanBackend" + for plan in et.executorch_program.execution_plan + for d in plan.delegates + ) + self.assertTrue(found, "Expected a VulkanBackend delegate (sigmoid)") diff --git a/backends/webgpu/test/tester.py b/backends/webgpu/test/tester.py index 9ba9a4d9ad4..e5dd510d49b 100644 --- a/backends/webgpu/test/tester.py +++ b/backends/webgpu/test/tester.py @@ -24,6 +24,7 @@ exir_ops.edge.aten.mul.Tensor, exir_ops.edge.aten.view_copy.default, exir_ops.edge.aten.select_copy.int, + exir_ops.edge.aten.sigmoid.default, ] From 621084c2912df140386f4bb2f83352316bbfdedd Mon Sep 17 00:00:00 2001 From: Julian Ng-Thow-Hing Date: Thu, 25 Jun 2026 10:24:17 -0700 Subject: [PATCH 2/3] [ExecuTorch][WebGPU] Add squeeze_copy + unsqueeze_copy (flat copies) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pull Request resolved: https://github.com/pytorch/executorch/pull/20392 Adds `aten.squeeze_copy.dims` and `aten.unsqueeze_copy.default` to the WebGPU delegate. Both are numel-preserving shape ops; on a dense row-major buffer backend they are the same flat copy as `view_copy` — only the shape metadata differs (mirrors the Vulkan delegate, which routes both through `add_view_copy_node`). 
Composition (no new kernel): - `squeeze/Squeeze.cpp` — reads `args = [self, dims, out]`, ignores the AOT-fixed `dims`, calls `add_flat_copy(graph, in, out)` from `runtime/ops/view_copy/view_copy.h`. - `unsqueeze/Unsqueeze.cpp` — reads `args = [self, dim, out]`, ignores the AOT-fixed `dim`, calls `add_flat_copy(graph, in, out)`. ghstack-source-id: 397026523 @exported-using-ghexport Differential Revision: [D108793153](https://our.internmc.facebook.com/intern/diff/D108793153/) --- backends/webgpu/CMakeLists.txt | 2 ++ .../webgpu/runtime/ops/squeeze/Squeeze.cpp | 31 +++++++++++++++++++ .../runtime/ops/unsqueeze/Unsqueeze.cpp | 31 +++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 backends/webgpu/runtime/ops/squeeze/Squeeze.cpp create mode 100644 backends/webgpu/runtime/ops/unsqueeze/Unsqueeze.cpp diff --git a/backends/webgpu/CMakeLists.txt b/backends/webgpu/CMakeLists.txt index c3b6ef4e706..01bb5236a44 100644 --- a/backends/webgpu/CMakeLists.txt +++ b/backends/webgpu/CMakeLists.txt @@ -45,6 +45,8 @@ set(WEBGPU_SRCS runtime/ops/view_copy/ViewCopy.cpp runtime/ops/select/Select.cpp runtime/ops/sigmoid/UnaryOp.cpp + runtime/ops/squeeze/Squeeze.cpp + runtime/ops/unsqueeze/Unsqueeze.cpp ) add_library(webgpu_backend ${WEBGPU_SRCS}) diff --git a/backends/webgpu/runtime/ops/squeeze/Squeeze.cpp b/backends/webgpu/runtime/ops/squeeze/Squeeze.cpp new file mode 100644 index 00000000000..12b0fe561f1 --- /dev/null +++ b/backends/webgpu/runtime/ops/squeeze/Squeeze.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include + +#include + +namespace executorch::backends::webgpu { + +namespace { + +// squeeze_copy.dims = numel-preserving flat copy (Vulkan Squeeze.cpp:102-104). 
+void squeeze_copy_dims_impl(WebGPUGraph& graph, const std::vector& args) { + // args: [self, dims, out]; dims ignored (out shape fixed AOT). + add_flat_copy(graph, args.at(0), args.at(args.size() - 1)); +} + +} // namespace + +WEBGPU_REGISTER_OPERATORS { + WEBGPU_REGISTER_OP(aten.squeeze_copy.dims, squeeze_copy_dims_impl); +} + +} // namespace executorch::backends::webgpu diff --git a/backends/webgpu/runtime/ops/unsqueeze/Unsqueeze.cpp b/backends/webgpu/runtime/ops/unsqueeze/Unsqueeze.cpp new file mode 100644 index 00000000000..27d2c52e708 --- /dev/null +++ b/backends/webgpu/runtime/ops/unsqueeze/Unsqueeze.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include + +#include + +namespace executorch::backends::webgpu { + +namespace { + +// unsqueeze_copy = numel-preserving flat copy (Vulkan Unsqueeze.cpp:101-103). +void unsqueeze_copy_impl(WebGPUGraph& graph, const std::vector& args) { + // args: [self, dim, out]; dim ignored (out shape fixed AOT, like view_copy). 
+ add_flat_copy(graph, args.at(0), args.at(args.size() - 1)); +} + +} // namespace + +WEBGPU_REGISTER_OPERATORS { + WEBGPU_REGISTER_OP(aten.unsqueeze_copy.default, unsqueeze_copy_impl); +} + +} // namespace executorch::backends::webgpu From c86418c4ec769c32ffafdcd3729dccce4ed55185 Mon Sep 17 00:00:00 2001 From: Julian Ng-Thow-Hing Date: Thu, 25 Jun 2026 10:24:18 -0700 Subject: [PATCH 3/3] [ExecuTorch][WebGPU] squeeze_copy + unsqueeze_copy test suites (cases.py op-test framework) Pull Request resolved: https://github.com/pytorch/executorch/pull/20393 Registers `aten.squeeze_copy.dims` and `aten.unsqueeze_copy.default` in the `cases.py` op-test framework: a `_squeeze_suite` of 3 configs (squeeze leading/middle/multiple size-1 dims) and a `_unsqueeze_suite` of 3 configs (insert dim at front/middle/last) that `generate_op_tests` exports via `VulkanPartitioner` and compares to a torch golden on Dawn. Also adds `test/ops/test_squeeze.py` (`SqueezeModule` + `CONFIGS` + `_op_delegated` smoke test), `test/ops/test_unsqueeze.py` (`UnsqueezeModule` + `CONFIGS` + `_op_delegated` smoke test), and the two partitioner-allowlist entries in `tester.py`. 
ghstack-source-id: 397026525 @exported-using-ghexport Differential Revision: [D108793152](https://our.internmc.facebook.com/intern/diff/D108793152/) --- backends/webgpu/test/op_tests/cases.py | 36 +++++++++++ backends/webgpu/test/ops/test_squeeze.py | 75 ++++++++++++++++++++++ backends/webgpu/test/ops/test_unsqueeze.py | 75 ++++++++++++++++++++++ backends/webgpu/test/tester.py | 2 + 4 files changed, 188 insertions(+) create mode 100644 backends/webgpu/test/ops/test_squeeze.py create mode 100644 backends/webgpu/test/ops/test_unsqueeze.py diff --git a/backends/webgpu/test/op_tests/cases.py b/backends/webgpu/test/op_tests/cases.py index 7df3ee11f11..0db8685fa18 100644 --- a/backends/webgpu/test/op_tests/cases.py +++ b/backends/webgpu/test/op_tests/cases.py @@ -49,6 +49,16 @@ N as _SIGMOID_N, SigmoidModule, ) + +from executorch.backends.webgpu.test.ops.test_squeeze import ( + CONFIGS as _SQUEEZE_CONFIGS, + SqueezeModule, +) + +from executorch.backends.webgpu.test.ops.test_unsqueeze import ( + CONFIGS as _UNSQUEEZE_CONFIGS, + UnsqueezeModule, +) from executorch.backends.webgpu.test.ops.test_view_copy import ( CONFIGS as _VIEW_CONFIGS, ViewModule, @@ -184,3 +194,29 @@ def _sigmoid_suite() -> WebGPUTestSuite: atol=1e-4, rtol=1e-4, ) + + +@register_op_test("squeeze") +def _squeeze_suite() -> WebGPUTestSuite: + # CONFIGS: name -> (shape, dim) where dim is an int or a tuple. + return WebGPUTestSuite( + module_factory=lambda dim: SqueezeModule(dim), + cases=[ + Case(name=n, construct={"dim": dim}, inputs=(shape,)) + for n, (shape, dim) in _SQUEEZE_CONFIGS.items() + ], + golden_dtype="float32", # reshape copies values; fp64 bit-identical + ) + + +@register_op_test("unsqueeze") +def _unsqueeze_suite() -> WebGPUTestSuite: + # CONFIGS: name -> (shape, dim). 
+ return WebGPUTestSuite( + module_factory=lambda dim: UnsqueezeModule(dim), + cases=[ + Case(name=n, construct={"dim": dim}, inputs=(shape,)) + for n, (shape, dim) in _UNSQUEEZE_CONFIGS.items() + ], + golden_dtype="float32", # reshape copies values; fp64 bit-identical + ) diff --git a/backends/webgpu/test/ops/test_squeeze.py b/backends/webgpu/test/ops/test_squeeze.py new file mode 100644 index 00000000000..b55a5143538 --- /dev/null +++ b/backends/webgpu/test/ops/test_squeeze.py @@ -0,0 +1,75 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""`aten.squeeze_copy.dims` module + configs for the WebGPU op-test framework. + +`SqueezeModule` + `CONFIGS` are imported by `cases.py` to drive the declarative +op-test suite. `SqueezeTest` is the export-delegation smoke +test. +""" + +import unittest + +import torch + +from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner +from executorch.exir import to_edge_transform_and_lower + +# name -> (input_shape, squeeze_dim) +CONFIGS = { + "dim0": ((1, 3, 4), 0), + "mid": ((2, 1, 4), 1), + "multi": ((1, 3, 1, 4), (0, 2)), +} + + +class SqueezeModule(torch.nn.Module): + def __init__(self, dim): + super().__init__() + self.dim = dim + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.squeeze(x, self.dim) + + +def _det_input(shape): + g = torch.Generator().manual_seed(0) + return torch.randn(*shape, generator=g, dtype=torch.float32) + + +def _lower(dim, x: torch.Tensor): + ep = torch.export.export(SqueezeModule(dim).eval(), (x,)) + return to_edge_transform_and_lower(ep, partitioner=[VulkanPartitioner()]) + + +def _delegated(et) -> bool: + return any( + d.id == "VulkanBackend" + for plan in et.executorch_program.execution_plan + for d in plan.delegates + ) + + +def _op_delegated(edge, op_substr: str) -> bool: + # op 
must be absorbed into the delegate, not left as a CPU-fallback node. + gm = edge.exported_program().graph_module + return all(op_substr not in str(getattr(n, "target", "")) for n in gm.graph.nodes) + + +class SqueezeTest(unittest.TestCase): + def test_export_delegates(self) -> None: + for name, (shape, dim) in CONFIGS.items(): + with self.subTest(name=name): + edge = _lower(dim, _det_input(shape)) + et = edge.to_executorch() + self.assertTrue( + _delegated(et), + f"Expected a VulkanBackend delegate (squeeze {name})", + ) + self.assertTrue( + _op_delegated(edge, "squeeze_copy"), + f"squeeze_copy not delegated (fell back to CPU) for {name}", + ) diff --git a/backends/webgpu/test/ops/test_unsqueeze.py b/backends/webgpu/test/ops/test_unsqueeze.py new file mode 100644 index 00000000000..dcddf4faa51 --- /dev/null +++ b/backends/webgpu/test/ops/test_unsqueeze.py @@ -0,0 +1,75 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +"""`aten.unsqueeze_copy.default` module + configs for the WebGPU op-test framework. + +`UnsqueezeModule` + `CONFIGS` are imported by `cases.py` to drive the declarative +op-test suite. `UnsqueezeTest` is the export-delegation smoke +test. 
+""" + +import unittest + +import torch + +from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner +from executorch.exir import to_edge_transform_and_lower + +# name -> (input_shape, unsqueeze_dim) +CONFIGS = { + "front": ((3, 4), 0), + "mid": ((2, 4), 1), + "last": ((3, 4), 2), +} + + +class UnsqueezeModule(torch.nn.Module): + def __init__(self, dim): + super().__init__() + self.dim = dim + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return torch.unsqueeze(x, self.dim) + + +def _det_input(shape): + g = torch.Generator().manual_seed(0) + return torch.randn(*shape, generator=g, dtype=torch.float32) + + +def _lower(dim, x: torch.Tensor): + ep = torch.export.export(UnsqueezeModule(dim).eval(), (x,)) + return to_edge_transform_and_lower(ep, partitioner=[VulkanPartitioner()]) + + +def _delegated(et) -> bool: + return any( + d.id == "VulkanBackend" + for plan in et.executorch_program.execution_plan + for d in plan.delegates + ) + + +def _op_delegated(edge, op_substr: str) -> bool: + # op must be absorbed into the delegate, not left as a top-level CPU-fallback node. 
+ gm = edge.exported_program().graph_module + return all(op_substr not in str(getattr(n, "target", "")) for n in gm.graph.nodes) + + +class UnsqueezeTest(unittest.TestCase): + def test_export_delegates(self) -> None: + for name, (shape, dim) in CONFIGS.items(): + with self.subTest(name=name): + edge = _lower(dim, _det_input(shape)) + et = edge.to_executorch() + self.assertTrue( + _delegated(et), + f"Expected a VulkanBackend delegate (unsqueeze {name})", + ) + self.assertTrue( + _op_delegated(edge, "unsqueeze_copy"), + f"unsqueeze_copy not delegated (fell back to CPU) for {name}", + ) diff --git a/backends/webgpu/test/tester.py b/backends/webgpu/test/tester.py index e5dd510d49b..53a745a16df 100644 --- a/backends/webgpu/test/tester.py +++ b/backends/webgpu/test/tester.py @@ -25,6 +25,8 @@ exir_ops.edge.aten.view_copy.default, exir_ops.edge.aten.select_copy.int, exir_ops.edge.aten.sigmoid.default, + exir_ops.edge.aten.squeeze_copy.dims, + exir_ops.edge.aten.unsqueeze_copy.default, ]