diff --git a/backends/webgpu/CMakeLists.txt b/backends/webgpu/CMakeLists.txt
index c3b6ef4e706..01bb5236a44 100644
--- a/backends/webgpu/CMakeLists.txt
+++ b/backends/webgpu/CMakeLists.txt
@@ -45,6 +45,8 @@ set(WEBGPU_SRCS
   runtime/ops/view_copy/ViewCopy.cpp
   runtime/ops/select/Select.cpp
   runtime/ops/sigmoid/UnaryOp.cpp
+  runtime/ops/squeeze/Squeeze.cpp
+  runtime/ops/unsqueeze/Unsqueeze.cpp
 )
 
 add_library(webgpu_backend ${WEBGPU_SRCS})
diff --git a/backends/webgpu/runtime/ops/squeeze/Squeeze.cpp b/backends/webgpu/runtime/ops/squeeze/Squeeze.cpp
new file mode 100644
index 00000000000..12b0fe561f1
--- /dev/null
+++ b/backends/webgpu/runtime/ops/squeeze/Squeeze.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include
+#include
+#include
+
+#include
+
+namespace executorch::backends::webgpu {
+
+namespace {
+
+// squeeze_copy.dims = numel-preserving flat copy (Vulkan Squeeze.cpp:102-104).
+void squeeze_copy_dims_impl(WebGPUGraph& graph, const std::vector& args) {
+  // args: [self, dims, out]; dims ignored (out shape is fixed ahead of time).
+  add_flat_copy(graph, args.at(0), args.at(args.size() - 1));
+}
+
+} // namespace
+
+WEBGPU_REGISTER_OPERATORS {
+  WEBGPU_REGISTER_OP(aten.squeeze_copy.dims, squeeze_copy_dims_impl);
+}
+
+} // namespace executorch::backends::webgpu
diff --git a/backends/webgpu/runtime/ops/unsqueeze/Unsqueeze.cpp b/backends/webgpu/runtime/ops/unsqueeze/Unsqueeze.cpp
new file mode 100644
index 00000000000..27d2c52e708
--- /dev/null
+++ b/backends/webgpu/runtime/ops/unsqueeze/Unsqueeze.cpp
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#include
+#include
+#include
+
+#include
+
+namespace executorch::backends::webgpu {
+
+namespace {
+
+// unsqueeze_copy = numel-preserving flat copy (Vulkan Unsqueeze.cpp:101-103).
+void unsqueeze_copy_impl(WebGPUGraph& graph, const std::vector& args) {
+  // args: [self, dim, out]; dim ignored (out shape fixed AOT, like view_copy).
+  add_flat_copy(graph, args.at(0), args.at(args.size() - 1));
+}
+
+} // namespace
+
+WEBGPU_REGISTER_OPERATORS {
+  WEBGPU_REGISTER_OP(aten.unsqueeze_copy.default, unsqueeze_copy_impl);
+}
+
+} // namespace executorch::backends::webgpu
diff --git a/backends/webgpu/test/op_tests/cases.py b/backends/webgpu/test/op_tests/cases.py
index be5276cc57a..0db8685fa18 100644
--- a/backends/webgpu/test/op_tests/cases.py
+++ b/backends/webgpu/test/op_tests/cases.py
@@ -44,6 +44,21 @@
     CONFIGS as _SELECT_CONFIGS,
     SelectModule,
 )
+from executorch.backends.webgpu.test.ops.test_sigmoid import (
+    _det_input as _sigmoid_det_input,
+    N as _SIGMOID_N,
+    SigmoidModule,
+)
+
+from executorch.backends.webgpu.test.ops.test_squeeze import (
+    CONFIGS as _SQUEEZE_CONFIGS,
+    SqueezeModule,
+)
+
+from executorch.backends.webgpu.test.ops.test_unsqueeze import (
+    CONFIGS as _UNSQUEEZE_CONFIGS,
+    UnsqueezeModule,
+)
 from executorch.backends.webgpu.test.ops.test_view_copy import (
     CONFIGS as _VIEW_CONFIGS,
     ViewModule,
@@ -153,3 +168,55 @@ def _view_copy_suite() -> WebGPUTestSuite:
 @register_op_test("select")
 def _select_suite() -> WebGPUTestSuite:
     return _fn_config_suite(SelectModule, _SELECT_CONFIGS)
+
+
+def _sigmoid_full_range(_shape) -> torch.Tensor:
+    # Ignores _shape: always returns test_sigmoid's fixed linspace(-12, 12) input.
+    return _sigmoid_det_input()
+
+
+@register_op_test("sigmoid")
+def _sigmoid_suite() -> WebGPUTestSuite:
+    # sigmoid has no CONFIGS table; cover unary shapes directly (tol 1e-4).
+    return WebGPUTestSuite(
+        module_factory=lambda: SigmoidModule(),
+        cases=[
+            Case(name="vec", inputs=((M1,),)),
+            Case(name="mat", inputs=((M1, M2),)),
+            Case(name="rank3", inputs=((S1, M1, M2),)),
+            Case(name="rank4", inputs=((S1, S2, S2, M2),)),
+            # Saturation tails sigmoid(+-12) (~6e-6 / 0.999994) that randn shapes miss.
+            Case(
+                name="saturation",
+                inputs=(InputSpec(shape=(_SIGMOID_N,), gen=_sigmoid_full_range),),
+            ),
+        ],
+        atol=1e-4,
+        rtol=1e-4,
+    )
+
+
+@register_op_test("squeeze")
+def _squeeze_suite() -> WebGPUTestSuite:
+    # CONFIGS: name -> (shape, dim) where dim is an int or a tuple.
+    return WebGPUTestSuite(
+        module_factory=lambda dim: SqueezeModule(dim),
+        cases=[
+            Case(name=n, construct={"dim": dim}, inputs=(shape,))
+            for n, (shape, dim) in _SQUEEZE_CONFIGS.items()
+        ],
+        golden_dtype="float32",  # reshape copies values; fp64 bit-identical
+    )
+
+
+@register_op_test("unsqueeze")
+def _unsqueeze_suite() -> WebGPUTestSuite:
+    # CONFIGS: name -> (shape, dim).
+    return WebGPUTestSuite(
+        module_factory=lambda dim: UnsqueezeModule(dim),
+        cases=[
+            Case(name=n, construct={"dim": dim}, inputs=(shape,))
+            for n, (shape, dim) in _UNSQUEEZE_CONFIGS.items()
+        ],
+        golden_dtype="float32",  # reshape copies values; fp64 bit-identical
+    )
diff --git a/backends/webgpu/test/ops/test_sigmoid.py b/backends/webgpu/test/ops/test_sigmoid.py
new file mode 100644
index 00000000000..0ba8c435a9a
--- /dev/null
+++ b/backends/webgpu/test/ops/test_sigmoid.py
@@ -0,0 +1,51 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""`aten.sigmoid.default` module + input for the WebGPU op-test framework.
+
+`SigmoidModule`, `N`, and `_det_input` are imported by `cases.py` to drive the
+declarative op-test suite. `SigmoidTest` is the export-delegation
+smoke test. Sigmoid is on the Llama critical path (`F.silu` -> `sigmoid` + `mul`); the
+deterministic input spans the saturation tails.
+"""
+
+import unittest
+
+import torch
+
+from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner
+from executorch.exir import to_edge_transform_and_lower
+
+# Input length; the deterministic input spans the saturation tails.
+N = 64
+
+
+class SigmoidModule(torch.nn.Module):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.sigmoid(x)
+
+
+def _det_input() -> torch.Tensor:
+    """Deterministic fp32 ramp over [-12, 12]; N is even, so 0.0 is never sampled."""
+    return torch.linspace(-12.0, 12.0, N, dtype=torch.float32)
+
+
+def _export(m: torch.nn.Module, x: torch.Tensor):
+    ep = torch.export.export(m, (x,))
+    return to_edge_transform_and_lower(
+        ep, partitioner=[VulkanPartitioner()]
+    ).to_executorch()
+
+
+class SigmoidTest(unittest.TestCase):
+    def test_export_delegates(self) -> None:
+        et = _export(SigmoidModule().eval(), _det_input())
+        found = any(
+            d.id == "VulkanBackend"
+            for plan in et.executorch_program.execution_plan
+            for d in plan.delegates
+        )
+        self.assertTrue(found, "Expected a VulkanBackend delegate (sigmoid)")
diff --git a/backends/webgpu/test/ops/test_squeeze.py b/backends/webgpu/test/ops/test_squeeze.py
new file mode 100644
index 00000000000..b55a5143538
--- /dev/null
+++ b/backends/webgpu/test/ops/test_squeeze.py
@@ -0,0 +1,75 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""`aten.squeeze_copy.dims` module + configs for the WebGPU op-test framework.
+
+`SqueezeModule` + `CONFIGS` are imported by `cases.py` to drive the declarative
+op-test suite. `SqueezeTest` is the export-delegation smoke
+test.
+"""
+
+import unittest
+
+import torch
+
+from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner
+from executorch.exir import to_edge_transform_and_lower
+
+# name -> (input_shape, squeeze_dim); squeeze_dim is an int or a tuple of ints
+CONFIGS = {
+    "dim0": ((1, 3, 4), 0),
+    "mid": ((2, 1, 4), 1),
+    "multi": ((1, 3, 1, 4), (0, 2)),
+}
+
+
+class SqueezeModule(torch.nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.dim = dim
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.squeeze(x, self.dim)
+
+
+def _det_input(shape):
+    g = torch.Generator().manual_seed(0)
+    return torch.randn(*shape, generator=g, dtype=torch.float32)
+
+
+def _lower(dim, x: torch.Tensor):
+    ep = torch.export.export(SqueezeModule(dim).eval(), (x,))
+    return to_edge_transform_and_lower(ep, partitioner=[VulkanPartitioner()])
+
+
+def _delegated(et) -> bool:
+    return any(
+        d.id == "VulkanBackend"
+        for plan in et.executorch_program.execution_plan
+        for d in plan.delegates
+    )
+
+
+def _op_delegated(edge, op_substr: str) -> bool:
+    # Passes only if no top-level node target contains op_substr (no CPU fallback).
+    gm = edge.exported_program().graph_module
+    return all(op_substr not in str(getattr(n, "target", "")) for n in gm.graph.nodes)
+
+
+class SqueezeTest(unittest.TestCase):
+    def test_export_delegates(self) -> None:
+        for name, (shape, dim) in CONFIGS.items():
+            with self.subTest(name=name):
+                edge = _lower(dim, _det_input(shape))
+                et = edge.to_executorch()
+                self.assertTrue(
+                    _delegated(et),
+                    f"Expected a VulkanBackend delegate (squeeze {name})",
+                )
+                self.assertTrue(
+                    _op_delegated(edge, "squeeze_copy"),
+                    f"squeeze_copy not delegated (fell back to CPU) for {name}",
+                )
diff --git a/backends/webgpu/test/ops/test_unsqueeze.py b/backends/webgpu/test/ops/test_unsqueeze.py
new file mode 100644
index 00000000000..dcddf4faa51
--- /dev/null
+++ b/backends/webgpu/test/ops/test_unsqueeze.py
@@ -0,0 +1,75 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""`aten.unsqueeze_copy.default` module + configs for the WebGPU op-test framework.
+
+`UnsqueezeModule` + `CONFIGS` are imported by `cases.py` to drive the declarative
+op-test suite. `UnsqueezeTest` is the export-delegation smoke
+test.
+"""
+
+import unittest
+
+import torch
+
+from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner
+from executorch.exir import to_edge_transform_and_lower
+
+# name -> (input_shape, unsqueeze_dim)
+CONFIGS = {
+    "front": ((3, 4), 0),
+    "mid": ((2, 4), 1),
+    "last": ((3, 4), 2),
+}
+
+
+class UnsqueezeModule(torch.nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.dim = dim
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return torch.unsqueeze(x, self.dim)
+
+
+def _det_input(shape):
+    g = torch.Generator().manual_seed(0)
+    return torch.randn(*shape, generator=g, dtype=torch.float32)
+
+
+def _lower(dim, x: torch.Tensor):
+    ep = torch.export.export(UnsqueezeModule(dim).eval(), (x,))
+    return to_edge_transform_and_lower(ep, partitioner=[VulkanPartitioner()])
+
+
+def _delegated(et) -> bool:
+    return any(
+        d.id == "VulkanBackend"
+        for plan in et.executorch_program.execution_plan
+        for d in plan.delegates
+    )
+
+
+def _op_delegated(edge, op_substr: str) -> bool:
+    # Passes only if no top-level node target contains op_substr (no CPU fallback).
+    gm = edge.exported_program().graph_module
+    return all(op_substr not in str(getattr(n, "target", "")) for n in gm.graph.nodes)
+
+
+class UnsqueezeTest(unittest.TestCase):
+    def test_export_delegates(self) -> None:
+        for name, (shape, dim) in CONFIGS.items():
+            with self.subTest(name=name):
+                edge = _lower(dim, _det_input(shape))
+                et = edge.to_executorch()
+                self.assertTrue(
+                    _delegated(et),
+                    f"Expected a VulkanBackend delegate (unsqueeze {name})",
+                )
+                self.assertTrue(
+                    _op_delegated(edge, "unsqueeze_copy"),
+                    f"unsqueeze_copy not delegated (fell back to CPU) for {name}",
+                )
diff --git a/backends/webgpu/test/tester.py b/backends/webgpu/test/tester.py
index 9ba9a4d9ad4..53a745a16df 100644
--- a/backends/webgpu/test/tester.py
+++ b/backends/webgpu/test/tester.py
@@ -24,6 +24,9 @@
     exir_ops.edge.aten.mul.Tensor,
     exir_ops.edge.aten.view_copy.default,
     exir_ops.edge.aten.select_copy.int,
+    exir_ops.edge.aten.sigmoid.default,
+    exir_ops.edge.aten.squeeze_copy.dims,
+    exir_ops.edge.aten.unsqueeze_copy.default,
 ]