Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backends/webgpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ set(WEBGPU_SRCS
runtime/ops/view_copy/ViewCopy.cpp
runtime/ops/select/Select.cpp
runtime/ops/sigmoid/UnaryOp.cpp
runtime/ops/squeeze/Squeeze.cpp
runtime/ops/unsqueeze/Unsqueeze.cpp
)

add_library(webgpu_backend ${WEBGPU_SRCS})
Expand Down
31 changes: 31 additions & 0 deletions backends/webgpu/runtime/ops/squeeze/Squeeze.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/webgpu/runtime/WebGPUGraph.h>
#include <executorch/backends/webgpu/runtime/ops/OperatorRegistry.h>
#include <executorch/backends/webgpu/runtime/ops/view_copy/view_copy.h>

#include <vector>

namespace executorch::backends::webgpu {

namespace {

// Handler for aten.squeeze_copy.dims. Squeezing preserves numel, so the op is
// lowered to a flat copy from input to output (Vulkan Squeeze.cpp:102-104).
void squeeze_copy_dims_impl(WebGPUGraph& graph, const std::vector<int>& args) {
  // Operand layout is [self, dims, out]. The dims operand is never read: the
  // output shape is already fixed ahead of time.
  const int self_ref = args.at(0);
  // The output is always the trailing operand; .at() keeps the bounds check.
  const int out_ref = args.at(args.size() - 1);
  add_flat_copy(graph, self_ref, out_ref);
}

} // namespace

// Associates the op name aten.squeeze_copy.dims with squeeze_copy_dims_impl
// through the operator-registry macros.
WEBGPU_REGISTER_OPERATORS {
  WEBGPU_REGISTER_OP(aten.squeeze_copy.dims, squeeze_copy_dims_impl);
}

} // namespace executorch::backends::webgpu
31 changes: 31 additions & 0 deletions backends/webgpu/runtime/ops/unsqueeze/Unsqueeze.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/webgpu/runtime/WebGPUGraph.h>
#include <executorch/backends/webgpu/runtime/ops/OperatorRegistry.h>
#include <executorch/backends/webgpu/runtime/ops/view_copy/view_copy.h>

#include <vector>

namespace executorch::backends::webgpu {

namespace {

// Handler for aten.unsqueeze_copy.default. Unsqueezing preserves numel, so the
// op is lowered to a flat copy from input to output
// (Vulkan Unsqueeze.cpp:101-103).
void unsqueeze_copy_impl(WebGPUGraph& graph, const std::vector<int>& args) {
  // Operand layout is [self, dim, out]. The dim operand is never read: the
  // output shape is already fixed ahead of time, as with view_copy.
  const int self_ref = args.at(0);
  // The output is always the trailing operand; .at() keeps the bounds check.
  const int out_ref = args.at(args.size() - 1);
  add_flat_copy(graph, self_ref, out_ref);
}

} // namespace

// Associates the op name aten.unsqueeze_copy.default with unsqueeze_copy_impl
// through the operator-registry macros.
WEBGPU_REGISTER_OPERATORS {
  WEBGPU_REGISTER_OP(aten.unsqueeze_copy.default, unsqueeze_copy_impl);
}

} // namespace executorch::backends::webgpu
67 changes: 67 additions & 0 deletions backends/webgpu/test/op_tests/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,21 @@
CONFIGS as _SELECT_CONFIGS,
SelectModule,
)
from executorch.backends.webgpu.test.ops.test_sigmoid import (
_det_input as _sigmoid_det_input,
N as _SIGMOID_N,
SigmoidModule,
)

from executorch.backends.webgpu.test.ops.test_squeeze import (
CONFIGS as _SQUEEZE_CONFIGS,
SqueezeModule,
)

from executorch.backends.webgpu.test.ops.test_unsqueeze import (
CONFIGS as _UNSQUEEZE_CONFIGS,
UnsqueezeModule,
)
from executorch.backends.webgpu.test.ops.test_view_copy import (
CONFIGS as _VIEW_CONFIGS,
ViewModule,
Expand Down Expand Up @@ -153,3 +168,55 @@ def _view_copy_suite() -> WebGPUTestSuite:
@register_op_test("select")
def _select_suite() -> WebGPUTestSuite:
    """Build the `select` op suite from `SelectModule` and its config table."""
    suite = _fn_config_suite(SelectModule, _SELECT_CONFIGS)
    return suite


def _sigmoid_full_range(_shape) -> torch.Tensor:
    """Input generator for the sigmoid saturation case.

    The requested shape is ignored; the tensor returned is the monolith test's
    saturation-tail input (linspace(-12, 12)).
    """
    saturation_input = _sigmoid_det_input()
    return saturation_input


@register_op_test("sigmoid")
def _sigmoid_suite() -> WebGPUTestSuite:
    """Build the `sigmoid` op suite: four unary shapes plus a saturation case."""
    # sigmoid has no CONFIGS table, so the unary shapes are spelled out here.
    shape_table = (
        ("vec", (M1,)),
        ("mat", (M1, M2)),
        ("rank3", (S1, M1, M2)),
        ("rank4", (S1, S2, S2, M2)),
    )
    shape_cases = [Case(name=label, inputs=(shape,)) for label, shape in shape_table]

    # Random-normal shapes miss the saturation tails sigmoid(+-12)
    # (~6e-6 / 0.999994), so a dedicated generator-backed case covers them.
    saturation_case = Case(
        name="saturation",
        inputs=(InputSpec(shape=(_SIGMOID_N,), gen=_sigmoid_full_range),),
    )

    # Both tolerances are 1e-4.
    return WebGPUTestSuite(
        module_factory=lambda: SigmoidModule(),
        cases=shape_cases + [saturation_case],
        atol=1e-4,
        rtol=1e-4,
    )


@register_op_test("squeeze")
def _squeeze_suite() -> WebGPUTestSuite:
    """Build the `squeeze` op suite, one case per entry of the squeeze CONFIGS."""
    # Each CONFIGS entry maps name -> (shape, dim); dim is an int or a tuple.
    cases = []
    for case_name, (shape, dim) in _SQUEEZE_CONFIGS.items():
        cases.append(Case(name=case_name, construct={"dim": dim}, inputs=(shape,)))

    return WebGPUTestSuite(
        module_factory=lambda dim: SqueezeModule(dim),
        cases=cases,
        # reshape copies values; fp64 bit-identical
        golden_dtype="float32",
    )


@register_op_test("unsqueeze")
def _unsqueeze_suite() -> WebGPUTestSuite:
    """Build the `unsqueeze` op suite, one case per entry of the unsqueeze CONFIGS."""
    # Each CONFIGS entry maps name -> (shape, dim).
    cases = []
    for case_name, (shape, dim) in _UNSQUEEZE_CONFIGS.items():
        cases.append(Case(name=case_name, construct={"dim": dim}, inputs=(shape,)))

    return WebGPUTestSuite(
        module_factory=lambda dim: UnsqueezeModule(dim),
        cases=cases,
        # reshape copies values; fp64 bit-identical
        golden_dtype="float32",
    )
51 changes: 51 additions & 0 deletions backends/webgpu/test/ops/test_sigmoid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""`aten.sigmoid.default` module + input for the WebGPU op-test framework.

`SigmoidModule`, `N`, and `_det_input` are imported by `cases.py` to drive the
declarative op-test suite. `SigmoidTest` is the export-delegation
smoke test. Sigmoid is on the Llama critical path (`F.silu` -> `sigmoid` + `mul`); the
deterministic input spans the saturation tails.
"""

import unittest

import torch

from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner
from executorch.exir import to_edge_transform_and_lower

# Input length; the deterministic input spans the saturation tails.
N = 64


class SigmoidModule(torch.nn.Module):
    """Minimal module whose forward pass is a single elementwise sigmoid."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Tensor-method spelling of torch.sigmoid(x).
        return x.sigmoid()


def _det_input() -> torch.Tensor:
    """Deterministic fp32 input: N evenly spaced points from -12.0 to 12.0.

    The range is symmetric around zero and reaches both saturation tails of
    sigmoid.
    """
    return torch.linspace(start=-12.0, end=12.0, steps=N, dtype=torch.float32)


def _export(m: torch.nn.Module, x: torch.Tensor):
    """Export `m` on example input `x`, lower it with `VulkanPartitioner`, and
    return the result of `to_executorch()`."""
    exported = torch.export.export(m, (x,))
    lowered = to_edge_transform_and_lower(exported, partitioner=[VulkanPartitioner()])
    return lowered.to_executorch()


class SigmoidTest(unittest.TestCase):
    """Export-delegation smoke test for sigmoid."""

    def test_export_delegates(self) -> None:
        program = _export(SigmoidModule().eval(), _det_input())
        # Gather the delegate ids of every execution plan, then look for Vulkan.
        delegate_ids = [
            delegate.id
            for plan in program.executorch_program.execution_plan
            for delegate in plan.delegates
        ]
        self.assertTrue(
            "VulkanBackend" in delegate_ids,
            "Expected a VulkanBackend delegate (sigmoid)",
        )
75 changes: 75 additions & 0 deletions backends/webgpu/test/ops/test_squeeze.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""`aten.squeeze_copy.dims` module + configs for the WebGPU op-test framework.
`SqueezeModule` + `CONFIGS` are imported by `cases.py` to drive the declarative
op-test suite. `SqueezeTest` is the export-delegation smoke
test.
"""

import unittest

import torch

from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner
from executorch.exir import to_edge_transform_and_lower

# name -> (input_shape, squeeze_dim)
CONFIGS = {
"dim0": ((1, 3, 4), 0),
"mid": ((2, 1, 4), 1),
"multi": ((1, 3, 1, 4), (0, 2)),
}


class SqueezeModule(torch.nn.Module):
    """Module that squeezes its input along the dim(s) given at construction."""

    def __init__(self, dim):
        super().__init__()
        # `dim` is an int or a tuple of ints (see CONFIGS).
        self.dim = dim

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Tensor-method spelling of torch.squeeze(x, self.dim).
        return x.squeeze(self.dim)


def _det_input(shape):
g = torch.Generator().manual_seed(0)
return torch.randn(*shape, generator=g, dtype=torch.float32)


def _lower(dim, x: torch.Tensor):
    """Export `SqueezeModule(dim)` on `x` and lower it with `VulkanPartitioner`."""
    module = SqueezeModule(dim).eval()
    exported = torch.export.export(module, (x,))
    partitioners = [VulkanPartitioner()]
    return to_edge_transform_and_lower(exported, partitioner=partitioners)


def _delegated(et) -> bool:
return any(
d.id == "VulkanBackend"
for plan in et.executorch_program.execution_plan
for d in plan.delegates
)


def _op_delegated(edge, op_substr: str) -> bool:
# op must be absorbed into the delegate, not left as a CPU-fallback node.
gm = edge.exported_program().graph_module
return all(op_substr not in str(getattr(n, "target", "")) for n in gm.graph.nodes)


class SqueezeTest(unittest.TestCase):
    """Export-delegation smoke test covering every squeeze CONFIGS entry."""

    def test_export_delegates(self) -> None:
        for name, config in CONFIGS.items():
            shape, dim = config
            with self.subTest(name=name):
                lowered = _lower(dim, _det_input(shape))
                program = lowered.to_executorch()

                # The program must contain a Vulkan delegate at all...
                has_delegate = _delegated(program)
                self.assertTrue(
                    has_delegate,
                    f"Expected a VulkanBackend delegate (squeeze {name})",
                )

                # ...and the squeeze op itself must not remain in the graph.
                op_absorbed = _op_delegated(lowered, "squeeze_copy")
                self.assertTrue(
                    op_absorbed,
                    f"squeeze_copy not delegated (fell back to CPU) for {name}",
                )
75 changes: 75 additions & 0 deletions backends/webgpu/test/ops/test_unsqueeze.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""`aten.unsqueeze_copy.default` module + configs for the WebGPU op-test framework.
`UnsqueezeModule` + `CONFIGS` are imported by `cases.py` to drive the declarative
op-test suite. `UnsqueezeTest` is the export-delegation smoke
test.
"""

import unittest

import torch

from executorch.backends.vulkan.partitioner.vulkan_partitioner import VulkanPartitioner
from executorch.exir import to_edge_transform_and_lower

# name -> (input_shape, unsqueeze_dim)
CONFIGS = {
"front": ((3, 4), 0),
"mid": ((2, 4), 1),
"last": ((3, 4), 2),
}


class UnsqueezeModule(torch.nn.Module):
    """Module that inserts a size-1 axis at the dim given at construction."""

    def __init__(self, dim):
        super().__init__()
        # Position at which forward() inserts the new axis.
        self.dim = dim

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Tensor-method spelling of torch.unsqueeze(x, self.dim).
        return x.unsqueeze(self.dim)


def _det_input(shape):
g = torch.Generator().manual_seed(0)
return torch.randn(*shape, generator=g, dtype=torch.float32)


def _lower(dim, x: torch.Tensor):
    """Export `UnsqueezeModule(dim)` on `x` and lower it with `VulkanPartitioner`."""
    module = UnsqueezeModule(dim).eval()
    exported = torch.export.export(module, (x,))
    partitioners = [VulkanPartitioner()]
    return to_edge_transform_and_lower(exported, partitioner=partitioners)


def _delegated(et) -> bool:
return any(
d.id == "VulkanBackend"
for plan in et.executorch_program.execution_plan
for d in plan.delegates
)


def _op_delegated(edge, op_substr: str) -> bool:
# op must be absorbed into the delegate, not left as a top-level CPU-fallback node.
gm = edge.exported_program().graph_module
return all(op_substr not in str(getattr(n, "target", "")) for n in gm.graph.nodes)


class UnsqueezeTest(unittest.TestCase):
    """Export-delegation smoke test covering every unsqueeze CONFIGS entry."""

    def test_export_delegates(self) -> None:
        for name, config in CONFIGS.items():
            shape, dim = config
            with self.subTest(name=name):
                lowered = _lower(dim, _det_input(shape))
                program = lowered.to_executorch()

                # The program must contain a Vulkan delegate at all...
                has_delegate = _delegated(program)
                self.assertTrue(
                    has_delegate,
                    f"Expected a VulkanBackend delegate (unsqueeze {name})",
                )

                # ...and the unsqueeze op itself must not remain in the graph.
                op_absorbed = _op_delegated(lowered, "unsqueeze_copy")
                self.assertTrue(
                    op_absorbed,
                    f"unsqueeze_copy not delegated (fell back to CPU) for {name}",
                )
3 changes: 3 additions & 0 deletions backends/webgpu/test/tester.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
exir_ops.edge.aten.mul.Tensor,
exir_ops.edge.aten.view_copy.default,
exir_ops.edge.aten.select_copy.int,
exir_ops.edge.aten.sigmoid.default,
exir_ops.edge.aten.squeeze_copy.dims,
exir_ops.edge.aten.unsqueeze_copy.default,
]


Expand Down
Loading