Skip to content
Open
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ This release is compatible with NumPy 2.5.
* Cleaned up Python bindings for indexing functions, renaming `usm_ndarray_take` and `usm_ndarray_put` to `py_take` and `py_put` and refactoring validation [#2935](https://github.com/IntelPython/dpnp/pull/2935)
* Updated `dpnp.linalg.eig` and `dpnp.linalg.eigvals` documentation to reflect NumPy's always-complex eigenvalue output for general matrices [#2953](https://github.com/IntelPython/dpnp/pull/2953)
* Clarified support for negative axes in `dpnp.transpose`/`dpnp.permute_dims` documentation [#2940](https://github.com/IntelPython/dpnp/pull/2940)
* Improved performance of `dpnp.fft` functions for complex strided input by avoiding oversized allocations and extra copies [#2939](https://github.com/IntelPython/dpnp/pull/2939)

### Deprecated

Expand Down
26 changes: 21 additions & 5 deletions dpnp/fft/dpnp_utils_fft.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,12 +408,29 @@ def _fft(a, norm, out, forward, in_place, c2c, axes, batch_fft=True):
a = dpnp.reshape(a, local_shape)
index = 1

elem_strides = dpnp.get_usm_ndarray(a).strides

if not a.flags.c_contiguous:
# cuFFT requires input arrays to be C-contiguous (row-major)
# for correct execution
if (
dpnp.is_cuda_backend(a) and not a.flags.c_contiguous
): # pragma: no cover
if dpnp.is_cuda_backend(a): # pragma: no cover
Comment thread
vlad-perevezentsev marked this conversation as resolved.
a = dpnp.ascontiguousarray(a)
else:
# Check whether the memory span addressed by the strided view
# exceeds its number of elements.
# If so, copy to a contiguous array to avoid an oversized
# allocation for the output array and an unnecessary copy to
# contiguous after the oneMKL FFT
a_shape = a.shape
# Max element displacement reachable by positive strides.
# Negative strides are handled by _copy_array;
# zero strides are safely ignored as they reuse the same
# memory location and don't extend the footprint
max_disp = sum(
st * (sh - 1) for st, sh in zip(elem_strides, a_shape) if st > 0
)
if (max_disp + 1) > a.size:
Comment thread
vlad-perevezentsev marked this conversation as resolved.
a = dpnp.ascontiguousarray(a)
Comment thread
ndgrigorian marked this conversation as resolved.

# w/a for cuFFT to avoid "Invalid strides" error when
# the last dimension is 1 and there are multiple axes
Expand All @@ -424,8 +441,7 @@ def _fft(a, norm, out, forward, in_place, c2c, axes, batch_fft=True):
if cufft_wa: # pragma: no cover
a = dpnp.moveaxis(a, -1, -2)

strides = dpnp.get_usm_ndarray(a).strides
a_strides = _standardize_strides_to_nonzero(strides, a.shape)
a_strides = _standardize_strides_to_nonzero(elem_strides, a.shape)
dsc, out_strides = _commit_descriptor(
a, forward, in_place, c2c, a_strides, index, batch_fft
)
Expand Down
20 changes: 20 additions & 0 deletions dpnp/tests/test_fft.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,26 @@ def test_strided_2d(self, stride_x, stride_y):
expected = numpy.fft.fft(a)
assert_dtype_allclose(result, expected)

def test_non_contiguous_no_copy(self):
    """Check ``dpnp.fft.fft`` on a non-contiguous, compact-footprint input.

    Axis 0 of a freshly created (4, 5, 6) complex64 array is moved to
    the end, giving the non-contiguous input with compact footprint
    (no copy needed).  Only the numerical result is compared with
    ``numpy.fft.fft``; whether a copy actually happened is not asserted.
    """
    host_data = generate_random_numpy_array(
        (4, 5, 6), dtype=numpy.complex64
    )
    device_view = dpnp.moveaxis(dpnp.array(host_data), 0, -1)
    # The NumPy reference input is derived from the dpnp view itself so
    # both libraries transform the same values in the same axis order.
    host_view = dpnp.asnumpy(device_view)

    assert_dtype_allclose(
        dpnp.fft.fft(device_view), numpy.fft.fft(host_view)
    )

@pytest.mark.parametrize("slc", [numpy.s_[::2, :], numpy.s_[:, ::3]])
def test_non_contiguous_with_copy(self, slc):
    """Check ``dpnp.fft.fft`` on a strided slice with an oversized footprint.

    ``slc`` steps over rows or columns of a (10, 12) complex64 array,
    the strided input with oversized footprint that is meant to
    trigger a copy.  The same slice of the NumPy source array provides
    the reference result.
    """
    host_full = generate_random_numpy_array(
        (10, 12), dtype=numpy.complex64
    )
    device_slice = dpnp.array(host_full)[slc]

    actual = dpnp.fft.fft(device_slice)
    reference = numpy.fft.fft(host_full[slc])
    assert_dtype_allclose(actual, reference)

def test_empty_array(self):
a = numpy.empty((10, 0, 4), dtype=numpy.complex64)
ia = dpnp.array(a)
Expand Down
Loading