Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions cuda_core/tests/graph/test_graph_memory_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,36 @@ def test_gmr_check_capture_state(mempool_device, mode):
gb.end_building().complete()


def test_graph_memory_resource_attributes_direct_init_raises():
"""GraphMemoryResourceAttributes cannot be constructed directly."""
from cuda.core._memory._graph_memory_resource import GraphMemoryResourceAttributes

with pytest.raises(RuntimeError, match="cannot be instantiated directly"):
GraphMemoryResourceAttributes()


def test_graph_memory_resource_accessibility_flags(init_cuda):
"""GraphMemoryResource exposes expected accessibility flags and device_id."""
device = Device()
gmr = GraphMemoryResource(device)
assert gmr.is_device_accessible is True
assert gmr.is_host_accessible is False
assert gmr.device_id == int(device)


def test_graph_memory_resource_attributes_repr(mempool_device):
"""GraphMemoryResourceAttributes.__repr__ includes the class name and the 4 documented attributes."""
device = mempool_device
gmr = GraphMemoryResource(device)
r = repr(gmr.attributes)
assert r.startswith("GraphMemoryResourceAttributes(")
assert r.endswith(")")
assert "reserved_mem_current=" in r
assert "reserved_mem_high=" in r
assert "used_mem_current=" in r
assert "used_mem_high=" in r


@pytest.mark.parametrize("mode", ["global", "thread_local", "relaxed"])
def test_dmr_check_capture_state(mempool_device, mode):
"""
Expand Down
60 changes: 59 additions & 1 deletion cuda_core/tests/test_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,64 @@ def test_launch_config_native_conversion(init_cuda):
pytest.skip("Driver or GPU not new enough for thread block clusters")


def test_to_native_launch_config_no_cluster():
"""Covers the no-cluster path of _to_native_launch_config; no Hopper+ required."""
from cuda.core._launch_config import _to_native_launch_config

config = LaunchConfig(grid=(4, 5, 6), block=(7, 8, 9), shmem_size=128)
native = _to_native_launch_config(config)
assert native.gridDimX == 4, f"Expected gridDimX=4, got {native.gridDimX}"
assert native.gridDimY == 5, f"Expected gridDimY=5, got {native.gridDimY}"
assert native.gridDimZ == 6, f"Expected gridDimZ=6, got {native.gridDimZ}"
assert native.blockDimX == 7, f"Expected blockDimX=7, got {native.blockDimX}"
assert native.blockDimY == 8, f"Expected blockDimY=8, got {native.blockDimY}"
assert native.blockDimZ == 9, f"Expected blockDimZ=9, got {native.blockDimZ}"
assert native.sharedMemBytes == 128, f"Expected sharedMemBytes=128, got {native.sharedMemBytes}"
assert native.numAttrs == 0, f"Expected numAttrs=0, got {native.numAttrs}"
assert list(native.attrs) == [], f"Expected empty attrs, got {list(native.attrs)}"


def test_launch_config_cooperative_unsupported(monkeypatch):
"""LaunchConfig(is_cooperative=True) raises when device does not support it."""
from cuda.core import _launch_config as _lc_mod

class _FakeProps:
cooperative_launch = False

class _FakeDev:
properties = _FakeProps()

monkeypatch.setattr(_lc_mod, "Device", lambda: _FakeDev())
with pytest.raises(CUDAError, match="cooperative kernels are not supported"):
LaunchConfig(grid=1, block=1, is_cooperative=True)


def test_to_native_launch_config_cooperative(monkeypatch):
"""Covers the is_cooperative branch of _to_native_launch_config; Device is mocked so it runs on any GPU."""
from cuda.bindings import driver
from cuda.core import _launch_config as _lc_mod
from cuda.core._launch_config import _to_native_launch_config

class _FakeProps:
cooperative_launch = True

class _FakeDev:
properties = _FakeProps()

monkeypatch.setattr(_lc_mod, "Device", lambda: _FakeDev())

config = LaunchConfig(grid=2, block=4, is_cooperative=True)
native = _to_native_launch_config(config)
assert native.gridDimX == 2
assert native.blockDimX == 4
assert native.numAttrs == 1
attr = native.attrs[0]
assert attr.id == driver.CUlaunchAttributeID.CU_LAUNCH_ATTRIBUTE_COOPERATIVE, (
f"Expected CU_LAUNCH_ATTRIBUTE_COOPERATIVE, got {attr.id}"
)
assert attr.value.cooperative == 1, f"Expected cooperative=1, got {attr.value.cooperative}"


def test_launch_invalid_values(init_cuda):
code = 'extern "C" __global__ void my_kernel() {}'
program = Program(code, SourceCodeType.CXX)
Expand Down Expand Up @@ -403,7 +461,7 @@ class MyFloat(ctypes.c_float):
class MyBool(ctypes.c_bool):
pass

# These should NOT raise they should be handled via isinstance fallback
# These should NOT raise; they should be handled via isinstance fallback
holder = ParamHolder([MyInt32(42), MyFloat(3.14), MyBool(True)])
assert holder.ptr != 0

Expand Down
90 changes: 90 additions & 0 deletions cuda_core/tests/test_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,93 @@ def test_which_backend_is_not_property(self):
"""
attr = inspect.getattr_static(Linker, "which_backend")
assert not isinstance(attr, property)


@pytest.fixture
def driver_binding(monkeypatch):
"""Pin _linker._driver to the real driver module so driver-backend tests run under any backend."""
from cuda.bindings import driver

monkeypatch.setattr(_linker, "_driver", driver)
return driver


def test_prepare_driver_options_all_supported(driver_binding):
"""Exercise every supported branch of _prepare_driver_options."""
driver = driver_binding
opts = LinkerOptions(
arch="sm_80",
max_register_count=32,
verbose=True,
link_time_optimization=True,
optimization_level=2,
debug=True,
lineinfo=True,
no_cache=True,
)
formatted, keys = opts._prepare_driver_options()
assert len(formatted) == len(keys)
assert len(keys) == 4 + 8 # 4 fixed log-buffer entries + 8 options set above

# Skip log-buffer entries; verify key-to-value mapping (catches swap/dup/wrong-value).
payload_keys = keys[4:]
assert len(set(payload_keys)) == len(payload_keys), f"duplicate option keys: {payload_keys}"
option_to_value = dict(zip(payload_keys, formatted[4:]))
assert option_to_value[driver.CUjit_option.CU_JIT_TARGET] == driver.CUjit_target.CU_TARGET_COMPUTE_80
assert option_to_value[driver.CUjit_option.CU_JIT_MAX_REGISTERS] == 32
assert option_to_value[driver.CUjit_option.CU_JIT_LOG_VERBOSE] == 1
assert option_to_value[driver.CUjit_option.CU_JIT_LTO] == 1
assert option_to_value[driver.CUjit_option.CU_JIT_OPTIMIZATION_LEVEL] == 2
assert option_to_value[driver.CUjit_option.CU_JIT_GENERATE_DEBUG_INFO] == 1
assert option_to_value[driver.CUjit_option.CU_JIT_GENERATE_LINE_INFO] == 1
assert option_to_value[driver.CUjit_option.CU_JIT_CACHE_MODE] == driver.CUjit_cacheMode.CU_JIT_CACHE_OPTION_NONE


@pytest.mark.parametrize(
"kwargs,match",
[
({"ftz": True}, "ftz option is deprecated"),
({"prec_div": True}, "prec_div option is deprecated"),
({"prec_sqrt": True}, "prec_sqrt option is deprecated"),
({"fma": True}, "fma options is deprecated"),
({"kernels_used": "my_kernel"}, "kernels_used is deprecated"),
({"variables_used": "my_var"}, "variables_used is deprecated"),
({"optimize_unused_variables": True}, "optimize_unused_variables is deprecated"),
],
)
def test_prepare_driver_options_deprecated_warnings(driver_binding, kwargs, match):
"""Each driver-deprecated option emits a DeprecationWarning."""
opts = LinkerOptions(**kwargs)
with pytest.warns(DeprecationWarning, match=match):
opts._prepare_driver_options()


@pytest.mark.parametrize(
"kwargs,match",
[
({"time": True}, "time option is not supported by the driver API"),
({"ptx": True}, "ptx option is not supported by the driver API"),
({"ptxas_options": ["-v"]}, "ptxas_options option is not supported by the driver API"),
({"split_compile": 0}, "split_compile option is not supported by the driver API"),
({"split_compile_extended": 1}, "split_compile_extended option is not supported by the driver API"),
],
)
def test_prepare_driver_options_unsupported_raises(driver_binding, kwargs, match):
"""Each nvjitlink-only option raises ValueError on the driver backend."""
opts = LinkerOptions(**kwargs)
with pytest.raises(ValueError, match=match):
opts._prepare_driver_options()


def test_linker_empty_object_codes_raises():
"""Linker with no ObjectCode raises ValueError."""
with pytest.raises(ValueError, match="At least one ObjectCode object must be provided"):
Linker()


def test_as_bytes_nvjitlink_unavailable(monkeypatch):
"""as_bytes('nvjitlink') raises RuntimeError when the backend is unavailable."""
monkeypatch.setattr(_linker, "_use_nvjitlink_backend", False)
opts = LinkerOptions(arch="sm_80")
with pytest.raises(RuntimeError, match="nvJitLink backend is not available"):
opts.as_bytes("nvjitlink")
134 changes: 134 additions & 0 deletions cuda_core/tests/test_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
DeviceMemoryResource,
DeviceMemoryResourceOptions,
GraphMemoryResource,
LegacyPinnedMemoryResource,
ManagedMemoryResource,
ManagedMemoryResourceOptions,
MemoryResource,
Expand Down Expand Up @@ -1760,3 +1761,136 @@ def test_top_level_namespace_excludes_known_leaks():
public = {n for n in dir(cuda.core) if not n.startswith("_")}
leaked = {"StridedMemoryView", "args_viewable_as_strided_memory"}
assert not (public & leaked)


def test_legacy_pinned_allocate_zero_size(init_cuda):
"""LegacyPinnedMemoryResource.allocate(0) skips the driver call and uses ptr=0."""
mr = LegacyPinnedMemoryResource()
buf = mr.allocate(0)
assert buf.size == 0
# No driver call was made; handle is the sentinel 0.
assert int(buf.handle) == 0


def test_legacy_pinned_device_id_raises():
"""LegacyPinnedMemoryResource.device_id raises; pinned memory is not bound to a GPU."""
mr = LegacyPinnedMemoryResource()
with pytest.raises(RuntimeError, match="not bound to any GPU"):
_ = mr.device_id


def test_synchronous_memory_resource_basic(init_cuda):
"""_SynchronousMemoryResource exercises properties and allocate paths (zero, non-zero, with-stream)."""
from cuda.core._memory._legacy import _SynchronousMemoryResource

dev = Device()
mr = _SynchronousMemoryResource(dev.device_id)
assert mr.is_device_accessible is True
assert mr.is_host_accessible is False
assert mr.device_id == dev.device_id

# Zero-size allocation takes the ptr=0 fast path.
zero_buf = mr.allocate(0)
assert zero_buf.size == 0
assert int(zero_buf.handle) == 0
zero_buf.close(stream=None)

# Non-zero allocation goes through cuMemAlloc; close with stream=None for
# the simple path. The explicit-stream close path is covered separately.
buf = mr.allocate(64)
assert buf.size == 64
assert int(buf.handle) != 0
buf.close(stream=None)

# allocate(size, stream=stream) exercises Stream_accept validation on the
# allocate side (cuMemAlloc is synchronous so the stream is accepted but unused).
stream = dev.create_stream()
buf2 = mr.allocate(32, stream=stream)
assert buf2.size == 32
assert int(buf2.handle) != 0
buf2.close(stream=None)
stream.close()


def test_synchronous_memory_resource_deallocate_accepts_stream(init_cuda):
"""_SynchronousMemoryResource.deallocate accepts an explicit stream."""
from cuda.core._memory._legacy import _SynchronousMemoryResource

dev = Device()
mr = _SynchronousMemoryResource(dev.device_id)
buf = mr.allocate(64)
stream = dev.create_stream()
buf.close(stream=stream)
stream.close()


@pytest.mark.parametrize(
("method", "spec", "match"),
[
("_access_to_flags", "bogus", "Unknown access spec"),
("_allocation_type_to_driver", "bogus", "Unsupported allocation_type"),
("_location_type_to_driver", "bogus", "Unsupported location_type"),
("_handle_type_to_driver", "bogus", "Unsupported handle_type"),
("_granularity_to_driver", "bogus", "Unsupported granularity"),
],
)
def test_vmm_options_spec_validators_raise(method, spec, match):
"""Every VMM spec validator static method rejects unknown strings with ValueError."""
fn = getattr(VirtualMemoryResourceOptions, method)
with pytest.raises(ValueError, match=match):
fn(spec)


def test_vmm_options_handle_type_win32_raises():
"""_handle_type_to_driver raises NotImplementedError for 'win32'."""
with pytest.raises(NotImplementedError, match="win32 is currently not supported"):
VirtualMemoryResourceOptions._handle_type_to_driver("win32")


def test_device_memory_resource_peer_accessible_by_non_owned(mempool_device):
"""peer_accessible_by on a non-owned (default) DMR queries the driver live."""
dev = mempool_device
# The default DeviceMemoryResource(device) wraps the current device's
# default pool, i.e. _mempool_owned is False, so accessing
# peer_accessible_by exercises the live _DMR_query_peer_access path.
mr = DeviceMemoryResource(dev)
peers = mr.peer_accessible_by
assert all(isinstance(p, Device) for p in peers)
# __contains__ accepts int dev_ids; the owning device is never a peer.
assert dev.device_id not in peers


def test_dmr_mempool_get_access_self(mempool_device):
"""DMR_mempool_get_access returns 'rw' when querying the owning device itself."""
from cuda.core._memory._device_memory_resource import DMR_mempool_get_access

mr = DeviceMemoryResource(mempool_device)
# The owning device always has read-write access to its own pool.
assert DMR_mempool_get_access(mr, mempool_device.device_id) == "rw"


def test_dmr_mempool_get_access_peer(mempool_device_x2):
"""DMR_mempool_get_access reflects peer access state for a different device."""
from cuda.core._memory._device_memory_resource import DMR_mempool_get_access

dev, peer = mempool_device_x2
# Use an owned pool so peer-access state isn't contaminated by other tests
# that may have set access on the device's default pool.
mr = DeviceMemoryResource(dev, DeviceMemoryResourceOptions())

# Fresh owned pool: peer has no access.
assert DMR_mempool_get_access(mr, peer.device_id) == ""
# After granting access, peer has read-write.
mr.peer_accessible_by = [peer]
assert DMR_mempool_get_access(mr, peer.device_id) == "rw"
# After revoking, peer is back to no access.
mr.peer_accessible_by = []
assert DMR_mempool_get_access(mr, peer.device_id) == ""


def test_dmr_peer_accessible_by_setter_empty(mempool_device):
"""Assigning an empty peer-access set to a fresh owned pool is a no-op."""
mr = DeviceMemoryResource(mempool_device, options=DeviceMemoryResourceOptions())
assert set(mr.peer_accessible_by) == set()
mr.peer_accessible_by = []
assert set(mr.peer_accessible_by) == set()
43 changes: 43 additions & 0 deletions cuda_core/tests/test_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,49 @@ def test_extra_sources_empty_source():
ProgramOptions(name="test", arch="sm_80", extra_sources=[("mod", b"")])


@pytest.mark.parametrize(
("extra_sources", "expected"),
[
(None, None),
([("mod_s", "kernel-as-string")], [(b"mod_s", b"kernel-as-string")]),
(
[("mod_ba", bytearray(b"\x00\x01module-as-bytearray"))],
[(b"mod_ba", b"\x00\x01module-as-bytearray")],
),
([("mod_b", b"\x00\x01module-as-bytes")], [(b"mod_b", b"\x00\x01module-as-bytes")]),
],
ids=["none", "str", "bytearray", "bytes"],
)
def test_prepare_extra_sources_bytes(extra_sources, expected):
"""_prepare_extra_sources_bytes converts each input type to (bytes, bytes) tuples (None passthrough)."""
# arch is set to skip __post_init__'s Device() lookup, keeping this a pure unit test.
opts = ProgramOptions(name="t", arch="sm_80", extra_sources=extra_sources)
result = opts._prepare_extra_sources_bytes()
assert result == expected
# bytearray == bytes by content, so == alone misses type regressions.
if result is not None:
for name, source in result:
assert isinstance(name, bytes), f"name should be bytes, got {type(name).__name__}"
assert isinstance(source, bytes), f"source should be bytes, got {type(source).__name__}"


def test_find_libdevice_path_delegates_to_pathfinder(monkeypatch):
"""_find_libdevice_path calls cuda.pathfinder.find_bitcode_lib('device') and returns its result."""
import cuda.pathfinder
from cuda.core import _program

captured = []
sentinel = "/fake/path/libdevice.10.bc"

def fake_find(name):
captured.append(name)
return sentinel

monkeypatch.setattr(cuda.pathfinder, "find_bitcode_lib", fake_find)
assert _program._find_libdevice_path() == sentinel
assert captured == ["device"]


def test_nvrtc_compile_with_logs_capture(init_cuda):
"""Program.compile with logs= exercises the NVRTC program-log reading path."""
import io
Expand Down
Loading
Loading