This is an automated email from the ASF dual-hosted git repository.
ryankert01 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/main by this push:
new 54b0f2363 fix(qdp): fall back to CPU when GPU arch not in PyTorch's compiled list (#1323)
54b0f2363 is described below
commit 54b0f23633b5ffcd51479ec272a305b099c37d2e
Author: Andrew Musselman <[email protected]>
AuthorDate: Mon May 18 04:48:43 2026 -0700
fix(qdp): fall back to CPU when GPU arch not in PyTorch's compiled list (#1323)
---
qdp/qdp-python/qumat_qdp/api.py | 11 ++------
qdp/qdp-python/qumat_qdp/loader.py | 53 ++++++++++++++++++++++++++++++------
testing/qdp_python/test_torch_ref.py | 32 ++++++++++++++++++++--
3 files changed, 76 insertions(+), 20 deletions(-)
diff --git a/qdp/qdp-python/qumat_qdp/api.py b/qdp/qdp-python/qumat_qdp/api.py
index 6493dd0f3..e1e8b20e3 100644
--- a/qdp/qdp-python/qumat_qdp/api.py
+++ b/qdp/qdp-python/qumat_qdp/api.py
@@ -189,17 +189,10 @@ class QdpBenchmark:
def _run_throughput_pytorch(self) -> ThroughputResult:
import torch
+ from qumat_qdp.loader import _select_torch_device
from qumat_qdp.torch_ref import encode
- if torch.cuda.is_available():
- if self._device_id < 0 or self._device_id >= torch.cuda.device_count():
- raise ValueError(
- f"Invalid CUDA device_id {self._device_id}; "
- f"{torch.cuda.device_count()} device(s) available."
- )
- device = f"cuda:{self._device_id}"
- else:
- device = "cpu"
+ device = _select_torch_device(torch, self._device_id)
# _validate() guarantees these are not None.
assert self._num_qubits is not None
assert self._total_batches is not None
diff --git a/qdp/qdp-python/qumat_qdp/loader.py b/qdp/qdp-python/qumat_qdp/loader.py
index a3443f1ba..34fae6a1a 100644
--- a/qdp/qdp-python/qumat_qdp/loader.py
+++ b/qdp/qdp-python/qumat_qdp/loader.py
@@ -75,6 +75,49 @@ _BACKEND_AUTO = "auto"
_VALID_BACKENDS = frozenset({_BACKEND_RUST, _BACKEND_PYTORCH, _BACKEND_AUTO})
+def _select_torch_device(torch, device_id: int) -> str:
+ """Pick a torch device the current PyTorch build can actually use.
+
+ ``torch.cuda.is_available()`` returns True whenever a usable driver and at
+ least one GPU are present, but does not check whether the GPU's compute
+ capability is in the PyTorch wheel's compiled arch list. Running on an
+ unsupported GPU surfaces as ``cudaErrorNoKernelImageForDevice`` the first
+ time a kernel launches -- a particularly opaque failure for users on
+ Pascal-and-earlier hardware where recent PyTorch wheels no longer ship
+ matching kernels.
+
+ Intersect the device's capability with ``torch.cuda.get_arch_list()`` and
+ fall back to CPU (with a warning) when they don't match. Raises
+ ``ValueError`` on an out-of-range ``device_id`` to preserve the prior
+ contract for callers that explicitly request a specific GPU.
+ """
+ if not torch.cuda.is_available():
+ return "cpu"
+
+ if device_id < 0 or device_id >= torch.cuda.device_count():
+ raise ValueError(
+ f"Invalid CUDA device_id {device_id}; "
+ f"{torch.cuda.device_count()} device(s) available."
+ )
+
+ arch_list = torch.cuda.get_arch_list()
+ if arch_list:
+ major, minor = torch.cuda.get_device_capability(device_id)
+ device_arch = f"sm_{major}{minor}"
+ if device_arch not in arch_list:
+ warnings.warn(
+ f"GPU {device_id} ({torch.cuda.get_device_name(device_id)}, "
+ f"{device_arch}) is not in this PyTorch build's supported "
+ f"arch list ({sorted(arch_list)}). Falling back to CPU. "
+ "Install a PyTorch wheel that targets this GPU, or set "
+ "CUDA_VISIBLE_DEVICES= to silence this warning.",
+ stacklevel=2,
+ )
+ return "cpu"
+
+ return f"cuda:{device_id}"
+
+
def _path_extension(path: str) -> str:
"""Return the lowercase extension of `path` (handling remote URLs/queries)."""
is_remote = "://" in path
@@ -478,15 +521,7 @@ class QuantumDataLoader:
from qumat_qdp.torch_ref import encode
- if torch.cuda.is_available():
- if self._device_id < 0 or self._device_id >= torch.cuda.device_count():
- raise ValueError(
- f"Invalid CUDA device_id {self._device_id}; "
- f"{torch.cuda.device_count()} device(s) available."
- )
- device = f"cuda:{self._device_id}"
- else:
- device = "cpu"
+ device = _select_torch_device(torch, self._device_id)
if use_synthetic:
return self._pytorch_synthetic_iter(torch, encode, device)
diff --git a/testing/qdp_python/test_torch_ref.py b/testing/qdp_python/test_torch_ref.py
index c6c49883b..0fe3fffd1 100644
--- a/testing/qdp_python/test_torch_ref.py
+++ b/testing/qdp_python/test_torch_ref.py
@@ -36,6 +36,28 @@ from qumat_qdp.torch_ref import (
iqp_encode,
)
+
+def _torch_cuda_usable(device_id: int = 0) -> bool:
+ """True iff the current PyTorch build can launch kernels on ``device_id``.
+
+ ``torch.cuda.is_available()`` alone is not enough: on GPUs whose compute
+ capability isn't in the wheel's compiled arch list (e.g. Pascal sm_61
+ against a recent wheel that ships sm_70+), it returns True but every
+ kernel launch fails with ``cudaErrorNoKernelImageForDevice``. Mirror
+ ``qumat_qdp.loader._select_torch_device``'s capability check so the
+ GPU-only tests skip cleanly instead of erroring.
+ """
+ if not torch.cuda.is_available():
+ return False
+ if device_id < 0 or device_id >= torch.cuda.device_count():
+ return False
+ arch_list = torch.cuda.get_arch_list()
+ if not arch_list:
+ return True
+ major, minor = torch.cuda.get_device_capability(device_id)
+ return f"sm_{major}{minor}" in arch_list
+
+
# ---------------------------------------------------------------------------
# Amplitude encoding
# ---------------------------------------------------------------------------
@@ -349,7 +371,10 @@ class TestDevicePlacement:
result = amplitude_encode(data, num_qubits=2, device="cpu")
assert result.device.type == "cpu"
- @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+ @pytest.mark.skipif(
+ not _torch_cuda_usable(),
+ reason="CUDA not available or GPU compute capability not supported by this PyTorch build",
+ )
def test_gpu_output(self):
data = torch.randn(2, 4, dtype=torch.float64)
result = amplitude_encode(data, num_qubits=2, device="cuda:0")
@@ -369,7 +394,10 @@ class TestCrossValidation:
pytest.importorskip("_qdp")
@pytest.mark.gpu
- @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
+ @pytest.mark.skipif(
+ not _torch_cuda_usable(),
+ reason="CUDA not available or GPU compute capability not supported by this PyTorch build",
+ )
@pytest.mark.parametrize("encoding", ["amplitude", "angle", "basis", "iqp"])
def test_encoding_matches_rust(self, encoding):
import _qdp