This is an automated email from the ASF dual-hosted git repository.
400Ping pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/main by this push:
new 599a2616f docs: refresh QDP Python API docstrings (#1335)
599a2616f is described below
commit 599a2616f7e1902468b5ef04c41abe955e564f21
Author: Vic Wen <[email protected]>
AuthorDate: Wed Jun 3 09:47:35 2026 +0800
docs: refresh QDP Python API docstrings (#1335)
---
qdp/qdp-python/qumat_qdp/api.py | 61 +++++++++++++++--
qdp/qdp-python/qumat_qdp/backend.py | 17 ++++-
qdp/qdp-python/qumat_qdp/loader.py | 119 ++++++++++++++++++++++++++++-----
qdp/qdp-python/qumat_qdp/tensor.py | 15 ++++-
qdp/qdp-python/qumat_qdp/triton_amd.py | 20 +++++-
5 files changed, 206 insertions(+), 26 deletions(-)
diff --git a/qdp/qdp-python/qumat_qdp/api.py b/qdp/qdp-python/qumat_qdp/api.py
index c593e5d41..a872a5547 100644
--- a/qdp/qdp-python/qumat_qdp/api.py
+++ b/qdp/qdp-python/qumat_qdp/api.py
@@ -38,7 +38,12 @@ from typing import Any
@dataclass
class ThroughputResult:
- """Result of run_throughput(): duration and vectors per second."""
+ """Throughput benchmark measurement.
+
+ Returned by :meth:`QdpBenchmark.run_throughput`. ``duration_sec`` is the
+ measured timed section after any configured warmup batches. ``vectors_per_sec``
+ is computed over ``total_batches * batch_size`` encoded input vectors.
+ """
duration_sec: float
vectors_per_sec: float
@@ -46,7 +51,12 @@ class ThroughputResult:
@dataclass
class LatencyResult:
- """Result of run_latency(): duration and ms per vector."""
+ """Latency benchmark measurement.
+
+ Returned by :meth:`QdpBenchmark.run_latency`. ``duration_sec`` is the same
+ timed interval used for throughput, and ``latency_ms_per_vector`` is the
+ average milliseconds per encoded input vector across the measured batches.
+ """
duration_sec: float
latency_ms_per_vector: float
@@ -132,7 +142,15 @@ class QdpBenchmark:
return self
def prefetch(self, n: int) -> QdpBenchmark:
- """No-op for API compatibility; Rust pipeline does not use prefetch from Python."""
+ """Accept a prefetch setting for fluent API compatibility.
+
+ The current Rust benchmark pipeline manages work internally and the
+ PyTorch reference path does not use a Python-side prefetch queue, so
+ ``n`` is intentionally ignored.
+
+ :param n: Requested prefetch depth; currently unused.
+ :returns: ``self`` for fluent builder chaining.
+ """
return self
def warmup(self, n: int) -> QdpBenchmark:
@@ -145,7 +163,17 @@ class QdpBenchmark:
return self
def backend(self, name: str) -> QdpBenchmark:
- """Set benchmark backend: ``'rust'`` or ``'pytorch'``."""
+ """Select the benchmark execution backend.
+
+ ``"rust"`` (the default) uses the native optimized pipeline exposed by
+ the ``_qdp`` extension and raises at run time if that extension or entry
+ point is unavailable. ``"pytorch"`` uses the pure-PyTorch reference
+ implementation on the selected CUDA device when usable, otherwise CPU.
+
+ :param name: Backend name, either ``"rust"`` or ``"pytorch"``.
+ :returns: ``self`` for fluent builder chaining.
+ :raises ValueError: If ``name`` is not a supported backend.
+ """
if name not in ("rust", "pytorch"):
raise ValueError(f"backend must be 'rust' or 'pytorch', got {name!r}")
self._backend_name = name
@@ -158,14 +186,35 @@ class QdpBenchmark:
)
def run_throughput(self) -> ThroughputResult:
- """Run throughput benchmark using the selected backend."""
+ """Run the configured throughput benchmark.
+
+ ``qubits()`` and ``batches()`` must be configured before calling this
+ method. The default ``"rust"`` backend calls the native ``_qdp``
+ pipeline with any configured warmup batches; ``"pytorch"`` runs the
+ reference encoder loop and synchronizes CUDA timing when applicable.
+
+ :returns: A :class:`ThroughputResult` containing elapsed seconds and
+ encoded vectors per second.
+ :raises ValueError: If required benchmark parameters are missing.
+ :raises RuntimeError: If the Rust backend is selected but unavailable.
+ """
self._validate()
if self._backend_name == "pytorch":
return self._run_throughput_pytorch()
return self._run_throughput_rust()
def run_latency(self) -> LatencyResult:
- """Run latency benchmark using the selected backend."""
+ """Run the configured latency benchmark.
+
+ ``qubits()`` and ``batches()`` must be configured before calling this
+ method. The Rust backend reports latency from the native pipeline; the
+ PyTorch backend derives average latency from its throughput run.
+
+ :returns: A :class:`LatencyResult` containing elapsed seconds and mean
+ milliseconds per encoded vector.
+ :raises ValueError: If required benchmark parameters are missing.
+ :raises RuntimeError: If the Rust backend is selected but unavailable.
+ """
self._validate()
if self._backend_name == "pytorch":
return self._run_latency_pytorch()
diff --git a/qdp/qdp-python/qumat_qdp/backend.py b/qdp/qdp-python/qumat_qdp/backend.py
index 098662d88..42f105172 100644
--- a/qdp/qdp-python/qumat_qdp/backend.py
+++ b/qdp/qdp-python/qumat_qdp/backend.py
@@ -70,7 +70,22 @@ def _select_engine_adapter(
class QdpEngine:
- """Unified Python facade over the CUDA and Triton engine routes."""
+ """Select and delegate to a native QDP encoding backend.
+
+ ``QdpEngine`` is the small public Python facade used by callers that want
+ explicit backend selection. ``backend="cuda"`` routes to the Rust/CUDA
+ extension-backed engine. ``backend="amd"`` and ``backend="triton_amd"``
+ route to the AMD/Triton implementation. The selected backend is exposed as
+ ``self.backend`` (``"cuda"`` or ``"amd"``) and all ``encode()`` calls are
+ forwarded to that engine.
+
+ :param device_id: GPU device ordinal to use.
+ :param precision: Numeric precision requested from the backend, such as
+ ``"float32"`` or ``"float64"`` when supported by that backend.
+ :param backend: Backend selector. Valid values are ``"cuda"``, ``"amd"``,
+ and ``"triton_amd"``.
+ :raises ValueError: If ``backend`` is not one of the supported selectors.
+ """
def __init__(
self,
diff --git a/qdp/qdp-python/qumat_qdp/loader.py b/qdp/qdp-python/qumat_qdp/loader.py
index 34fae6a1a..8f05affe7 100644
--- a/qdp/qdp-python/qumat_qdp/loader.py
+++ b/qdp/qdp-python/qumat_qdp/loader.py
@@ -252,10 +252,14 @@ def _sample_dim(num_qubits: int, encoding_method: str) -> int:
class QuantumDataLoader:
"""
- Builder for a synthetic-data quantum encoding iterator.
-
- Yields one QuantumTensor (batch) per iteration. All encoding runs in Rust;
- __iter__ returns the Rust-backed iterator from create_synthetic_loader.
+ Builder for batched QDP encoding iterators.
+
+ ``QuantumDataLoader`` can generate synthetic input samples or read supported
+ file formats, then encode each batch with the selected backend. The default
+ ``"rust"`` backend returns Rust-backed ``QuantumTensor`` batches, while the
+ explicit ``"pytorch"`` backend returns ``torch.Tensor`` batches. The
+ ``"auto"`` backend tries the Rust extension first and falls back to PyTorch
+ when the native extension is unavailable.
"""
def __init__(
@@ -291,14 +295,37 @@ class QuantumDataLoader:
self._backend_name: str = _BACKEND_RUST
def qubits(self, n: int) -> QuantumDataLoader:
- """Set number of qubits. Returns self for chaining."""
+ """Set the number of qubits used by subsequent encodings.
+
+ ``n`` must be a positive integer. The value controls the encoded state
+ size (for example, amplitude and phase-style encodings produce vectors
+ of length ``2**n``) and the expected input width for encodings such as
+ ``"angle"`` and ``"iqp-z"``.
+
+ :param n: Positive qubit count.
+ :returns: ``self`` for fluent builder chaining.
+ :raises ValueError: If ``n`` is not a positive integer.
+ """
if not isinstance(n, int) or n < 1:
raise ValueError(f"num_qubits must be a positive integer, got {n!r}")
self._num_qubits = n
return self
def encoding(self, method: str) -> QuantumDataLoader:
- """Set encoding method (e.g. 'amplitude', 'angle', 'basis'). Returns self."""
+ """Set the quantum feature encoding method.
+
+ Valid values are ``"amplitude"``, ``"angle"``, ``"basis"``,
+ ``"iqp"``, ``"iqp-z"``, and ``"phase"``. Use these canonical
+ lowercase names because the selected backend receives the string exactly
+ as supplied. The PyTorch reference backend supports the same methods as
+ :mod:`qumat_qdp.torch_ref`; use the native backend for methods that are
+ not available in the reference path.
+
+ :param method: Encoding method name.
+ :returns: ``self`` for fluent builder chaining.
+ :raises ValueError: If ``method`` is empty, not a string, or not a
+ supported encoding.
+ """
if not method or not isinstance(method, str):
raise ValueError(
f"encoding_method must be a non-empty string, got {method!r}"
@@ -312,7 +339,19 @@ class QuantumDataLoader:
return self
def batches(self, total: int, size: int = 64) -> QuantumDataLoader:
- """Set total number of batches and batch size. Returns self."""
+ """Set the number of batches to produce and samples per batch.
+
+ Both ``total`` and ``size`` must be positive integers. For synthetic
+ sources, ``total`` is the exact number of generated batches. For file
+ sources handled by the PyTorch fallback, iteration stops at the smaller
+ of ``total`` and the number of complete/partial batches available from
+ the loaded file.
+
+ :param total: Positive maximum number of batches to emit.
+ :param size: Positive number of samples per encoded batch.
+ :returns: ``self`` for fluent builder chaining.
+ :raises ValueError: If either argument is not a positive integer.
+ """
if not isinstance(total, int) or total < 1:
raise ValueError(f"total_batches must be a positive integer, got {total!r}")
if not isinstance(size, int) or size < 1:
@@ -325,7 +364,22 @@ class QuantumDataLoader:
self,
total_batches: int | None = None,
) -> QuantumDataLoader:
- """Use synthetic data source (default). Optionally override total_batches. Returns self."""
+ """Select the synthetic data source.
+
+ Synthetic data is the default when no file source is configured, but
+ calling this method records the source choice explicitly. Use
+ ``seed()`` to make generated samples reproducible where the selected
+ backend supports seeded generation. If ``total_batches`` is provided,
+ it overrides the current batch count and must be a positive integer.
+ Selecting both ``source_synthetic()`` and ``source_file()`` on the same
+ loader is rejected when iteration starts.
+
+ :param total_batches: Optional positive replacement for the configured
+ number of batches.
+ :returns: ``self`` for fluent builder chaining.
+ :raises ValueError: If ``total_batches`` is provided but is not a
+ positive integer.
+ """
self._synthetic_requested = True
if total_batches is not None:
if not isinstance(total_batches, int) or total_batches < 1:
@@ -336,12 +390,21 @@ class QuantumDataLoader:
return self
def source_file(self, path: str, streaming: bool = False) -> QuantumDataLoader:
- """Use file data source. Path must point to a supported format. Returns self.
-
- For streaming=True (Phase 2b), only .parquet is supported; data is read in chunks to reduce memory.
- For streaming=False, supports .parquet, .arrow, .feather, .ipc, .npy, .pt, .pth, .pb.
- Remote paths (s3://, gs://) are supported when the remote-io feature is enabled.
- Remote URL query/fragment (for example ?versionId=... or #...) is not supported.
+ """Use a file data source.
+
+ Non-streaming native loading accepts ``.parquet``, ``.arrow``,
+ ``.feather``, ``.ipc``, ``.npy``, ``.pt``, ``.pth``, and ``.pb`` files.
+ The PyTorch fallback path supports only ``.npy``, ``.pt``, and ``.pth``
+ inputs because it loads the full tensor into memory before encoding.
+ Streaming mode is native-only and currently accepts ``.parquet`` files.
+ Remote ``s3://`` and ``gs://`` paths are accepted when the native remote
+ I/O feature is enabled; remote query strings and fragments are rejected.
+
+ :param path: Local or supported remote input path.
+ :param streaming: Whether to request native streaming file loading.
+ :returns: ``self`` for fluent builder chaining.
+ :raises ValueError: If ``path`` is empty, includes an unsupported remote
+ query/fragment, or requests streaming for an unsupported extension.
"""
if not path or not isinstance(path, str):
raise ValueError(f"path must be a non-empty string, got {path!r}")
@@ -363,7 +426,17 @@ class QuantumDataLoader:
return self
def seed(self, s: int | None = None) -> QuantumDataLoader:
- """Set RNG seed for reproducible synthetic data (must fit Rust u64: 0 <= seed <= 2^64-1). Returns self."""
+ """Set or clear the synthetic data seed.
+
+ ``None`` leaves the loader unseeded for the native Rust path and maps to
+ the PyTorch reference path's default deterministic seed. Integer seeds
+ must fit Rust ``u64`` so the same configuration can be passed to the
+ native backend.
+
+ :param s: ``None`` or an integer in ``[0, 2**64 - 1]``.
+ :returns: ``self`` for fluent builder chaining.
+ :raises ValueError: If ``s`` is not ``None`` or a valid Rust ``u64``.
+ """
if s is not None:
if not isinstance(s, int):
raise ValueError(
@@ -377,7 +450,19 @@ class QuantumDataLoader:
return self
def null_handling(self, policy: str) -> QuantumDataLoader:
- """Set null handling policy ('fill_zero' or 'reject'). Returns self for chaining."""
+ """Set how nullable file inputs are handled by the native loader.
+
+ Valid policies are ``"fill_zero"`` (replace nulls with zero before
+ encoding) and ``"reject"`` (fail on null input). The policy is passed
+ through to Rust file and synthetic loader creation when available. The
+ PyTorch fallback loaders do not consume this setting because supported
+ ``.npy``/``.pt``/``.pth`` inputs are loaded as dense tensors.
+
+ :param policy: Null handling policy, either ``"fill_zero"`` or
+ ``"reject"``.
+ :returns: ``self`` for fluent builder chaining.
+ :raises ValueError: If ``policy`` is not supported.
+ """
if policy not in ("fill_zero", "reject"):
raise ValueError(
f"null_handling must be 'fill_zero' or 'reject', got {policy!r}"
@@ -618,7 +703,7 @@ class QuantumDataLoader:
.as_torch_dataset())
loader = torch.utils.data.DataLoader(dataset, batch_size=None, num_workers=0)
for batch in loader:
- ... # batch is torch.Tensor, shape (64, 2*2^16)
+ ... # batch is torch.Tensor, shape (64, 2**16)
Note: ``batch_size=None`` in DataLoader disables DataLoader's own batching;
``num_workers=0`` is required because the Rust backend holds GPU state that
diff --git a/qdp/qdp-python/qumat_qdp/tensor.py b/qdp/qdp-python/qumat_qdp/tensor.py
index c9d651a73..bdabf931b 100644
--- a/qdp/qdp-python/qumat_qdp/tensor.py
+++ b/qdp/qdp-python/qumat_qdp/tensor.py
@@ -24,7 +24,20 @@ from typing import Any
@dataclass
class QdpTensor:
- """Thin DLPack facade over backend-native tensor producers."""
+ """DLPack-compatible wrapper for backend-native QDP tensor results.
+
+ The Rust/CUDA path and other native backends may return objects whose
+ concrete tensor type is backend-specific. ``QdpTensor`` preserves that
+ object in ``value`` while exposing ``__dlpack__`` and ``__dlpack_device__``
+ so consumers such as PyTorch can import it without a copy.
+
+ :param value: Backend-native tensor-like object. It must implement the
+ DLPack protocol when converted with ``to_torch()`` or
+ ``torch.from_dlpack``.
+ :param backend: Human-readable backend name used in error messages.
+ :raises RuntimeError: If ``value`` does not implement the required DLPack
+ methods when conversion is attempted.
+ """
value: Any
backend: str
diff --git a/qdp/qdp-python/qumat_qdp/triton_amd.py b/qdp/qdp-python/qumat_qdp/triton_amd.py
index 678fb49a5..1fbd84026 100644
--- a/qdp/qdp-python/qumat_qdp/triton_amd.py
+++ b/qdp/qdp-python/qumat_qdp/triton_amd.py
@@ -128,7 +128,25 @@ _IQP_PAIR_MATRIX_MAX_N = 20
@dataclass
class TritonAmdEngine:
- """AMD backend implementing amplitude/angle/basis/iqp/iqp-z/phase encoders."""
+ """ROCm/Triton implementation of the QDP encoder interface.
+
+ This engine targets AMD GPUs through a PyTorch ROCm runtime plus the Triton
+ Python package. ``encode()`` accepts ``"amplitude"``, ``"angle"``,
+ ``"basis"``, ``"iqp"``, ``"iqp-z"``, and ``"phase"``. The phase encoder
+ uses a fused Triton HIP kernel for ``float32`` and ``1 <= num_qubits <= 32``;
+ other supported cases fall back to vectorized PyTorch operations on the same
+ ROCm device.
+
+ ``precision`` accepts ``"float32"``/``"f32"``/``"float"`` and
+ ``"float64"``/``"f64"``/``"double"``. Runtime availability is checked when
+ ``encode()`` is called and raises a descriptive ``RuntimeError`` if PyTorch
+ ROCm or Triton is unavailable.
+
+ :param device_id: ROCm device ordinal, addressed through PyTorch as
+ ``cuda:{device_id}``.
+ :param precision: Floating-point precision for real inputs and complex
+ outputs.
+ """
device_id: int = 0
precision: str = "float32"