This is an automated email from the ASF dual-hosted git repository.
hcr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/main by this push:
new 744e8dd8d [QDP] Add angle encoding support to benchmark suite (#894)
744e8dd8d is described below
commit 744e8dd8d844afe52ee46f860f25f4c90f26ccd5
Author: Jie-Kai Chang <[email protected]>
AuthorDate: Wed Jan 21 23:33:39 2026 +0800
[QDP] Add angle encoding support to benchmark suite (#894)
* Add angle encoding support to benchmark suite
Signed-off-by: 400Ping <[email protected]>
* fix pre-commit
Signed-off-by: 400Ping <[email protected]>
---------
Signed-off-by: 400Ping <[email protected]>
---
qdp/qdp-python/benchmark/benchmark_e2e.py | 51 ++++++++++++++++------
qdp/qdp-python/benchmark/benchmark_latency.py | 6 +--
.../benchmark/benchmark_latency_pytorch.py | 10 ++---
qdp/qdp-python/benchmark/benchmark_numpy_io.py | 9 ++--
qdp/qdp-python/benchmark/benchmark_throughput.py | 7 +--
qdp/qdp-python/benchmark/utils.py | 43 +++++++++++-------
6 files changed, 82 insertions(+), 44 deletions(-)
diff --git a/qdp/qdp-python/benchmark/benchmark_e2e.py b/qdp/qdp-python/benchmark/benchmark_e2e.py
index 28e0afbd5..689ab28a7 100644
--- a/qdp/qdp-python/benchmark/benchmark_e2e.py
+++ b/qdp/qdp-python/benchmark/benchmark_e2e.py
@@ -88,7 +88,7 @@ def generate_data(n_qubits, n_samples, encoding_method: str = "amplitude"):
os.remove(f)
print(f"Generating {n_samples} samples of {n_qubits} qubits...")
- dim = 1 << n_qubits
+ dim = n_qubits if encoding_method == "angle" else (1 << n_qubits)
# Generate all data at once
all_data = generate_batch_data(n_samples, dim, encoding_method, seed=42)
@@ -98,7 +98,7 @@ def generate_data(n_qubits, n_samples, encoding_method: str = "amplitude"):
# For basis encoding, save single scalar indices (not lists)
table = pa.table({"index": pa.array(all_data.flatten(),
type=pa.float64())})
else:
- # For amplitude encoding, use List format for PennyLane/Qiskit compatibility
+ # For amplitude/angle encoding, use List format for PennyLane/Qiskit compatibility
feature_vectors = [row.tolist() for row in all_data]
table = pa.table(
{"feature_vector": pa.array(feature_vectors,
type=pa.list_(pa.float64()))}
@@ -111,7 +111,7 @@ def generate_data(n_qubits, n_samples, encoding_method: str = "amplitude"):
arr = pa.FixedSizeListArray.from_arrays(pa.array(all_data.flatten()), 1)
arrow_table = pa.table({"data": arr})
else:
- # For amplitude encoding, use FixedSizeList format
+ # For amplitude/angle encoding, use FixedSizeList format
arr = pa.FixedSizeListArray.from_arrays(pa.array(all_data.flatten()), dim)
arrow_table = pa.table({"data": arr})
with ipc.RecordBatchFileWriter(ARROW_FILE, arrow_table.schema) as writer:
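For reference, a minimal standalone sketch of how angle-encoded feature vectors end up in the List<float64> parquet column described above (assumes pyarrow is installed; the sizes and output path are illustrative, not the benchmark's DATA_FILE):

import numpy as np
import pyarrow as pa
import pyarrow.parquet as pq

n_qubits, n_samples = 4, 8
# For angle encoding the feature dimension is n_qubits, not 2**n_qubits.
angles = np.random.rand(n_samples, n_qubits) * 2.0 * np.pi
feature_vectors = [row.tolist() for row in angles]
table = pa.table(
    {"feature_vector": pa.array(feature_vectors, type=pa.list_(pa.float64()))}
)
pq.write_table(table, "angle_features.parquet")  # illustrative path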
@@ -129,7 +129,7 @@ def generate_data(n_qubits, n_samples, encoding_method: str = "amplitude"):
# -----------------------------------------------------------
# 1. Qiskit Full Pipeline
# -----------------------------------------------------------
-def run_qiskit(n_qubits, n_samples):
+def run_qiskit(n_qubits, n_samples, encoding_method: str = "amplitude"):
if not HAS_QISKIT:
print("\n[Qiskit] Not installed, skipping.")
return 0.0, None
@@ -137,7 +137,7 @@ def run_qiskit(n_qubits, n_samples):
# Clean cache before starting benchmark
clean_cache()
- print("\n[Qiskit] Full Pipeline (Disk -> GPU)...")
+ print(f"\n[Qiskit] Full Pipeline (Disk -> GPU) - {encoding_method}
encoding...")
model = DummyQNN(n_qubits).cuda()
backend = AerSimulator(method="statevector")
@@ -148,7 +148,10 @@ def run_qiskit(n_qubits, n_samples):
import pandas as pd
df = pd.read_parquet(DATA_FILE)
- raw_data = np.stack(df["feature_vector"].values)
+ if encoding_method == "basis":
+ raw_data = df["index"].values.astype(np.int64)
+ else:
+ raw_data = np.stack(df["feature_vector"].values)
io_time = time.perf_counter() - start_time
print(f" IO Time: {io_time:.4f} s")
@@ -156,16 +159,22 @@ def run_qiskit(n_qubits, n_samples):
# Process batches
for i in range(0, n_samples, BATCH_SIZE):
- batch = raw_data[i : i + BATCH_SIZE]
-
- # Normalize
- batch = normalize_batch(batch)
+ batch = normalize_batch(raw_data[i : i + BATCH_SIZE], encoding_method)
# State preparation
batch_states = []
for vec_idx, vec in enumerate(batch):
qc = QuantumCircuit(n_qubits)
- qc.initialize(vec, range(n_qubits))
+ if encoding_method == "basis":
+ idx = int(vec)
+ for bit in range(n_qubits):
+ if (idx >> bit) & 1:
+ qc.x(bit)
+ elif encoding_method == "angle":
+ for qubit, angle in enumerate(vec):
+ qc.ry(2.0 * float(angle), qubit)
+ else:
+ qc.initialize(vec, range(n_qubits))
qc.save_statevector()
t_qc = transpile(qc, backend)
result = backend.run(t_qc).result().get_statevector().data
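A minimal sketch of the angle-encoding state preparation used above, outside the benchmark harness (assumes qiskit and qiskit-aer are installed; qubit count and angles are arbitrary):

import numpy as np
from qiskit import QuantumCircuit, transpile
from qiskit_aer import AerSimulator

n_qubits = 3
angles = np.random.rand(n_qubits) * 2.0 * np.pi  # one angle per qubit, in [0, 2*pi)

qc = QuantumCircuit(n_qubits)
for qubit, angle in enumerate(angles):
    qc.ry(2.0 * float(angle), qubit)  # same RY(2*theta) convention as the patch
qc.save_statevector()

backend = AerSimulator(method="statevector")
state = backend.run(transpile(qc, backend)).result().get_statevector().data
print(state.shape)  # (2**n_qubits,)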
@@ -220,6 +229,11 @@ def run_pennylane(n_qubits, n_samples, encoding_method: str = "amplitude"):
qml.BasisEmbedding(features=basis_state, wires=range(n_qubits))
return qml.state()
+ @qml.qnode(dev, interface="torch")
+ def angle_circuit(inputs):
+ qml.AngleEmbedding(features=inputs * 2.0, wires=range(n_qubits), rotation="Y")
+ return qml.state()
+
model = DummyQNN(n_qubits).cuda()
torch.cuda.synchronize()
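The same encoding in isolation with PennyLane (a sketch assuming pennylane and torch are available; the device and batch size are placeholders). As in the benchmark, batched execution falls back to per-sample calls when broadcasting is not supported:

import math
import pennylane as qml
import torch

n_qubits = 3
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, interface="torch")
def angle_circuit(inputs):
    # Matches the patch: features scaled by 2.0, Y rotations on every wire.
    qml.AngleEmbedding(features=inputs * 2.0, wires=range(n_qubits), rotation="Y")
    return qml.state()

batch = torch.rand(4, n_qubits) * (2.0 * math.pi)  # per-qubit angles in [0, 2*pi)
try:
    states = angle_circuit(batch)                  # broadcast over the batch
except Exception:
    states = torch.stack([angle_circuit(x) for x in batch])
print(states.shape)  # (4, 2**n_qubits)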
@@ -249,6 +263,13 @@ def run_pennylane(n_qubits, n_samples, encoding_method: str = "amplitude"):
state_cpu = basis_circuit(binary_list)
batch_states.append(state_cpu)
state_cpu = torch.stack(batch_states)
+ elif encoding_method == "angle":
+ batch_cpu = torch.tensor(raw_data[i : i + BATCH_SIZE])
+ # Execute QNode
+ try:
+ state_cpu = angle_circuit(batch_cpu)
+ except Exception:
+ state_cpu = torch.stack([angle_circuit(x) for x in batch_cpu])
else:
batch_cpu = torch.tensor(raw_data[i : i + BATCH_SIZE])
# Execute QNode
@@ -438,8 +459,8 @@ if __name__ == "__main__":
"--encoding-method",
type=str,
default="amplitude",
- choices=["amplitude", "basis"],
- help="Encoding method to use for Mahout (amplitude or basis).",
+ choices=["amplitude", "angle", "basis"],
+ help="Encoding method to use for Mahout (amplitude, angle, or basis).",
)
args = parser.parse_args()
@@ -477,7 +498,9 @@ if __name__ == "__main__":
clean_cache()
if "qiskit" in args.frameworks:
- t_qiskit, qiskit_all_states = run_qiskit(args.qubits, args.samples)
+ t_qiskit, qiskit_all_states = run_qiskit(
+ args.qubits, args.samples, args.encoding_method
+ )
# Clean cache between framework benchmarks
clean_cache()
diff --git a/qdp/qdp-python/benchmark/benchmark_latency.py b/qdp/qdp-python/benchmark/benchmark_latency.py
index 252647d4a..6e692b63b 100644
--- a/qdp/qdp-python/benchmark/benchmark_latency.py
+++ b/qdp/qdp-python/benchmark/benchmark_latency.py
@@ -98,7 +98,7 @@ def run_mahout(
print(f"[Mahout] Init failed: {exc}")
return 0.0, 0.0
- vector_len = 1 << num_qubits
+ vector_len = num_qubits if encoding_method == "angle" else (1 << num_qubits)
sync_cuda()
start = time.perf_counter()
processed = 0
@@ -241,8 +241,8 @@ def main():
"--encoding-method",
type=str,
default="amplitude",
- choices=["amplitude", "basis"],
- help="Encoding method to use for Mahout (amplitude or basis).",
+ choices=["amplitude", "angle", "basis"],
+ help="Encoding method to use for Mahout (amplitude, angle, or basis).",
)
args = parser.parse_args()
diff --git a/qdp/qdp-python/benchmark/benchmark_latency_pytorch.py b/qdp/qdp-python/benchmark/benchmark_latency_pytorch.py
index e1c842a8f..7fbc87a63 100755
--- a/qdp/qdp-python/benchmark/benchmark_latency_pytorch.py
+++ b/qdp/qdp-python/benchmark/benchmark_latency_pytorch.py
@@ -56,7 +56,7 @@ def run_mahout_pytorch(
print(f"[Mahout-PyTorch] Init failed: {exc}")
return 0.0, 0.0
- vector_len = 1 << num_qubits
+ vector_len = num_qubits if encoding_method == "angle" else (1 << num_qubits)
sync_cuda()
start = time.perf_counter()
processed = 0
@@ -90,7 +90,7 @@ def run_mahout_numpy(
print(f"[Mahout-NumPy] Init failed: {exc}")
return 0.0, 0.0
- vector_len = 1 << num_qubits
+ vector_len = num_qubits if encoding_method == "angle" else (1 << num_qubits)
sync_cuda()
start = time.perf_counter()
processed = 0
@@ -103,7 +103,7 @@ def run_mahout_numpy(
for i in range(batch_size)
]
batch_np = np.stack(batch)
- if encoding_method == "basis":
+ if encoding_method in ("basis", "angle"):
normalized = batch_np
else:
norms = np.linalg.norm(batch_np, axis=1, keepdims=True)
@@ -140,8 +140,8 @@ def main():
"--encoding-method",
type=str,
default="amplitude",
- choices=["amplitude", "basis"],
- help="Encoding method to use for Mahout (amplitude or basis).",
+ choices=["amplitude", "angle", "basis"],
+ help="Encoding method to use for Mahout (amplitude, angle, or basis).",
)
args = parser.parse_args()
diff --git a/qdp/qdp-python/benchmark/benchmark_numpy_io.py b/qdp/qdp-python/benchmark/benchmark_numpy_io.py
index d79661b39..31dd08b45 100644
--- a/qdp/qdp-python/benchmark/benchmark_numpy_io.py
+++ b/qdp/qdp-python/benchmark/benchmark_numpy_io.py
@@ -64,6 +64,9 @@ def generate_test_data(
if encoding_method == "basis":
# Basis encoding: single index per sample
data = rng.randint(0, sample_size, size=(num_samples, 1)).astype(np.float64)
+ elif encoding_method == "angle":
+ # Angle encoding: per-qubit angles in [0, 2*pi)
+ data = (rng.rand(num_samples, sample_size) * (2.0 * np.pi)).astype(np.float64)
else:
# Amplitude encoding: full vectors (using Gaussian distribution)
data = rng.randn(num_samples, sample_size).astype(np.float64)
@@ -195,8 +198,8 @@ def main():
"--encoding-method",
type=str,
default="amplitude",
- choices=["amplitude", "basis"],
- help="Encoding method to use for Mahout (amplitude or basis).",
+ choices=["amplitude", "angle", "basis"],
+ help="Encoding method to use for Mahout (amplitude, angle, or basis).",
)
args = parser.parse_args()
@@ -208,7 +211,7 @@ def main():
num_qubits = args.qubits
num_samples = args.samples
- sample_size = 1 << num_qubits # 2^qubits
+ sample_size = num_qubits if args.encoding_method == "angle" else (1 << num_qubits)
print(BAR)
print("NUMPY I/O + ENCODING BENCHMARK")
diff --git a/qdp/qdp-python/benchmark/benchmark_throughput.py b/qdp/qdp-python/benchmark/benchmark_throughput.py
index 8c0305402..15c67646f 100644
--- a/qdp/qdp-python/benchmark/benchmark_throughput.py
+++ b/qdp/qdp-python/benchmark/benchmark_throughput.py
@@ -92,9 +92,10 @@ def run_mahout(
torch.cuda.synchronize()
start = time.perf_counter()
+ vector_len = num_qubits if encoding_method == "angle" else (1 << num_qubits)
processed = 0
for batch in prefetched_batches(
- total_batches, batch_size, 1 << num_qubits, prefetch, encoding_method
+ total_batches, batch_size, vector_len, prefetch, encoding_method
):
normalized = np.ascontiguousarray(
normalize_batch(batch, encoding_method), dtype=np.float64
@@ -216,8 +217,8 @@ def main():
"--encoding-method",
type=str,
default="amplitude",
- choices=["amplitude", "basis"],
- help="Encoding method to use for Mahout (amplitude or basis).",
+ choices=["amplitude", "angle", "basis"],
+ help="Encoding method to use for Mahout (amplitude, angle, or basis).",
)
args = parser.parse_args()
diff --git a/qdp/qdp-python/benchmark/utils.py b/qdp/qdp-python/benchmark/utils.py
index 753405831..1dc59b6b6 100644
--- a/qdp/qdp-python/benchmark/utils.py
+++ b/qdp/qdp-python/benchmark/utils.py
@@ -39,8 +39,8 @@ def build_sample(
Args:
seed: Seed value used to generate deterministic data.
- vector_len: Length of the vector (2^num_qubits for amplitude encoding).
- encoding_method: Either "amplitude" or "basis".
+ vector_len: Length of the vector (2^num_qubits for amplitude, num_qubits for angle).
+ encoding_method: "amplitude", "angle", or "basis".
Returns:
NumPy array containing the sample data.
@@ -50,6 +50,14 @@ def build_sample(
mask = np.uint64(vector_len - 1)
idx = np.uint64(seed) & mask
return np.array([idx], dtype=np.float64)
+ if encoding_method == "angle":
+ # Angle encoding: one angle per qubit, scaled to [0, 2*pi)
+ if vector_len == 0:
+ return np.array([], dtype=np.float64)
+ scale = (2.0 * np.pi) / vector_len
+ idx = np.arange(vector_len, dtype=np.uint64)
+ mixed = (idx + np.uint64(seed)) % np.uint64(vector_len)
+ return mixed.astype(np.float64) * scale
else:
# Amplitude encoding: full vector
mask = np.uint64(vector_len - 1)
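A self-contained reproduction of the angle branch's arithmetic above (not a call into the benchmark module; the values are illustrative):

import numpy as np

# vector_len=4, seed=6: mixed = (arange(4) + 6) % 4 = [2, 3, 0, 1], scale = 2*pi/4,
# so the sample holds one angle per qubit: [pi, 3*pi/2, 0, pi/2].
vector_len, seed = 4, 6
scale = (2.0 * np.pi) / vector_len
mixed = (np.arange(vector_len, dtype=np.uint64) + np.uint64(seed)) % np.uint64(vector_len)
print(mixed.astype(np.float64) * scale)  # [3.14159265 4.71238898 0.         1.57079633]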
@@ -71,17 +79,20 @@ def generate_batch_data(
Args:
n_samples: Number of samples to generate.
dim: Dimension of each sample (2^num_qubits for amplitude encoding).
- encoding_method: Either "amplitude" or "basis".
+ encoding_method: "amplitude", "angle", or "basis".
seed: Random seed for reproducibility.
Returns:
- NumPy array of shape (n_samples, dim) for amplitude encoding
+ NumPy array of shape (n_samples, dim) for amplitude/angle encoding
or (n_samples, 1) for basis encoding.
"""
np.random.seed(seed)
if encoding_method == "basis":
# Basis encoding: single index per sample
return np.random.randint(0, dim, size=(n_samples, 1)).astype(np.float64)
+ if encoding_method == "angle":
+ # Angle encoding: per-qubit angles in [0, 2*pi)
+ return (np.random.rand(n_samples, dim) * (2.0 * np.pi)).astype(np.float64)
else:
# Amplitude encoding: full vectors
return np.random.rand(n_samples, dim).astype(np.float64)
@@ -95,13 +106,13 @@ def normalize_batch(
Args:
batch: NumPy array of shape (batch_size, vector_len).
- encoding_method: Either "amplitude" or "basis".
+ encoding_method: "amplitude", "angle", or "basis".
Returns:
- Normalized batch. For basis encoding, returns the input unchanged.
+ Normalized batch. For basis/angle encoding, returns the input unchanged.
"""
- if encoding_method == "basis":
- # Basis encoding doesn't need normalization (indices)
+ if encoding_method in ("basis", "angle"):
+ # Basis/angle encodings don't need normalization
return batch
# Amplitude encoding: normalize vectors
norms = np.linalg.norm(batch, axis=1, keepdims=True)
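Taken together, a small sketch of how angle batches flow through these helpers (assumes it is run from qdp/qdp-python/benchmark/ so that utils is importable; argument order follows the docstrings above):

import numpy as np
from utils import generate_batch_data, normalize_batch

num_qubits, n_samples = 4, 16
angle_batch = generate_batch_data(n_samples, num_qubits, "angle", seed=42)
amp_batch = generate_batch_data(n_samples, 1 << num_qubits, "amplitude", seed=42)

# Angle data passes through unchanged; amplitude vectors are L2-normalized.
assert np.array_equal(normalize_batch(angle_batch, "angle"), angle_batch)
norms = np.linalg.norm(normalize_batch(amp_batch, "amplitude"), axis=1)
print(norms.round(6))  # all 1.0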
@@ -117,13 +128,13 @@ def normalize_batch_torch(
Args:
batch: PyTorch tensor of shape (batch_size, vector_len).
- encoding_method: Either "amplitude" or "basis".
+ encoding_method: "amplitude", "angle", or "basis".
Returns:
- Normalized batch. For basis encoding, returns the input unchanged.
+ Normalized batch. For basis/angle encoding, returns the input unchanged.
"""
- if encoding_method == "basis":
- # Basis encoding doesn't need normalization (indices)
+ if encoding_method in ("basis", "angle"):
+ # Basis/angle encodings don't need normalization
return batch
# Amplitude encoding: normalize vectors
norms = torch.norm(batch, dim=1, keepdim=True)
@@ -146,9 +157,9 @@ def prefetched_batches(
Args:
total_batches: Total number of batches to generate.
batch_size: Number of samples per batch.
- vector_len: Length of each vector (2^num_qubits).
+ vector_len: Length of each vector (2^num_qubits for amplitude, num_qubits for angle).
prefetch: Number of batches to prefetch.
- encoding_method: Either "amplitude" or "basis".
+ encoding_method: "amplitude", "angle", or "basis".
Yields:
NumPy arrays of shape (batch_size, vector_len) or (batch_size, 1).
@@ -189,9 +200,9 @@ def prefetched_batches_torch(
Args:
total_batches: Total number of batches to generate.
batch_size: Number of samples per batch.
- vector_len: Length of each vector (2^num_qubits).
+ vector_len: Length of each vector (2^num_qubits for amplitude, num_qubits for angle).
prefetch: Number of batches to prefetch.
- encoding_method: Either "amplitude" or "basis".
+ encoding_method: "amplitude", "angle", or "basis".
Yields:
PyTorch tensors of shape (batch_size, vector_len) or (batch_size, 1).
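Finally, a sketch of consuming prefetched angle batches (again assumes utils is importable from the benchmark directory; the positional order mirrors the call in benchmark_throughput.py):

from utils import prefetched_batches, normalize_batch

num_qubits, batch_size = 4, 32
# (total_batches, batch_size, vector_len, prefetch, encoding_method)
for batch in prefetched_batches(2, batch_size, num_qubits, 1, "angle"):
    batch = normalize_batch(batch, "angle")  # returned unchanged for angle data
    print(batch.shape)                       # expected (32, 4)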