This is an automated email from the ASF dual-hosted git repository.

hcr pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git


The following commit(s) were added to refs/heads/main by this push:
     new 744e8dd8d [QDP] Add angle encoding support to benchmark suite (#894)
744e8dd8d is described below

commit 744e8dd8d844afe52ee46f860f25f4c90f26ccd5
Author: Jie-Kai Chang <[email protected]>
AuthorDate: Wed Jan 21 23:33:39 2026 +0800

    [QDP] Add angle encoding support to benchmark suite (#894)
    
    * Add angle encoding support to benchmark suite
    
    Signed-off-by: 400Ping <[email protected]>
    
    * fix pre-commit
    
    Signed-off-by: 400Ping <[email protected]>
    
    ---------
    
    Signed-off-by: 400Ping <[email protected]>
---
 qdp/qdp-python/benchmark/benchmark_e2e.py          | 51 ++++++++++++++++------
 qdp/qdp-python/benchmark/benchmark_latency.py      |  6 +--
 .../benchmark/benchmark_latency_pytorch.py         | 10 ++---
 qdp/qdp-python/benchmark/benchmark_numpy_io.py     |  9 ++--
 qdp/qdp-python/benchmark/benchmark_throughput.py   |  7 +--
 qdp/qdp-python/benchmark/utils.py                  | 43 +++++++++++-------
 6 files changed, 82 insertions(+), 44 deletions(-)
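
The convention this change introduces, repeated across the files below: angle
encoding carries one rotation angle per qubit, so sample vectors have length
n_qubits rather than the 2^n_qubits entries of an amplitude-encoded
statevector, and the angles are passed through without normalization. A minimal
sketch of that rule (illustrative only, not part of the patch; the helper name
is made up):

    import numpy as np

    def sample_vector_len(num_qubits: int, encoding_method: str) -> int:
        # Angle encoding: one Ry angle per qubit.
        # Amplitude encoding: a full statevector of 2^num_qubits entries.
        return num_qubits if encoding_method == "angle" else (1 << num_qubits)

    # Angles live in [0, 2*pi) and are not normalized, unlike amplitudes.
    angles = np.random.rand(16, sample_vector_len(4, "angle")) * 2.0 * np.pi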

diff --git a/qdp/qdp-python/benchmark/benchmark_e2e.py b/qdp/qdp-python/benchmark/benchmark_e2e.py
index 28e0afbd5..689ab28a7 100644
--- a/qdp/qdp-python/benchmark/benchmark_e2e.py
+++ b/qdp/qdp-python/benchmark/benchmark_e2e.py
@@ -88,7 +88,7 @@ def generate_data(n_qubits, n_samples, encoding_method: str = "amplitude"):
             os.remove(f)
 
     print(f"Generating {n_samples} samples of {n_qubits} qubits...")
-    dim = 1 << n_qubits
+    dim = n_qubits if encoding_method == "angle" else (1 << n_qubits)
 
     # Generate all data at once
     all_data = generate_batch_data(n_samples, dim, encoding_method, seed=42)
@@ -98,7 +98,7 @@ def generate_data(n_qubits, n_samples, encoding_method: str = "amplitude"):
         # For basis encoding, save single scalar indices (not lists)
         table = pa.table({"index": pa.array(all_data.flatten(), type=pa.float64())})
     else:
-        # For amplitude encoding, use List format for PennyLane/Qiskit compatibility
+        # For amplitude/angle encoding, use List format for PennyLane/Qiskit compatibility
         feature_vectors = [row.tolist() for row in all_data]
         table = pa.table(
             {"feature_vector": pa.array(feature_vectors, 
type=pa.list_(pa.float64()))}
@@ -111,7 +111,7 @@ def generate_data(n_qubits, n_samples, encoding_method: str = "amplitude"):
         arr = pa.FixedSizeListArray.from_arrays(pa.array(all_data.flatten()), 1)
         arrow_table = pa.table({"data": arr})
     else:
-        # For amplitude encoding, use FixedSizeList format
+        # For amplitude/angle encoding, use FixedSizeList format
+        arr = pa.FixedSizeListArray.from_arrays(pa.array(all_data.flatten()), dim)
         arrow_table = pa.table({"data": arr})
     with ipc.RecordBatchFileWriter(ARROW_FILE, arrow_table.schema) as writer:
@@ -129,7 +129,7 @@ def generate_data(n_qubits, n_samples, encoding_method: str = "amplitude"):
 # -----------------------------------------------------------
 # 1. Qiskit Full Pipeline
 # -----------------------------------------------------------
-def run_qiskit(n_qubits, n_samples):
+def run_qiskit(n_qubits, n_samples, encoding_method: str = "amplitude"):
     if not HAS_QISKIT:
         print("\n[Qiskit] Not installed, skipping.")
         return 0.0, None
@@ -137,7 +137,7 @@ def run_qiskit(n_qubits, n_samples):
     # Clean cache before starting benchmark
     clean_cache()
 
-    print("\n[Qiskit] Full Pipeline (Disk -> GPU)...")
+    print(f"\n[Qiskit] Full Pipeline (Disk -> GPU) - {encoding_method} 
encoding...")
     model = DummyQNN(n_qubits).cuda()
     backend = AerSimulator(method="statevector")
 
@@ -148,7 +148,10 @@ def run_qiskit(n_qubits, n_samples):
     import pandas as pd
 
     df = pd.read_parquet(DATA_FILE)
-    raw_data = np.stack(df["feature_vector"].values)
+    if encoding_method == "basis":
+        raw_data = df["index"].values.astype(np.int64)
+    else:
+        raw_data = np.stack(df["feature_vector"].values)
     io_time = time.perf_counter() - start_time
     print(f"  IO Time: {io_time:.4f} s")
 
@@ -156,16 +159,22 @@ def run_qiskit(n_qubits, n_samples):
 
     # Process batches
     for i in range(0, n_samples, BATCH_SIZE):
-        batch = raw_data[i : i + BATCH_SIZE]
-
-        # Normalize
-        batch = normalize_batch(batch)
+        batch = normalize_batch(raw_data[i : i + BATCH_SIZE], encoding_method)
 
         # State preparation
         batch_states = []
         for vec_idx, vec in enumerate(batch):
             qc = QuantumCircuit(n_qubits)
-            qc.initialize(vec, range(n_qubits))
+            if encoding_method == "basis":
+                idx = int(vec)
+                for bit in range(n_qubits):
+                    if (idx >> bit) & 1:
+                        qc.x(bit)
+            elif encoding_method == "angle":
+                for qubit, angle in enumerate(vec):
+                    qc.ry(2.0 * float(angle), qubit)
+            else:
+                qc.initialize(vec, range(n_qubits))
             qc.save_statevector()
             t_qc = transpile(qc, backend)
             result = backend.run(t_qc).result().get_statevector().data
@@ -220,6 +229,11 @@ def run_pennylane(n_qubits, n_samples, encoding_method: str = "amplitude"):
         qml.BasisEmbedding(features=basis_state, wires=range(n_qubits))
         return qml.state()
 
+    @qml.qnode(dev, interface="torch")
+    def angle_circuit(inputs):
+        qml.AngleEmbedding(features=inputs * 2.0, wires=range(n_qubits), rotation="Y")
+        return qml.state()
+
     model = DummyQNN(n_qubits).cuda()
 
     torch.cuda.synchronize()
@@ -249,6 +263,13 @@ def run_pennylane(n_qubits, n_samples, encoding_method: str = "amplitude"):
                 state_cpu = basis_circuit(binary_list)
                 batch_states.append(state_cpu)
             state_cpu = torch.stack(batch_states)
+        elif encoding_method == "angle":
+            batch_cpu = torch.tensor(raw_data[i : i + BATCH_SIZE])
+            # Execute QNode
+            try:
+                state_cpu = angle_circuit(batch_cpu)
+            except Exception:
+                state_cpu = torch.stack([angle_circuit(x) for x in batch_cpu])
         else:
             batch_cpu = torch.tensor(raw_data[i : i + BATCH_SIZE])
             # Execute QNode
@@ -438,8 +459,8 @@ if __name__ == "__main__":
         "--encoding-method",
         type=str,
         default="amplitude",
-        choices=["amplitude", "basis"],
-        help="Encoding method to use for Mahout (amplitude or basis).",
+        choices=["amplitude", "angle", "basis"],
+        help="Encoding method to use for Mahout (amplitude, angle, or basis).",
     )
     args = parser.parse_args()
 
@@ -477,7 +498,9 @@ if __name__ == "__main__":
         clean_cache()
 
     if "qiskit" in args.frameworks:
-        t_qiskit, qiskit_all_states = run_qiskit(args.qubits, args.samples)
+        t_qiskit, qiskit_all_states = run_qiskit(
+            args.qubits, args.samples, args.encoding_method
+        )
         # Clean cache between framework benchmarks
         clean_cache()
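
For the Qiskit path above, each feature value theta becomes a Y-rotation of
2*theta on its own qubit, so the prepared state is a product of
cos(theta)|0> + sin(theta)|1> factors. A single-qubit sanity check of that
convention (a sketch only, assuming qiskit and qiskit-aer are installed;
import paths may differ by version):

    import numpy as np
    from qiskit import QuantumCircuit, transpile
    from qiskit_aer import AerSimulator

    theta = 0.3
    qc = QuantumCircuit(1)
    qc.ry(2.0 * theta, 0)      # same 2*theta convention as the hunk above
    qc.save_statevector()      # save instruction provided by qiskit-aer

    backend = AerSimulator(method="statevector")
    state = backend.run(transpile(qc, backend)).result().get_statevector().data

    # Ry(2*theta)|0> = cos(theta)|0> + sin(theta)|1>
    assert np.allclose(state, [np.cos(theta), np.sin(theta)])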
 
diff --git a/qdp/qdp-python/benchmark/benchmark_latency.py b/qdp/qdp-python/benchmark/benchmark_latency.py
index 252647d4a..6e692b63b 100644
--- a/qdp/qdp-python/benchmark/benchmark_latency.py
+++ b/qdp/qdp-python/benchmark/benchmark_latency.py
@@ -98,7 +98,7 @@ def run_mahout(
         print(f"[Mahout] Init failed: {exc}")
         return 0.0, 0.0
 
-    vector_len = 1 << num_qubits
+    vector_len = num_qubits if encoding_method == "angle" else (1 << num_qubits)
     sync_cuda()
     start = time.perf_counter()
     processed = 0
@@ -241,8 +241,8 @@ def main():
         "--encoding-method",
         type=str,
         default="amplitude",
-        choices=["amplitude", "basis"],
-        help="Encoding method to use for Mahout (amplitude or basis).",
+        choices=["amplitude", "angle", "basis"],
+        help="Encoding method to use for Mahout (amplitude, angle, or basis).",
     )
     args = parser.parse_args()
 
diff --git a/qdp/qdp-python/benchmark/benchmark_latency_pytorch.py b/qdp/qdp-python/benchmark/benchmark_latency_pytorch.py
index e1c842a8f..7fbc87a63 100755
--- a/qdp/qdp-python/benchmark/benchmark_latency_pytorch.py
+++ b/qdp/qdp-python/benchmark/benchmark_latency_pytorch.py
@@ -56,7 +56,7 @@ def run_mahout_pytorch(
         print(f"[Mahout-PyTorch] Init failed: {exc}")
         return 0.0, 0.0
 
-    vector_len = 1 << num_qubits
+    vector_len = num_qubits if encoding_method == "angle" else (1 << num_qubits)
     sync_cuda()
     start = time.perf_counter()
     processed = 0
@@ -90,7 +90,7 @@ def run_mahout_numpy(
         print(f"[Mahout-NumPy] Init failed: {exc}")
         return 0.0, 0.0
 
-    vector_len = 1 << num_qubits
+    vector_len = num_qubits if encoding_method == "angle" else (1 << num_qubits)
     sync_cuda()
     start = time.perf_counter()
     processed = 0
@@ -103,7 +103,7 @@ def run_mahout_numpy(
             for i in range(batch_size)
         ]
         batch_np = np.stack(batch)
-        if encoding_method == "basis":
+        if encoding_method in ("basis", "angle"):
             normalized = batch_np
         else:
             norms = np.linalg.norm(batch_np, axis=1, keepdims=True)
@@ -140,8 +140,8 @@ def main():
         "--encoding-method",
         type=str,
         default="amplitude",
-        choices=["amplitude", "basis"],
-        help="Encoding method to use for Mahout (amplitude or basis).",
+        choices=["amplitude", "angle", "basis"],
+        help="Encoding method to use for Mahout (amplitude, angle, or basis).",
     )
     args = parser.parse_args()
 
diff --git a/qdp/qdp-python/benchmark/benchmark_numpy_io.py b/qdp/qdp-python/benchmark/benchmark_numpy_io.py
index d79661b39..31dd08b45 100644
--- a/qdp/qdp-python/benchmark/benchmark_numpy_io.py
+++ b/qdp/qdp-python/benchmark/benchmark_numpy_io.py
@@ -64,6 +64,9 @@ def generate_test_data(
     if encoding_method == "basis":
         # Basis encoding: single index per sample
         data = rng.randint(0, sample_size, size=(num_samples, 1)).astype(np.float64)
+    elif encoding_method == "angle":
+        # Angle encoding: per-qubit angles in [0, 2*pi)
+        data = (rng.rand(num_samples, sample_size) * (2.0 * np.pi)).astype(np.float64)
     else:
         # Amplitude encoding: full vectors (using Gaussian distribution)
         data = rng.randn(num_samples, sample_size).astype(np.float64)
@@ -195,8 +198,8 @@ def main():
         "--encoding-method",
         type=str,
         default="amplitude",
-        choices=["amplitude", "basis"],
-        help="Encoding method to use for Mahout (amplitude or basis).",
+        choices=["amplitude", "angle", "basis"],
+        help="Encoding method to use for Mahout (amplitude, angle, or basis).",
     )
     args = parser.parse_args()
 
@@ -208,7 +211,7 @@ def main():
 
     num_qubits = args.qubits
     num_samples = args.samples
-    sample_size = 1 << num_qubits  # 2^qubits
+    sample_size = num_qubits if args.encoding_method == "angle" else (1 << num_qubits)
 
     print(BAR)
     print("NUMPY I/O + ENCODING BENCHMARK")
diff --git a/qdp/qdp-python/benchmark/benchmark_throughput.py b/qdp/qdp-python/benchmark/benchmark_throughput.py
index 8c0305402..15c67646f 100644
--- a/qdp/qdp-python/benchmark/benchmark_throughput.py
+++ b/qdp/qdp-python/benchmark/benchmark_throughput.py
@@ -92,9 +92,10 @@ def run_mahout(
     torch.cuda.synchronize()
     start = time.perf_counter()
 
+    vector_len = num_qubits if encoding_method == "angle" else (1 << num_qubits)
     processed = 0
     for batch in prefetched_batches(
-        total_batches, batch_size, 1 << num_qubits, prefetch, encoding_method
+        total_batches, batch_size, vector_len, prefetch, encoding_method
     ):
         normalized = np.ascontiguousarray(
             normalize_batch(batch, encoding_method), dtype=np.float64
@@ -216,8 +217,8 @@ def main():
         "--encoding-method",
         type=str,
         default="amplitude",
-        choices=["amplitude", "basis"],
-        help="Encoding method to use for Mahout (amplitude or basis).",
+        choices=["amplitude", "angle", "basis"],
+        help="Encoding method to use for Mahout (amplitude, angle, or basis).",
     )
     args = parser.parse_args()
 
diff --git a/qdp/qdp-python/benchmark/utils.py b/qdp/qdp-python/benchmark/utils.py
index 753405831..1dc59b6b6 100644
--- a/qdp/qdp-python/benchmark/utils.py
+++ b/qdp/qdp-python/benchmark/utils.py
@@ -39,8 +39,8 @@ def build_sample(
 
     Args:
         seed: Seed value used to generate deterministic data.
-        vector_len: Length of the vector (2^num_qubits for amplitude encoding).
-        encoding_method: Either "amplitude" or "basis".
+        vector_len: Length of the vector (2^num_qubits for amplitude, num_qubits for angle).
+        encoding_method: "amplitude", "angle", or "basis".
 
     Returns:
         NumPy array containing the sample data.
@@ -50,6 +50,14 @@ def build_sample(
         mask = np.uint64(vector_len - 1)
         idx = np.uint64(seed) & mask
         return np.array([idx], dtype=np.float64)
+    if encoding_method == "angle":
+        # Angle encoding: one angle per qubit, scaled to [0, 2*pi)
+        if vector_len == 0:
+            return np.array([], dtype=np.float64)
+        scale = (2.0 * np.pi) / vector_len
+        idx = np.arange(vector_len, dtype=np.uint64)
+        mixed = (idx + np.uint64(seed)) % np.uint64(vector_len)
+        return mixed.astype(np.float64) * scale
     else:
         # Amplitude encoding: full vector
         mask = np.uint64(vector_len - 1)
@@ -71,17 +79,20 @@ def generate_batch_data(
     Args:
         n_samples: Number of samples to generate.
         dim: Dimension of each sample (2^num_qubits for amplitude encoding).
-        encoding_method: Either "amplitude" or "basis".
+        encoding_method: "amplitude", "angle", or "basis".
         seed: Random seed for reproducibility.
 
     Returns:
-        NumPy array of shape (n_samples, dim) for amplitude encoding
+        NumPy array of shape (n_samples, dim) for amplitude/angle encoding
         or (n_samples, 1) for basis encoding.
     """
     np.random.seed(seed)
     if encoding_method == "basis":
         # Basis encoding: single index per sample
         return np.random.randint(0, dim, size=(n_samples, 1)).astype(np.float64)
+    if encoding_method == "angle":
+        # Angle encoding: per-qubit angles in [0, 2*pi)
+        return (np.random.rand(n_samples, dim) * (2.0 * np.pi)).astype(np.float64)
     else:
         # Amplitude encoding: full vectors
         return np.random.rand(n_samples, dim).astype(np.float64)
@@ -95,13 +106,13 @@ def normalize_batch(
 
     Args:
         batch: NumPy array of shape (batch_size, vector_len).
-        encoding_method: Either "amplitude" or "basis".
+        encoding_method: "amplitude", "angle", or "basis".
 
     Returns:
-        Normalized batch. For basis encoding, returns the input unchanged.
+        Normalized batch. For basis/angle encoding, returns the input unchanged.
     """
-    if encoding_method == "basis":
-        # Basis encoding doesn't need normalization (indices)
+    if encoding_method in ("basis", "angle"):
+        # Basis/angle encodings don't need normalization
         return batch
     # Amplitude encoding: normalize vectors
     norms = np.linalg.norm(batch, axis=1, keepdims=True)
@@ -117,13 +128,13 @@ def normalize_batch_torch(
 
     Args:
         batch: PyTorch tensor of shape (batch_size, vector_len).
-        encoding_method: Either "amplitude" or "basis".
+        encoding_method: "amplitude", "angle", or "basis".
 
     Returns:
-        Normalized batch. For basis encoding, returns the input unchanged.
+        Normalized batch. For basis/angle encoding, returns the input unchanged.
     """
-    if encoding_method == "basis":
-        # Basis encoding doesn't need normalization (indices)
+    if encoding_method in ("basis", "angle"):
+        # Basis/angle encodings don't need normalization
         return batch
     # Amplitude encoding: normalize vectors
     norms = torch.norm(batch, dim=1, keepdim=True)
@@ -146,9 +157,9 @@ def prefetched_batches(
     Args:
         total_batches: Total number of batches to generate.
         batch_size: Number of samples per batch.
-        vector_len: Length of each vector (2^num_qubits).
+        vector_len: Length of each vector (2^num_qubits for amplitude, num_qubits for angle).
         prefetch: Number of batches to prefetch.
-        encoding_method: Either "amplitude" or "basis".
+        encoding_method: "amplitude", "angle", or "basis".
 
     Yields:
         NumPy arrays of shape (batch_size, vector_len) or (batch_size, 1).
@@ -189,9 +200,9 @@ def prefetched_batches_torch(
     Args:
         total_batches: Total number of batches to generate.
         batch_size: Number of samples per batch.
-        vector_len: Length of each vector (2^num_qubits).
+        vector_len: Length of each vector (2^num_qubits for amplitude, num_qubits for angle).
         prefetch: Number of batches to prefetch.
-        encoding_method: Either "amplitude" or "basis".
+        encoding_method: "amplitude", "angle", or "basis".
 
     Yields:
         PyTorch tensors of shape (batch_size, vector_len) or (batch_size, 1).
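
Taken together, the utils helpers now produce and accept the following shapes
per encoding. A quick sanity-check sketch (assumes the benchmark directory is
on the Python path so utils imports directly; not part of the patch):

    import numpy as np
    from utils import generate_batch_data, normalize_batch

    amp = generate_batch_data(8, 1 << 4, "amplitude", seed=42)  # shape (8, 16)
    ang = generate_batch_data(8, 4, "angle", seed=42)           # shape (8, 4), angles in [0, 2*pi)
    bas = generate_batch_data(8, 1 << 4, "basis", seed=42)      # shape (8, 1), integer indices

    # Amplitude vectors are normalized to unit norm; angle and basis data pass through.
    assert np.allclose(np.linalg.norm(normalize_batch(amp, "amplitude"), axis=1), 1.0)
    assert normalize_batch(ang, "angle") is ang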
