(mahout) branch main updated: feat(qdp): Encoding + Dtype enums, static encoder dispatch (#1276)

hcr Mon, 11 May 2026 03:24:49 -0700

This is an automated email from the ASF dual-hosted git repository.

ryankert01 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git



The following commit(s) were added to refs/heads/main by this push:
     new 6cfbf10ec feat(qdp): Encoding + Dtype enums, static encoder dispatch (#1276)
6cfbf10ec is described below

commit 6cfbf10ecb023ddb870c5fe80623b0893442f6ac
Author: KUAN-HAO HUANG <[email protected]>
AuthorDate: Mon May 11 18:24:28 2026 +0800

    feat(qdp): Encoding + Dtype enums, static encoder dispatch (#1276)
---
 qdp/DEVELOPMENT.md                           |   6 +
 qdp/qdp-core/src/encoding/mod.rs             |  12 +-
 qdp/qdp-core/src/gpu/encodings/iqp.rs        |  16 ++
 qdp/qdp-core/src/gpu/encodings/mod.rs        |  20 +-
 qdp/qdp-core/src/gpu/mod.rs                  |   2 +-
 qdp/qdp-core/src/lib.rs                      |  55 +++++-
 qdp/qdp-core/src/pipeline_runner.rs          | 232 +++++++++++-----------
 qdp/qdp-core/src/types.rs                    | 160 +++++++++++++++
 qdp/qdp-core/tests/gpu_angle_encoding.rs     |  22 +++
 qdp/qdp-core/tests/gpu_iqp_encoding.rs       |   4 +-
 qdp/qdp-core/tests/gpu_validation.rs         |   4 +-
 qdp/qdp-core/tests/types.rs                  |  73 +++++++
 qdp/qdp-python/README.md                     |  10 +
 qdp/qdp-python/qumat_qdp/api.py              |   4 +-
 qdp/qdp-python/qumat_qdp/loader.py           |  15 ++
 qdp/qdp-python/src/engine.rs                 | 283 +++++++--------------------
 qdp/qdp-python/src/lib.rs                    |  10 +-
 qdp/qdp-python/src/loader.rs                 |  16 +-
 qdp/qdp-python/src/pytorch.rs                |  42 ++--
 testing/qdp/test_bindings.py                 |   6 +-
 testing/qdp_python/test_dlpack_validation.py |   4 +-
 21 files changed, 584 insertions(+), 412 deletions(-)

diff --git a/qdp/DEVELOPMENT.md b/qdp/DEVELOPMENT.md
index fc927022d..42ad61c4b 100644
--- a/qdp/DEVELOPMENT.md
+++ b/qdp/DEVELOPMENT.md
@@ -84,6 +84,12 @@ cargo test --workspace
 cd ..
 ```
 
+**Encoding / pipeline dtype:** `qdp_core::Encoding::supports_f32` gates whether
+`PipelineConfig::normalize()` keeps `dtype = Float32` for the synthetic pipeline. It reflects
+**which encoders implement `encode_batch_f32` today** (currently amplitude only), not every
+encoding that might eventually get a batch f32 path. When angle/basis gain real batch f32
+support, widen `supports_f32` and adjust tests accordingly.
+
 Run Python tests:
 
 ```bash
diff --git a/qdp/qdp-core/src/encoding/mod.rs b/qdp/qdp-core/src/encoding/mod.rs
index d795ca4a7..2d09b5146 100644
--- a/qdp/qdp-core/src/encoding/mod.rs
+++ b/qdp/qdp-core/src/encoding/mod.rs
@@ -63,6 +63,7 @@ use crate::dlpack::DLManagedTensor;
 use crate::gpu::PipelineContext;
 use crate::gpu::memory::{GpuStateVector, PinnedHostBuffer};
 use crate::reader::StreamingDataReader;
+use crate::types::Encoding;
 use crate::{MahoutError, QdpEngine, Result};
 
 /// 512MB staging buffer for large Parquet row groups (reduces fragmentation)
@@ -370,22 +371,23 @@ pub(crate) fn encode_from_parquet(
     num_qubits: usize,
     encoding_method: &str,
 ) -> Result<*mut DLManagedTensor> {
-    match encoding_method {
-        "amplitude" => {
+    let encoding = Encoding::from_str_ci(encoding_method)?;
+    match encoding {
+        Encoding::Amplitude => {
             crate::profile_scope!("Mahout::EncodeAmplitudeFromParquet");
             stream_encode(engine, path, num_qubits, amplitude::AmplitudeEncoder)
         }
-        "angle" => {
+        Encoding::Angle => {
             crate::profile_scope!("Mahout::EncodeAngleFromParquet");
             stream_encode(engine, path, num_qubits, angle::AngleEncoder)
         }
-        "basis" => {
+        Encoding::Basis => {
             crate::profile_scope!("Mahout::EncodeBasisFromParquet");
             stream_encode(engine, path, num_qubits, basis::BasisEncoder)
         }
         _ => Err(MahoutError::NotImplemented(format!(
             "Encoding method '{}' not supported for streaming",
-            encoding_method
+            encoding.as_str()
         ))),
     }
 }
diff --git a/qdp/qdp-core/src/gpu/encodings/iqp.rs b/qdp/qdp-core/src/gpu/encodings/iqp.rs
index c6ecf1762..33d18cfaf 100644
--- a/qdp/qdp-core/src/gpu/encodings/iqp.rs
+++ b/qdp/qdp-core/src/gpu/encodings/iqp.rs
@@ -23,6 +23,7 @@ use crate::error::{MahoutError, Result};
 use crate::gpu::memory::{GpuStateVector, Precision};
 use cudarc::driver::CudaDevice;
 use std::sync::Arc;
+use std::sync::OnceLock;
 
 #[cfg(target_os = "linux")]
 use crate::gpu::memory::map_allocation_error;
@@ -405,3 +406,18 @@ impl QuantumEncoder for IqpEncoder {
         }
     }
 }
+
+static IQP_FULL: OnceLock<IqpEncoder> = OnceLock::new();
+static IQP_Z_ONLY: OnceLock<IqpEncoder> = OnceLock::new();
+
+/// Shared `'static` IQP encoder (full ZZ). Used by [`crate::Encoding::encoder`](crate::Encoding::encoder).
+#[must_use]
+pub fn iqp_full_encoder() -> &'static IqpEncoder {
+    IQP_FULL.get_or_init(IqpEncoder::full)
+}
+
+/// Shared `'static` IQP-Z encoder. Used by [`crate::Encoding::encoder`](crate::Encoding::encoder).
+#[must_use]
+pub fn iqp_z_encoder() -> &'static IqpEncoder {
+    IQP_Z_ONLY.get_or_init(IqpEncoder::z_only)
+}
diff --git a/qdp/qdp-core/src/gpu/encodings/mod.rs b/qdp/qdp-core/src/gpu/encodings/mod.rs
index fa6362d4c..3f256e68a 100644
--- a/qdp/qdp-core/src/gpu/encodings/mod.rs
+++ b/qdp/qdp-core/src/gpu/encodings/mod.rs
@@ -58,7 +58,7 @@ pub fn validate_qubit_count(num_qubits: usize) -> Result<()> {
 
 /// Quantum encoding strategy interface
 /// Implemented by: AmplitudeEncoder, AngleEncoder, BasisEncoder
-pub trait QuantumEncoder: Send + Sync {
+pub trait QuantumEncoder: Send + Sync + 'static {
     /// Encode classical data to quantum state on GPU
     fn encode(
         &self,
@@ -181,21 +181,5 @@ pub mod phase;
 pub use amplitude::AmplitudeEncoder;
 pub use angle::AngleEncoder;
 pub use basis::BasisEncoder;
-pub use iqp::IqpEncoder;
+pub use iqp::{IqpEncoder, iqp_full_encoder, iqp_z_encoder};
 pub use phase::PhaseEncoder;
-
-/// Create encoder by name: "amplitude", "angle", "basis", "iqp", or "iqp-z"
-pub fn get_encoder(name: &str) -> Result<Box<dyn QuantumEncoder>> {
-    match name.to_lowercase().as_str() {
-        "amplitude" => Ok(Box::new(AmplitudeEncoder)),
-        "angle" => Ok(Box::new(AngleEncoder)),
-        "basis" => Ok(Box::new(BasisEncoder)),
-        "iqp" => Ok(Box::new(IqpEncoder::full())),
-        "iqp-z" => Ok(Box::new(IqpEncoder::z_only())),
-        "phase" => Ok(Box::new(PhaseEncoder)),
-        _ => Err(crate::error::MahoutError::InvalidInput(format!(
-            "Unknown encoder: {}. Available: amplitude, angle, basis, iqp, iqp-z, phase",
-            name
-        ))),
-    }
-}
diff --git a/qdp/qdp-core/src/gpu/mod.rs b/qdp/qdp-core/src/gpu/mod.rs
index 7e16be7be..73c4d4628 100644
--- a/qdp/qdp-core/src/gpu/mod.rs
+++ b/qdp/qdp-core/src/gpu/mod.rs
@@ -31,7 +31,7 @@ pub(crate) mod cuda_ffi;
 
 #[cfg(target_os = "linux")]
 pub use buffer_pool::{PinnedBufferHandle, PinnedBufferPool};
-pub use encodings::{AmplitudeEncoder, AngleEncoder, BasisEncoder, QuantumEncoder, get_encoder};
+pub use encodings::{AmplitudeEncoder, AngleEncoder, BasisEncoder, QuantumEncoder};
 pub use memory::GpuStateVector;
 pub use pipeline::run_dual_stream_pipeline;
 
diff --git a/qdp/qdp-core/src/lib.rs b/qdp/qdp-core/src/lib.rs
index 799eb7b18..4828297d2 100644
--- a/qdp/qdp-core/src/lib.rs
+++ b/qdp/qdp-core/src/lib.rs
@@ -31,12 +31,14 @@ pub mod readers;
 #[cfg(feature = "remote-io")]
 pub mod remote;
 pub mod tf_proto;
+pub mod types;
 #[macro_use]
 mod profiling;
 
 pub use error::{MahoutError, Result, cuda_error_to_string};
 pub use gpu::memory::Precision;
 pub use reader::{NullHandling, handle_float64_nulls};
+pub use types::{Dtype, Encoding};
 
 // Throughput/latency pipeline runner: single path using QdpEngine and encode_batch in Rust.
 #[cfg(target_os = "linux")]
@@ -52,7 +54,6 @@ use std::ffi::c_void;
 use std::sync::Arc;
 
 use crate::dlpack::DLManagedTensor;
-use crate::gpu::get_encoder;
 use cudarc::driver::CudaDevice;
 
 #[cfg(target_os = "linux")]
@@ -160,7 +161,8 @@ impl QdpEngine {
     ) -> Result<*mut DLManagedTensor> {
         crate::profile_scope!("Mahout::Encode");
 
-        let encoder = get_encoder(encoding_method)?;
+        let encoding = Encoding::from_str_ci(encoding_method)?;
+        let encoder = encoding.encoder();
         let state_vector = encoder.encode(&self.device, data, num_qubits)?;
         let state_vector = state_vector.to_precision(&self.device, self.precision)?;
         let dlpack_ptr = {
@@ -205,10 +207,23 @@ impl QdpEngine {
         sample_size: usize,
         num_qubits: usize,
         encoding_method: &str,
+    ) -> Result<*mut DLManagedTensor> {
+        let encoding = Encoding::from_str_ci(encoding_method)?;
+        self.encode_batch_for_pipeline(batch_data, num_samples, sample_size, num_qubits, encoding)
+    }
+
+    /// Same as [`encode_batch`](Self::encode_batch) with a resolved [`Encoding`] (no string parse).
+    pub(crate) fn encode_batch_for_pipeline(
+        &self,
+        batch_data: &[f64],
+        num_samples: usize,
+        sample_size: usize,
+        num_qubits: usize,
+        encoding: Encoding,
     ) -> Result<*mut DLManagedTensor> {
         crate::profile_scope!("Mahout::EncodeBatch");
 
-        let encoder = get_encoder(encoding_method)?;
+        let encoder = encoding.encoder();
         let state_vector = encoder.encode_batch(
             &self.device,
             batch_data,
@@ -230,10 +245,29 @@ impl QdpEngine {
         sample_size: usize,
         num_qubits: usize,
         encoding_method: &str,
+    ) -> Result<*mut DLManagedTensor> {
+        let encoding = Encoding::from_str_ci(encoding_method)?;
+        self.encode_batch_f32_for_pipeline(
+            batch_data,
+            num_samples,
+            sample_size,
+            num_qubits,
+            encoding,
+        )
+    }
+
+    /// Same as [`encode_batch_f32`](Self::encode_batch_f32) with a resolved [`Encoding`].
+    pub(crate) fn encode_batch_f32_for_pipeline(
+        &self,
+        batch_data: &[f32],
+        num_samples: usize,
+        sample_size: usize,
+        num_qubits: usize,
+        encoding: Encoding,
     ) -> Result<*mut DLManagedTensor> {
         crate::profile_scope!("Mahout::EncodeBatchF32");
 
-        let encoder = get_encoder(encoding_method)?;
+        let encoder = encoding.encoder();
         let state_vector = encoder.encode_batch_f32(
             &self.device,
             batch_data,
@@ -263,8 +297,9 @@ impl QdpEngine {
         encoding_method: &str,
     ) -> Result<()> {
         crate::profile_scope!("Mahout::RunDualStreamEncode");
-        match encoding_method.to_lowercase().as_str() {
-            "amplitude" => {
+        let encoding = Encoding::from_str_ci(encoding_method)?;
+        match encoding {
+            Encoding::Amplitude => {
                 
gpu::encodings::amplitude::AmplitudeEncoder::run_amplitude_dual_stream_pipeline(
                     &self.device,
                     host_data,
@@ -273,7 +308,7 @@ impl QdpEngine {
             }
             _ => Err(MahoutError::InvalidInput(format!(
                 "run_dual_stream_encode supports only 'amplitude' for now, got 
'{}'",
-                encoding_method
+                encoding.as_str()
             ))),
         }
     }
@@ -507,7 +542,8 @@ impl QdpEngine {
 
         validate_cuda_input_ptr(&self.device, input_d)?;
 
-        let encoder = get_encoder(encoding_method)?;
+        let encoding = Encoding::from_str_ci(encoding_method)?;
+        let encoder = encoding.encoder();
         let state_vector = unsafe {
             encoder.encode_from_gpu_ptr(&self.device, input_d, input_len, num_qubits, stream)
         }?;
@@ -913,7 +949,8 @@ impl QdpEngine {
 
         validate_cuda_input_ptr(&self.device, input_batch_d)?;
 
-        let encoder = get_encoder(encoding_method)?;
+        let encoding = Encoding::from_str_ci(encoding_method)?;
+        let encoder = encoding.encoder();
         let batch_state_vector = unsafe {
             encoder.encode_batch_from_gpu_ptr(
                 &self.device,
diff --git a/qdp/qdp-core/src/pipeline_runner.rs b/qdp/qdp-core/src/pipeline_runner.rs
index fc19dd6a3..bfbf4bc81 100644
--- a/qdp/qdp-core/src/pipeline_runner.rs
+++ b/qdp/qdp-core/src/pipeline_runner.rs
@@ -24,9 +24,11 @@ use std::time::Instant;
 use crate::QdpEngine;
 use crate::dlpack::DLManagedTensor;
 use crate::error::{MahoutError, Result};
+use crate::gpu::memory::Precision;
 use crate::io;
 use crate::reader::{NullHandling, StreamingDataReader};
 use crate::readers::ParquetStreamingReader;
+use crate::types::Encoding;
 
 /// Configuration for throughput/latency pipeline runs (Python run_throughput_pipeline_py).
 #[derive(Clone, Debug)]
@@ -35,24 +37,31 @@ pub struct PipelineConfig {
     pub num_qubits: u32,
     pub batch_size: usize,
     pub total_batches: usize,
-    pub encoding_method: String,
+    pub encoding: Encoding,
     pub seed: Option<u64>,
     pub warmup_batches: usize,
     pub null_handling: NullHandling,
-    pub float32_pipeline: bool,
+    /// Pipeline element dtype for synthetic batch fill and `encode_batch` dispatch.
+    ///
+    /// If [`Encoding::supports_f32`](crate::types::Encoding::supports_f32) is false for the
+    /// chosen [`encoding`](PipelineConfig::encoding), [`normalize`](PipelineConfig::normalize)
+    /// downgrades this to [`Precision::Float64`] (see `types` module docs: batch f32 is wired
+    /// only for encodings with a real `encode_batch_f32` today).
+    pub dtype: Precision,
     pub prefetch_depth: usize,
 }
 
 impl PipelineConfig {
-    /// Normalizes the configuration, such as falling back to f64 if f32 is requested
-    /// but the encoding doesn't support it.
+    /// Normalizes the configuration: if `dtype` is float32 but the encoding cannot use the
+    /// f32 batch encode path ([`Encoding::supports_f32`](crate::types::Encoding::supports_f32)),
+    /// falls back to float64.
     pub fn normalize(&mut self) {
-        if self.float32_pipeline && !encoding_supports_f32(&self.encoding_method) {
+        if matches!(self.dtype, Precision::Float32) && !self.encoding.supports_f32() {
             log::info!(
-                "float32_pipeline requested but encoding '{}' does not support f32; falling back to f64",
-                self.encoding_method
+                "float32 pipeline requested but encoding '{}' does not support f32; falling back to f64",
+                self.encoding.as_str()
             );
-            self.float32_pipeline = false;
+            self.dtype = Precision::Float64;
         }
     }
 }
@@ -64,11 +73,11 @@ impl Default for PipelineConfig {
             num_qubits: 16,
             batch_size: 64,
             total_batches: 100,
-            encoding_method: "amplitude".to_string(),
+            encoding: Encoding::Amplitude,
             seed: None,
             warmup_batches: 0,
             null_handling: NullHandling::FillZero,
-            float32_pipeline: false,
+            dtype: Precision::Float64,
             prefetch_depth: 16,
         }
     }
@@ -99,12 +108,6 @@ pub trait BatchProducer: Send + 'static {
     fn produce(&mut self, recycled: Option<BatchData>) -> 
Result<Option<PrefetchedBatch>>;
 }
 
-/// Returns true if the given encoding method has a native f32 GPU kernel.
-/// Used to auto-gate `float32_pipeline` so unsupported encodings fall back to 
f64.
-fn encoding_supports_f32(encoding_method: &str) -> bool {
-    matches!(encoding_method.to_lowercase().as_str(), "amplitude")
-}
-
 pub struct SyntheticProducer {
     pub config: PipelineConfig,
     pub vector_len: usize,
@@ -131,16 +134,16 @@ impl BatchProducer for SyntheticProducer {
         }
 
         let mut data = match recycled {
-            Some(BatchData::F32(mut buf)) if self.config.float32_pipeline => {
+            Some(BatchData::F32(mut buf)) if matches!(self.config.dtype, 
Precision::Float32) => {
                 buf.resize(self.config.batch_size * self.vector_len, 0.0);
                 BatchData::F32(buf)
             }
-            Some(BatchData::F64(mut buf)) if !self.config.float32_pipeline => {
+            Some(BatchData::F64(mut buf)) if matches!(self.config.dtype, 
Precision::Float64) => {
                 buf.resize(self.config.batch_size * self.vector_len, 0.0);
                 BatchData::F64(buf)
             }
             _ => {
-                if self.config.float32_pipeline {
+                if matches!(self.config.dtype, Precision::Float32) {
                     BatchData::F32(vec![0.0f32; self.config.batch_size * 
self.vector_len])
                 } else {
                     BatchData::F64(vec![0.0f64; self.config.batch_size * 
self.vector_len])
@@ -366,7 +369,7 @@ pub struct PipelineIterator {
 impl PipelineIterator {
     pub fn new_synthetic(engine: QdpEngine, mut config: PipelineConfig) -> 
Result<Self> {
         config.normalize();
-        let vector_len = vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = vector_len(config.num_qubits, config.encoding);
         let producer = SyntheticProducer::new(config.clone(), vector_len);
         let prefetch_depth = config.prefetch_depth;
         let (rx, recycle_tx, _producer_handle) = spawn_producer(producer, 
prefetch_depth)?;
@@ -393,13 +396,16 @@ impl PipelineIterator {
         config.normalize();
         let path = path.as_ref();
         let (data, num_samples, sample_size) = read_file_by_extension(path, 
config.null_handling)?;
-        let vector_len = vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = vector_len(config.num_qubits, config.encoding);
 
         // Dimension validation at construction.
         if sample_size != vector_len {
             return Err(MahoutError::InvalidInput(format!(
                 "File feature length {} does not match vector_len {} for 
num_qubits={}, encoding={}",
-                sample_size, vector_len, config.num_qubits, 
config.encoding_method
+                sample_size,
+                vector_len,
+                config.num_qubits,
+                config.encoding.as_str()
             )));
         }
         if data.len() != num_samples * sample_size {
@@ -454,7 +460,7 @@ impl PipelineIterator {
             Some(DEFAULT_PARQUET_ROW_GROUP_SIZE),
             config.null_handling,
         )?;
-        let vector_len = vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = vector_len(config.num_qubits, config.encoding);
 
         // Read first chunk to learn sample_size; reuse as initial buffer.
         const INITIAL_CHUNK_CAP: usize = 64 * 1024;
@@ -474,7 +480,10 @@ impl PipelineIterator {
         if sample_size != vector_len {
             return Err(MahoutError::InvalidInput(format!(
                 "File feature length {} does not match vector_len {} for 
num_qubits={}, encoding={}",
-                sample_size, vector_len, config.num_qubits, 
config.encoding_method
+                sample_size,
+                vector_len,
+                config.num_qubits,
+                config.encoding.as_str()
             )));
         }
 
@@ -511,19 +520,19 @@ impl PipelineIterator {
             Err(_) => return Ok(None),
         };
         let ptr = match &batch.data {
-            BatchData::F64(buf) => self.engine.encode_batch(
+            BatchData::F64(buf) => self.engine.encode_batch_for_pipeline(
                 buf,
                 batch.batch_n,
                 batch.sample_size,
                 batch.num_qubits,
-                &self.config.encoding_method,
+                self.config.encoding,
             )?,
-            BatchData::F32(buf) => self.engine.encode_batch_f32(
+            BatchData::F32(buf) => self.engine.encode_batch_f32_for_pipeline(
                 buf,
                 batch.batch_n,
                 batch.sample_size,
                 batch.num_qubits,
-                &self.config.encoding_method,
+                self.config.encoding,
             )?,
         };
         let _ = self.recycle_tx.lock().unwrap().send(batch.data);
@@ -532,47 +541,32 @@ impl PipelineIterator {
 }
 
 /// Vector length per sample for given encoding (used by pipeline and iterator).
-pub fn vector_len(num_qubits: u32, encoding_method: &str) -> usize {
-    let n = num_qubits as usize;
-    match encoding_method.to_lowercase().as_str() {
-        "angle" => n,
-        "basis" => 1,
-        "iqp-z" => n,
-        "iqp" => n + n.saturating_mul(n.saturating_sub(1)) / 2,
-        _ => 1 << n, // amplitude
-    }
+pub fn vector_len(num_qubits: u32, encoding: Encoding) -> usize {
+    encoding.vector_len(num_qubits)
 }
 
-/// Deterministic sample generation matching Python benchmark helpers.
-fn fill_sample(seed: u64, out: &mut [f64], encoding_method: &str, num_qubits: usize) -> Result<()> {
+/// Deterministic sample generation matching Python utils.build_sample.
+fn fill_sample(seed: u64, out: &mut [f64], encoding: Encoding, num_qubits: usize) -> Result<()> {
     let len = out.len();
     if len == 0 {
         return Ok(());
     }
-    match encoding_method.to_lowercase().as_str() {
-        "basis" => {
+    match encoding {
+        Encoding::Basis => {
             // For basis encoding, use 2^num_qubits as the state space size for mask calculation
             let state_space_size = 1 << num_qubits;
             let mask = (state_space_size - 1) as u64;
             let idx = seed & mask;
             out[0] = idx as f64;
         }
-        "angle" => {
-            let scale = (2.0 * PI) / len as f64;
-            for (i, v) in out.iter_mut().enumerate() {
-                let mixed = (i as u64 + seed) % (len as u64);
-                *v = mixed as f64 * scale;
-            }
-        }
-        "iqp-z" | "iqp" => {
+        Encoding::Angle | Encoding::Iqp | Encoding::IqpZ | Encoding::Phase => {
             let scale = (2.0 * PI) / len as f64;
             for (i, v) in out.iter_mut().enumerate() {
                 let mixed = (i as u64 + seed) % (len as u64);
                 *v = mixed as f64 * scale;
             }
         }
-        _ => {
-            // amplitude
+        Encoding::Amplitude => {
             let mask = (len - 1) as u64;
             let scale = 1.0 / len as f64;
             for (i, v) in out.iter_mut().enumerate() {
@@ -609,7 +603,7 @@ fn fill_batch_inplace(
         let _ = fill_sample(
             seed_base + i as u64,
             &mut batch_buf[offset..offset + vector_len],
-            &config.encoding_method,
+            config.encoding,
             config.num_qubits as usize,
         );
     }
@@ -619,36 +613,28 @@ fn fill_batch_inplace(
 fn fill_sample_f32(
     seed: u64,
     out: &mut [f32],
-    encoding_method: &str,
+    encoding: Encoding,
     num_qubits: usize,
 ) -> Result<()> {
     let len = out.len();
     if len == 0 {
         return Ok(());
     }
-    match encoding_method.to_lowercase().as_str() {
-        "basis" => {
+    match encoding {
+        Encoding::Basis => {
             let state_space_size = 1 << num_qubits;
             let mask = (state_space_size - 1) as u64;
             let idx = seed & mask;
             out[0] = idx as f32;
         }
-        "angle" => {
-            let scale = (2.0 * std::f32::consts::PI) / len as f32;
-            for (i, v) in out.iter_mut().enumerate() {
-                let mixed = (i as u64 + seed) % (len as u64);
-                *v = mixed as f32 * scale;
-            }
-        }
-        "iqp-z" | "iqp" => {
+        Encoding::Angle | Encoding::Iqp | Encoding::IqpZ | Encoding::Phase => {
             let scale = (2.0 * std::f32::consts::PI) / len as f32;
             for (i, v) in out.iter_mut().enumerate() {
                 let mixed = (i as u64 + seed) % (len as u64);
                 *v = mixed as f32 * scale;
             }
         }
-        _ => {
-            // amplitude
+        Encoding::Amplitude => {
             let mask = (len - 1) as u64;
             let scale = 1.0 / len as f32;
             for (i, v) in out.iter_mut().enumerate() {
@@ -676,7 +662,7 @@ fn fill_batch_inplace_f32(
         let _ = fill_sample_f32(
             seed_base + i as u64,
             &mut batch_buf[offset..offset + vector_len],
-            &config.encoding_method,
+            config.encoding,
             config.num_qubits as usize,
         );
     }
@@ -699,20 +685,20 @@ pub fn run_throughput_pipeline(config: &PipelineConfig) -> Result<PipelineRunRes
     config.normalize();
 
     let engine = QdpEngine::new(config.device_id)?;
-    let vector_len = vector_len(config.num_qubits, &config.encoding_method);
+    let vector_len = vector_len(config.num_qubits, config.encoding);
     let num_qubits = config.num_qubits as usize;
 
     // Warmup
-    if config.float32_pipeline {
+    if matches!(config.dtype, Precision::Float32) {
         let mut batch_buf = vec![0.0f32; config.batch_size * vector_len];
         for b in 0..config.warmup_batches {
             fill_batch_inplace_f32(&config, b, vector_len, &mut batch_buf);
-            let ptr = engine.encode_batch_f32(
+            let ptr = engine.encode_batch_f32_for_pipeline(
                 &batch_buf,
                 config.batch_size,
                 vector_len,
                 num_qubits,
-                &config.encoding_method,
+                config.encoding,
             )?;
             unsafe { release_dlpack(ptr) };
         }
@@ -720,12 +706,12 @@ pub fn run_throughput_pipeline(config: &PipelineConfig) 
-> Result<PipelineRunRes
         let mut batch_buf = vec![0.0f64; config.batch_size * vector_len];
         for b in 0..config.warmup_batches {
             fill_batch_inplace(&config, b, vector_len, &mut batch_buf);
-            let ptr = engine.encode_batch(
+            let ptr = engine.encode_batch_for_pipeline(
                 &batch_buf,
                 config.batch_size,
                 vector_len,
                 num_qubits,
-                &config.encoding_method,
+                config.encoding,
             )?;
             unsafe { release_dlpack(ptr) };
         }
@@ -742,19 +728,19 @@ pub fn run_throughput_pipeline(config: &PipelineConfig) 
-> Result<PipelineRunRes
     while let Ok(result) = rx.recv() {
         let batch = result?;
         let ptr = match &batch.data {
-            BatchData::F64(buf) => engine.encode_batch(
+            BatchData::F64(buf) => engine.encode_batch_for_pipeline(
                 buf,
                 batch.batch_n,
                 batch.sample_size,
                 batch.num_qubits,
-                &config.encoding_method,
+                config.encoding,
             )?,
-            BatchData::F32(buf) => engine.encode_batch_f32(
+            BatchData::F32(buf) => engine.encode_batch_f32_for_pipeline(
                 buf,
                 batch.batch_n,
                 batch.sample_size,
                 batch.num_qubits,
-                &config.encoding_method,
+                config.encoding,
             )?,
         };
         unsafe { release_dlpack(ptr) };
@@ -797,12 +783,12 @@ mod tests {
         let config = PipelineConfig {
             num_qubits: 5,
             batch_size: 8,
-            encoding_method: encoding_method.to_string(),
+            encoding: Encoding::from_str_ci(encoding_method).unwrap(),
             seed: Some(123),
             ..Default::default()
         };
 
-        let vector_len = vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = vector_len(config.num_qubits, config.encoding);
 
         // Test edge cases: 0 and batch_size-1
         for batch_idx in [0, config.batch_size - 1, 7] {
@@ -818,12 +804,12 @@ mod tests {
         let config = PipelineConfig {
             num_qubits: 5,
             batch_size: 8,
-            encoding_method: encoding_method.to_string(),
+            encoding: Encoding::from_str_ci(encoding_method).unwrap(),
             seed: Some(123),
             ..Default::default()
         };
 
-        let vector_len = vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = vector_len(config.num_qubits, config.encoding);
 
         let batch0 = generate_batch(&config, 0, vector_len);
         let batch1 = generate_batch(&config, 1, vector_len);
@@ -885,12 +871,12 @@ mod tests {
         let config = PipelineConfig {
             num_qubits: 5,
             batch_size: 8,
-            encoding_method: "amplitude".to_string(),
+            encoding: Encoding::Amplitude,
             seed: None,
             ..Default::default()
         };
 
-        let vector_len = vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = vector_len(config.num_qubits, config.encoding);
         let batch = generate_batch(&config, 0, vector_len);
         assert_eq!(batch.len(), config.batch_size * vector_len);
 
@@ -904,12 +890,12 @@ mod tests {
         let config = PipelineConfig {
             num_qubits: 5,
             batch_size: 1,
-            encoding_method: "amplitude".to_string(),
+            encoding: Encoding::Amplitude,
             seed: Some(123),
             ..Default::default()
         };
 
-        let vector_len = vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = vector_len(config.num_qubits, config.encoding);
         let batch = generate_batch(&config, 0, vector_len);
         assert_eq!(batch.len(), vector_len);
 
@@ -927,7 +913,7 @@ mod tests {
         let config_lower = PipelineConfig {
             num_qubits: 5,
             batch_size: 8,
-            encoding_method: "amplitude".to_string(),
+            encoding: Encoding::Amplitude,
             seed: Some(123),
             ..Default::default()
         };
@@ -935,12 +921,12 @@ mod tests {
         let config_upper = PipelineConfig {
             num_qubits: 5,
             batch_size: 8,
-            encoding_method: "AMPLITUDE".to_string(),
+            encoding: Encoding::from_str_ci("AMPLITUDE").unwrap(),
             seed: Some(123),
             ..Default::default()
         };
 
-        let vector_len = vector_len(config_lower.num_qubits, 
&config_lower.encoding_method);
+        let vector_len = vector_len(config_lower.num_qubits, 
config_lower.encoding);
         let batch_lower = generate_batch(&config_lower, 0, vector_len);
         let batch_upper = generate_batch(&config_upper, 0, vector_len);
         assert_eq!(batch_lower, batch_upper);
@@ -951,12 +937,12 @@ mod tests {
         let config = PipelineConfig {
             num_qubits: 5,
             batch_size: 8,
-            encoding_method: "amplitude".to_string(),
+            encoding: Encoding::Amplitude,
             seed: Some(123),
             ..Default::default()
         };
 
-        let vector_len = vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = vector_len(config.num_qubits, config.encoding);
 
         for batch_idx in 0..5 {
             let batch = generate_batch(&config, batch_idx, vector_len);
@@ -976,12 +962,12 @@ mod tests {
         let config = PipelineConfig {
             num_qubits: 5,
             batch_size: 8,
-            encoding_method: "amplitude".to_string(),
+            encoding: Encoding::Amplitude,
             seed: None,
             ..Default::default()
         };
 
-        let vector_len = vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = vector_len(config.num_qubits, config.encoding);
         let batch = generate_batch(&config, 0, vector_len);
 
         for &value in &batch {
@@ -998,12 +984,12 @@ mod tests {
         let config = PipelineConfig {
             num_qubits: 5,
             batch_size: 1,
-            encoding_method: "amplitude".to_string(),
+            encoding: Encoding::Amplitude,
             seed: Some(123),
             ..Default::default()
         };
 
-        let vector_len = vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = vector_len(config.num_qubits, config.encoding);
         let batch = generate_batch(&config, 0, vector_len);
 
         for &value in &batch {
@@ -1020,10 +1006,10 @@ mod tests {
             total_batches: 5,
             num_qubits: 3,
             batch_size: 4,
-            encoding_method: "amplitude".to_string(),
+            encoding: Encoding::Amplitude,
             ..Default::default()
         };
-        let vector_len = super::vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = super::vector_len(config.num_qubits, config.encoding);
         let mut producer = SyntheticProducer::new(config, vector_len);
 
         let mut count = 0;
@@ -1039,10 +1025,10 @@ mod tests {
             total_batches: 1,
             num_qubits: 3,
             batch_size: 4,
-            encoding_method: "amplitude".to_string(),
+            encoding: Encoding::Amplitude,
             ..Default::default()
         };
-        let vector_len = super::vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = super::vector_len(config.num_qubits, config.encoding);
         let mut producer = SyntheticProducer::new(config.clone(), vector_len);
 
         let batch_from_producer = producer.produce(None).unwrap().unwrap();
@@ -1056,7 +1042,7 @@ mod tests {
         let config = PipelineConfig {
             batch_size: 5,
             num_qubits: 2,
-            encoding_method: "amplitude".to_string(),
+            encoding: Encoding::Amplitude,
             ..Default::default()
         };
         let sample_size = 4; // 2^2
@@ -1086,7 +1072,7 @@ mod tests {
             prefetch_depth: 16,
             ..Default::default()
         };
-        let vector_len = super::vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = super::vector_len(config.num_qubits, config.encoding);
         let producer = SyntheticProducer::new(config, vector_len);
 
         let (rx, _recycle_tx, handle) = spawn_producer(producer, 16).unwrap();
@@ -1108,7 +1094,7 @@ mod tests {
             prefetch_depth: 16,
             ..Default::default()
         };
-        let vector_len = super::vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = super::vector_len(config.num_qubits, config.encoding);
         let producer = SyntheticProducer::new(config, vector_len);
 
         let (rx, _recycle_tx, handle) = spawn_producer(producer, 16).unwrap();
@@ -1129,18 +1115,18 @@ mod tests {
             total_batches: 2,
             num_qubits: 3,
             batch_size: 4,
-            encoding_method: "amplitude".to_string(),
-            float32_pipeline: true,
+            encoding: Encoding::Amplitude,
+            dtype: Precision::Float32,
             ..Default::default()
         };
         config.normalize();
-        let vector_len = super::vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = super::vector_len(config.num_qubits, config.encoding);
         let mut producer = SyntheticProducer::new(config, vector_len);
 
         let batch = producer.produce(None).unwrap().unwrap();
         assert!(
             matches!(batch.data, BatchData::F32(_)),
-            "amplitude with float32_pipeline=true should produce F32 data"
+            "amplitude with dtype=Float32 should produce F32 data"
         );
 
         // Verify data is non-zero (was actually filled)
@@ -1158,18 +1144,18 @@ mod tests {
             total_batches: 1,
             num_qubits: 3,
             batch_size: 4,
-            encoding_method: "angle".to_string(),
-            float32_pipeline: true, // requested f32, but angle doesn't 
support it
+            encoding: Encoding::Angle,
+            dtype: Precision::Float32, // requested f32, but angle doesn't 
support native f32 batch path
             ..Default::default()
         };
         config.normalize();
-        let vector_len = super::vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = super::vector_len(config.num_qubits, config.encoding);
         let mut producer = SyntheticProducer::new(config, vector_len);
 
         let batch = producer.produce(None).unwrap().unwrap();
         assert!(
             matches!(batch.data, BatchData::F64(_)),
-            "angle with float32_pipeline=true should fall back to F64 data"
+            "angle with requested Float32 should fall back to F64 batch data 
(no encode_batch_f32 yet)"
         );
     }
 
@@ -1179,36 +1165,36 @@ mod tests {
             total_batches: 1,
             num_qubits: 3,
             batch_size: 4,
-            encoding_method: "basis".to_string(),
-            float32_pipeline: true,
+            encoding: Encoding::Basis,
+            dtype: Precision::Float32,
             ..Default::default()
         };
         config.normalize();
-        let vector_len = super::vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = super::vector_len(config.num_qubits, config.encoding);
         let mut producer = SyntheticProducer::new(config, vector_len);
 
         let batch = producer.produce(None).unwrap().unwrap();
         assert!(
             matches!(batch.data, BatchData::F64(_)),
-            "basis with float32_pipeline=true should fall back to F64 data"
+            "basis with requested Float32 should fall back to F64 batch data 
(no encode_batch_f32 yet)"
         );
     }
 
     #[test]
     fn test_encoding_supports_f32() {
-        assert!(super::encoding_supports_f32("amplitude"));
-        assert!(super::encoding_supports_f32("Amplitude"));
-        assert!(super::encoding_supports_f32("AMPLITUDE"));
-        assert!(!super::encoding_supports_f32("angle"));
-        assert!(!super::encoding_supports_f32("basis"));
-        assert!(!super::encoding_supports_f32("iqp-z"));
-        assert!(!super::encoding_supports_f32("iqp"));
+        assert!(Encoding::Amplitude.supports_f32());
+        assert!(Encoding::from_str_ci("Amplitude").unwrap().supports_f32());
+        assert!(Encoding::from_str_ci("AMPLITUDE").unwrap().supports_f32());
+        assert!(!Encoding::Angle.supports_f32());
+        assert!(!Encoding::Basis.supports_f32());
+        assert!(!Encoding::Iqp.supports_f32());
+        assert!(!Encoding::IqpZ.supports_f32());
     }
 
     #[test]
     fn test_vector_len_for_iqp_variants() {
-        assert_eq!(super::vector_len(4, "iqp-z"), 4);
-        assert_eq!(super::vector_len(4, "iqp"), 10);
+        assert_eq!(super::vector_len(4, Encoding::IqpZ), 4);
+        assert_eq!(super::vector_len(4, Encoding::Iqp), 10);
     }
 
     #[test]
@@ -1216,12 +1202,12 @@ mod tests {
         let config = PipelineConfig {
             num_qubits: 4,
             batch_size: 3,
-            encoding_method: "iqp".to_string(),
+            encoding: Encoding::Iqp,
             seed: Some(7),
             ..Default::default()
         };
 
-        let vector_len = super::vector_len(config.num_qubits, 
&config.encoding_method);
+        let vector_len = super::vector_len(config.num_qubits, config.encoding);
         let batch = generate_batch(&config, 0, vector_len);
         let upper = 2.0 * PI;
         for &value in &batch {
diff --git a/qdp/qdp-core/src/types.rs b/qdp/qdp-core/src/types.rs
new file mode 100644
index 000000000..f8a98834b
--- /dev/null
+++ b/qdp/qdp-core/src/types.rs
@@ -0,0 +1,160 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Canonical domain types for encodings and element dtypes (`Dtype`).
+//!
+//! ## `Encoding::supports_f32`
+//!
+//! A future shape of this API may return true for amplitude, angle, and basis once each encoder
+//! has a batch float32 GPU path. **Today only amplitude implements**
+//! [`QuantumEncoder::encode_batch_f32`] for the synthetic prefetch pipeline, so
+//! [`Encoding::supports_f32`](Encoding::supports_f32) stays amplitude-only and
+//! [`crate::pipeline_runner::PipelineConfig::normalize`] avoids routing other encodings through
+//! `encode_batch_f32`. Widen this method when angle/basis gain real `encode_batch_f32`
+//! implementations.
+
+use crate::error::{MahoutError, Result};
+use crate::gpu::encodings::{
+    AmplitudeEncoder, AngleEncoder, BasisEncoder, PhaseEncoder, 
QuantumEncoder, iqp_full_encoder,
+    iqp_z_encoder,
+};
+
+/// Dtype for pipeline configuration (re-export of [`crate::gpu::memory::Precision`]).
+pub use crate::gpu::memory::Precision as Dtype;
+
+impl crate::gpu::memory::Precision {
+    /// Parses a user-supplied dtype name.
+    ///
+    /// Surrounding whitespace is ignored and matching is ASCII case-insensitive.
+    /// Accepted spellings: `f32` / `float32` / `float` and `f64` / `float64` / `double`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`MahoutError::InvalidInput`] when the name matches none of the aliases.
+    pub fn from_str_ci(s: &str) -> Result<Self> {
+        const F32_ALIASES: [&str; 3] = ["f32", "float32", "float"];
+        const F64_ALIASES: [&str; 3] = ["f64", "float64", "double"];
+
+        let name = s.trim();
+        let is_any_of =
+            |aliases: &[&str]| aliases.iter().any(|&alias| name.eq_ignore_ascii_case(alias));
+
+        if is_any_of(&F32_ALIASES) {
+            return Ok(Self::Float32);
+        }
+        if is_any_of(&F64_ALIASES) {
+            return Ok(Self::Float64);
+        }
+        // The message echoes the caller's original (untrimmed) input.
+        Err(MahoutError::InvalidInput(format!(
+            "Unknown dtype: {s}. Use 'f32' or 'f64'."
+        )))
+    }
+
+    /// Size in bytes of one real scalar component of this dtype (4 for f32, 8 for f64).
+    #[must_use]
+    pub const fn bytes(self) -> usize {
+        match self {
+            Self::Float64 => 8,
+            Self::Float32 => 4,
+        }
+    }
+}
+
+/// Quantum encoding method (canonical; parse user strings once at API boundaries).
+///
+/// Each variant corresponds to one lowercase name accepted by `Encoding::from_str_ci`
+/// and returned by `Encoding::as_str`. The per-sample input lengths quoted below are the
+/// ones computed by `Encoding::vector_len` for `n` qubits.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum Encoding {
+    /// `"amplitude"`: full state vector, `2^n` values per sample.
+    Amplitude,
+    /// `"angle"`: one value per qubit (`n`).
+    Angle,
+    /// `"basis"`: a single integer index (`1` value per sample).
+    Basis,
+    /// `"iqp"`: single-qubit plus pairwise ZZ terms (`n + n*(n-1)/2` values).
+    Iqp,
+    /// `"iqp-z"`: one value per qubit (`n`).
+    IqpZ,
+    /// `"phase"`: one value per qubit (`n`).
+    Phase,
+}
+
+impl Encoding {
+    /// Parse encoding name (case-insensitive ASCII, stack buffer; no heap 
allocation).
+    pub fn from_str_ci(s: &str) -> Result<Self> {
+        let mut buf = [0u8; 16];
+        let bytes = s.as_bytes();
+        if bytes.len() > buf.len() {
+            return Err(MahoutError::InvalidInput(format!(
+                "Unknown encoding: {s}. Available: amplitude, angle, basis, 
iqp, iqp-z, phase"
+            )));
+        }
+        for (i, b) in bytes.iter().enumerate() {
+            buf[i] = b.to_ascii_lowercase();
+        }
+        match &buf[..bytes.len()] {
+            b"amplitude" => Ok(Self::Amplitude),
+            b"angle" => Ok(Self::Angle),
+            b"basis" => Ok(Self::Basis),
+            b"iqp" => Ok(Self::Iqp),
+            b"iqp-z" => Ok(Self::IqpZ),
+            b"phase" => Ok(Self::Phase),
+            _ => Err(MahoutError::InvalidInput(format!(
+                "Unknown encoding: {s}. Available: amplitude, angle, basis, 
iqp, iqp-z, phase"
+            ))),
+        }
+    }
+
+    #[must_use]
+    pub const fn as_str(self) -> &'static str {
+        match self {
+            Self::Amplitude => "amplitude",
+            Self::Angle => "angle",
+            Self::Basis => "basis",
+            Self::Iqp => "iqp",
+            Self::IqpZ => "iqp-z",
+            Self::Phase => "phase",
+        }
+    }
+
+    /// Input feature dimension per sample for this encoding and qubit count.
+    ///
+    /// Matches each encoder's `expected_data_len` / `sample_size` contract:
+    /// - `Amplitude`: full state vector (`2^n`)
+    /// - `Angle` / `IqpZ` / `Phase`: one value per qubit (`n`)
+    /// - `Iqp`: single-qubit + pairwise ZZ terms (`n + n*(n-1)/2`)
+    /// - `Basis`: single integer index (`1`)
+    #[must_use]
+    pub const fn vector_len(self, num_qubits: u32) -> usize {
+        let n = num_qubits as usize;
+        match self {
+            Self::Amplitude => 1 << n,
+            Self::Angle | Self::IqpZ | Self::Phase => n,
+            Self::Iqp => n + n * n.saturating_sub(1) / 2,
+            Self::Basis => 1,
+        }
+    }
+
+    /// Whether the **synthetic batch pipeline** may keep 
[`crate::gpu::memory::Precision::Float32`]
+    /// end-to-end (prefetched host `Vec<f32>` plus 
[`crate::QdpEngine::encode_batch_f32`]).
+    ///
+    /// This must match encoders that actually implement 
[`QuantumEncoder::encode_batch_f32`].
+    /// Long-term design may include angle/basis here; today only amplitude 
does, so angle/basis
+    /// still normalize to `Float64` in 
[`crate::pipeline_runner::PipelineConfig::normalize`]
+    /// until their batch f32 GPU paths exist in the encoder implementations.
+    #[must_use]
+    pub const fn supports_f32(self) -> bool {
+        matches!(self, Self::Amplitude)
+    }
+
+    /// Static encoder dispatch (no per-call heap allocation).
+    #[must_use]
+    pub fn encoder(self) -> &'static dyn QuantumEncoder {
+        match self {
+            Self::Amplitude => &AmplitudeEncoder,
+            Self::Angle => &AngleEncoder,
+            Self::Basis => &BasisEncoder,
+            Self::Iqp => iqp_full_encoder(),
+            Self::IqpZ => iqp_z_encoder(),
+            Self::Phase => &PhaseEncoder,
+        }
+    }
+}
diff --git a/qdp/qdp-core/tests/gpu_angle_encoding.rs 
b/qdp/qdp-core/tests/gpu_angle_encoding.rs
index 6b60d5153..dee51331b 100644
--- a/qdp/qdp-core/tests/gpu_angle_encoding.rs
+++ b/qdp/qdp-core/tests/gpu_angle_encoding.rs
@@ -114,6 +114,28 @@ fn test_angle_infinity_rejected() {
 
 // ---- Successful encoding (kernel launch path) ----
 
+/// Regression: streaming Parquet path accepts mixed-case encoding names via 
`Encoding::from_str_ci`.
+#[test]
+fn test_angle_parquet_encoding_case_insensitive() {
+    let Some(engine) = common::qdp_engine() else {
+        return;
+    };
+
+    let num_qubits = 2;
+    let data: Vec<f64> = vec![0.1, 0.2];
+    let path = "/tmp/test_angle_case.parquet";
+    common::write_fixed_size_list_parquet(path, &data, num_qubits);
+
+    let dlpack_ptr = engine
+        .encode_from_parquet(path, num_qubits, "Angle")
+        .expect("mixed-case 'Angle' should match streaming angle encoder");
+    let _ = std::fs::remove_file(path);
+
+    unsafe {
+        common::assert_dlpack_shape_2d_and_delete(dlpack_ptr, 1, (1 << 
num_qubits) as i64);
+    }
+}
+
 #[test]
 fn test_angle_successful_encoding_from_parquet() {
     let Some(engine) = common::qdp_engine() else {
diff --git a/qdp/qdp-core/tests/gpu_iqp_encoding.rs 
b/qdp/qdp-core/tests/gpu_iqp_encoding.rs
index f45ba3eac..4954ab5b3 100644
--- a/qdp/qdp-core/tests/gpu_iqp_encoding.rs
+++ b/qdp/qdp-core/tests/gpu_iqp_encoding.rs
@@ -797,7 +797,7 @@ fn test_iqp_fwt_zero_parameters_identity() {
 #[test]
 #[cfg(target_os = "linux")]
 fn test_iqp_encoder_via_factory() {
-    println!("Testing IQP encoder creation via get_encoder...");
+    println!("Testing IQP encoder creation via Encoding::from_str_ci / 
encode...");
 
     let Some(engine) = common::qdp_engine() else {
         println!("SKIP: No GPU available");
@@ -836,7 +836,7 @@ fn test_iqp_encoder_via_factory() {
 #[test]
 #[cfg(target_os = "linux")]
 fn test_iqp_z_encoder_via_factory() {
-    println!("Testing IQP-Z encoder creation via get_encoder...");
+    println!("Testing IQP-Z encoder creation via encode...");
 
     let Some(engine) = common::qdp_engine() else {
         println!("SKIP: No GPU available");
diff --git a/qdp/qdp-core/tests/gpu_validation.rs 
b/qdp/qdp-core/tests/gpu_validation.rs
index 3235b24fa..291f92dce 100644
--- a/qdp/qdp-core/tests/gpu_validation.rs
+++ b/qdp/qdp-core/tests/gpu_validation.rs
@@ -38,8 +38,8 @@ fn test_input_validation_invalid_strategy() {
     match result {
         Err(MahoutError::InvalidInput(msg)) => {
             assert!(
-                msg.contains("Unknown encoder"),
-                "Error message should mention unknown encoder"
+                msg.contains("Unknown encoding"),
+                "Error message should mention unknown encoding"
             );
             println!("PASS: Correctly rejected invalid strategy: {}", msg);
         }
diff --git a/qdp/qdp-core/tests/types.rs b/qdp/qdp-core/tests/types.rs
new file mode 100644
index 000000000..cf2c8b174
--- /dev/null
+++ b/qdp/qdp-core/tests/types.rs
@@ -0,0 +1,73 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Tests for [`qdp_core::Encoding`] and [`qdp_core::Dtype`].
+
+use qdp_core::{Dtype, Encoding};
+
+#[test]
+fn encoding_case_insensitive() {
+    // (user spelling, expected variant)
+    let cases = [
+        ("Amplitude", Encoding::Amplitude),
+        ("AMPLITUDE", Encoding::Amplitude),
+        ("iqp-z", Encoding::IqpZ),
+    ];
+    for (raw, expected) in cases {
+        assert_eq!(Encoding::from_str_ci(raw).unwrap(), expected);
+    }
+}
+
+#[test]
+fn encoding_unknown_returns_err() {
+    // Short unknown name: falls through the match on the lowercased bytes.
+    assert!(Encoding::from_str_ci("not_real").is_err());
+    // Longer than the parser's 16-byte stack buffer: rejected before any copy.
+    assert!(Encoding::from_str_ci("this-encoding-name-is-too-long").is_err());
+    // Empty input is not a valid encoding name either.
+    assert!(Encoding::from_str_ci("").is_err());
+}
+
+#[test]
+fn vector_len_matches_encoder_contracts() {
+    const NUM_QUBITS: u32 = 5;
+    let expected = [
+        (Encoding::Amplitude, 32),      // 2^5
+        (Encoding::Angle, 5),           // n
+        (Encoding::IqpZ, 5),            // n (z-only)
+        (Encoding::Phase, 5),           // n (one angle per qubit)
+        (Encoding::Iqp, 5 + 5 * 4 / 2), // n + n*(n-1)/2 = 15
+        (Encoding::Basis, 1),
+    ];
+    for (encoding, len) in expected {
+        assert_eq!(encoding.vector_len(NUM_QUBITS), len);
+    }
+}
+
+#[test]
+fn static_encoder_same_instance_across_calls() {
+    let first = Encoding::Amplitude.encoder();
+    let second = Encoding::Amplitude.encoder();
+    // Compare data addresses only: `std::ptr::eq` on `&dyn Trait` also compares the vtable
+    // pointers, which are not guaranteed to be unique and could make this check fail
+    // spuriously.
+    assert!(
+        std::ptr::addr_eq(first, second),
+        "static dispatch must return the same 'static reference"
+    );
+}
+
+#[test]
+fn supports_f32_amplitude_only() {
+    let all = [
+        Encoding::Amplitude,
+        Encoding::Angle,
+        Encoding::Basis,
+        Encoding::Iqp,
+        Encoding::IqpZ,
+        Encoding::Phase,
+    ];
+    for encoding in all {
+        // Only amplitude is expected to report float32 support.
+        let expected = matches!(encoding, Encoding::Amplitude);
+        assert_eq!(encoding.supports_f32(), expected);
+    }
+}
+
+#[test]
+fn dtype_from_str_ci() {
+    assert_eq!(Dtype::from_str_ci("f32").unwrap(), Dtype::Float32);
+    assert_eq!(Dtype::from_str_ci("Float64").unwrap(), Dtype::Float64);
+
+    // Remaining aliases, mixed case, and surrounding whitespace (the parser trims).
+    for name in ["float", "FLOAT32", "  f32  "] {
+        assert_eq!(Dtype::from_str_ci(name).unwrap(), Dtype::Float32);
+    }
+    for name in ["double", "F64", "\tfloat64\n"] {
+        assert_eq!(Dtype::from_str_ci(name).unwrap(), Dtype::Float64);
+    }
+
+    // Unsupported or empty names are rejected.
+    for name in ["bf16", ""] {
+        assert!(Dtype::from_str_ci(name).is_err());
+    }
+}
diff --git a/qdp/qdp-python/README.md b/qdp/qdp-python/README.md
index cacfa0417..a81f95324 100644
--- a/qdp/qdp-python/README.md
+++ b/qdp/qdp-python/README.md
@@ -86,6 +86,16 @@ Backend support boundary:
     a follow-up.
 - AMD (`QdpEngine(..., backend="amd")`): `amplitude`, `angle`, `basis`, `iqp`, 
`iqp-z`, `phase`
 
+### Pipeline / loader dtype (Rust internals)
+
+`QuantumDataLoader` and `run_throughput_pipeline` build a Rust `PipelineConfig` with an
+`encoding` plus a `dtype` (float32 vs float64). The prefetch thread can only keep an
+end-to-end **float32 host batch** for encodings whose GPU stack implements the batch **f32**
+path (`encode_batch_f32`). **Today that is amplitude only.** Angle and basis still fall back
+to float64 for that loop until their batch f32 implementations exist. The set of encodings
+reported by `supports_f32` is expected to grow (e.g. to angle/basis once their kernels are
+wired), so the eventual full matrix is broader than what the pipeline uses today.
+
 ## Input Sources
 
 ```python
diff --git a/qdp/qdp-python/qumat_qdp/api.py b/qdp/qdp-python/qumat_qdp/api.py
index 2ae4e45e6..6493dd0f3 100644
--- a/qdp/qdp-python/qumat_qdp/api.py
+++ b/qdp/qdp-python/qumat_qdp/api.py
@@ -161,7 +161,7 @@ class QdpBenchmark:
             encoding_method=self._encoding_method,
             warmup_batches=self._warmup_batches,
             seed=None,
-            float32_pipeline=True,
+            dtype="f32",
         )
         return ThroughputResult(
             duration_sec=duration_sec, vectors_per_sec=vectors_per_sec
@@ -177,7 +177,7 @@ class QdpBenchmark:
             encoding_method=self._encoding_method,
             warmup_batches=self._warmup_batches,
             seed=None,
-            float32_pipeline=True,
+            dtype="f32",
         )
         return LatencyResult(
             duration_sec=duration_sec,
diff --git a/qdp/qdp-python/qumat_qdp/loader.py 
b/qdp/qdp-python/qumat_qdp/loader.py
index 7873f2bf6..5a53f6c19 100644
--- a/qdp/qdp-python/qumat_qdp/loader.py
+++ b/qdp/qdp-python/qumat_qdp/loader.py
@@ -41,6 +41,11 @@ if TYPE_CHECKING:
 # Seed must fit Rust u64: 0 <= seed <= 2^64 - 1.
 _U64_MAX = 2**64 - 1
 
+# Canonical encoding names (must match Encoding enum in qdp-core/src/types.rs).
+_VALID_ENCODINGS: frozenset[str] = frozenset(
+    {"amplitude", "angle", "basis", "iqp", "iqp-z", "phase"}
+)
+
 # Fallback-supported file extensions (loadable without _qdp).
 _TORCH_FILE_EXTS = frozenset({".pt", ".pth"})
 _NUMPY_FILE_EXTS = frozenset({".npy"})
@@ -71,6 +76,11 @@ def _validate_loader_args(
         raise ValueError(
             f"encoding_method must be a non-empty string, got 
{encoding_method!r}"
         )
+    if encoding_method.lower() not in _VALID_ENCODINGS:
+        raise ValueError(
+            f"Unknown encoding_method {encoding_method!r}. "
+            f"Valid options: {sorted(_VALID_ENCODINGS)}"
+        )
     if seed is not None:
         if not isinstance(seed, int):
             raise ValueError(
@@ -172,6 +182,11 @@ class QuantumDataLoader:
             raise ValueError(
                 f"encoding_method must be a non-empty string, got {method!r}"
             )
+        if method.lower() not in _VALID_ENCODINGS:
+            raise ValueError(
+                f"Unknown encoding {method!r}. "
+                f"Valid options: {sorted(_VALID_ENCODINGS)}"
+            )
         self._encoding_method = method
         return self
 
diff --git a/qdp/qdp-python/src/engine.rs b/qdp/qdp-python/src/engine.rs
index 58c46babc..cfebfd86e 100644
--- a/qdp/qdp-python/src/engine.rs
+++ b/qdp/qdp-python/src/engine.rs
@@ -23,41 +23,17 @@ use crate::tensor::QuantumTensor;
 use numpy::{PyReadonlyArray1, PyReadonlyArray2, PyUntypedArrayMethods};
 use pyo3::exceptions::PyRuntimeError;
 use pyo3::prelude::*;
-use qdp_core::{Precision, QdpEngine as CoreEngine};
+use qdp_core::{Dtype, Encoding, QdpEngine as CoreEngine};
 
 #[cfg(target_os = "linux")]
 use crate::loader::{PyQuantumLoader, config_from_args, parse_null_handling, 
path_from_py};
 
-struct CudaEngineAdapter {
-    engine: CoreEngine,
-}
-
-impl CudaEngineAdapter {
-    fn new(device_id: usize, precision: Precision) -> PyResult<Self> {
-        let engine = CoreEngine::new_with_precision(device_id, 
precision).map_err(|e| {
-            PyRuntimeError::new_err(format!("Failed to initialize CUDA 
backend: {}", e))
-        })?;
-        Ok(Self { engine })
-    }
-
-    fn engine(&self) -> &CoreEngine {
-        &self.engine
-    }
-}
-
-/// PyO3 wrapper for the Rust/CUDA QdpEngine.
+/// PyO3 wrapper for QdpEngine
 ///
-/// The public Python facade routes AMD/Triton directly in `qumat_qdp.backend`.
-/// `_qdp.QdpEngine` stays focused on the Rust CUDA core and its tensor 
contract.
-
+/// Provides Python bindings for GPU-accelerated quantum state encoding.
 #[pyclass]
 pub struct QdpEngine {
-    engine: CudaEngineAdapter,
-    #[allow(dead_code)]
-    device_id: usize,
-    #[allow(dead_code)]
-    precision: Precision,
-    backend: String,
+    pub engine: CoreEngine,
 }
 
 #[pymethods]
@@ -74,44 +50,14 @@ impl QdpEngine {
     /// Raises:
     ///     RuntimeError: If CUDA device initialization fails
     #[new]
-    #[pyo3(signature = (device_id=0, precision="float32", backend="cuda"))]
-    fn new(device_id: usize, precision: &str, backend: &str) -> PyResult<Self> 
{
-        let precision = match precision.to_ascii_lowercase().as_str() {
-            "float32" | "f32" | "float" => Precision::Float32,
-            "float64" | "f64" | "double" => Precision::Float64,
-            other => {
-                return Err(PyRuntimeError::new_err(format!(
-                    "Unsupported precision '{}'. Use 'float32' (default) or 
'float64'.",
-                    other
-                )));
-            }
-        };
-
-        let backend_name = backend.to_ascii_lowercase();
-        let (engine, resolved_backend) = match backend_name.as_str() {
-            "cuda" => (
-                CudaEngineAdapter::new(device_id, precision)?,
-                "cuda".to_string(),
-            ),
-            "amd" | "triton_amd" => {
-                return Err(PyRuntimeError::new_err(
-                    "AMD/Triton routing is provided by the Python facade 
`qumat_qdp.QdpEngine`; `_qdp.QdpEngine` only supports the Rust CUDA backend.",
-                ));
-            }
-            other => {
-                return Err(PyRuntimeError::new_err(format!(
-                    "Unsupported backend '{}'. Use 'cuda'.",
-                    other
-                )));
-            }
-        };
+    #[pyo3(signature = (device_id=0, precision="float32"))]
+    fn new(device_id: usize, precision: &str) -> PyResult<Self> {
+        let precision =
+            Dtype::from_str_ci(precision).map_err(|e| 
PyRuntimeError::new_err(e.to_string()))?;
 
-        Ok(Self {
-            engine,
-            device_id,
-            precision,
-            backend: resolved_backend,
-        })
+        let engine = CoreEngine::new_with_precision(device_id, precision)
+            .map_err(|e| PyRuntimeError::new_err(format!("Failed to 
initialize: {}", e)))?;
+        Ok(Self { engine })
     }
 
     /// Encode classical data into quantum state (auto-detects input type)
@@ -125,11 +71,10 @@ impl QdpEngine {
     ///         - String path: .parquet, .arrow, .feather, .npy, .pt, .pth, 
.pb file
     ///         - pathlib.Path: Path object (converted via os.fspath())
     ///     num_qubits: Number of qubits for encoding
-    ///     encoding_method: Encoding strategy ("amplitude" default, "angle", 
"basis",
-    ///         "iqp", or "iqp-z"). CUDA tensor notes:
-    ///         - amplitude and angle accept float64 and float32
-    ///         - basis requires int64
-    ///         - iqp and iqp-z require float64
+    ///     encoding_method: Encoding strategy ("amplitude" default, "angle", 
or "basis")
+    ///         CUDA tensor note:
+    ///         - amplitude accepts float64 and float32
+    ///         - angle accepts float64 generally, plus float32 for 1D 
single-sample tensors
     ///
     /// Returns:
     ///     QuantumTensor: DLPack-compatible tensor for zero-copy PyTorch 
integration
@@ -152,94 +97,6 @@ impl QdpEngine {
         data: &Bound<'_, PyAny>,
         num_qubits: usize,
         encoding_method: &str,
-    ) -> PyResult<QuantumTensor> {
-        self.encode_with_core(data, num_qubits, encoding_method)
-    }
-
-    fn backend(&self) -> &str {
-        &self.backend
-    }
-
-    #[cfg(target_os = "linux")]
-    #[pyo3(signature = (total_batches, batch_size, num_qubits, 
encoding_method, seed=None, null_handling=None))]
-    fn create_synthetic_loader(
-        &self,
-        total_batches: usize,
-        batch_size: usize,
-        num_qubits: u32,
-        encoding_method: &str,
-        seed: Option<u64>,
-        null_handling: Option<&str>,
-    ) -> PyResult<PyQuantumLoader> {
-        self.create_synthetic_loader_impl(
-            total_batches,
-            batch_size,
-            num_qubits,
-            encoding_method,
-            seed,
-            null_handling,
-        )
-    }
-
-    #[cfg(target_os = "linux")]
-    #[allow(clippy::too_many_arguments)]
-    #[pyo3(signature = (path, batch_size, num_qubits, encoding_method, 
batch_limit=None, null_handling=None))]
-    fn create_file_loader(
-        &self,
-        py: Python<'_>,
-        path: &Bound<'_, PyAny>,
-        batch_size: usize,
-        num_qubits: u32,
-        encoding_method: &str,
-        batch_limit: Option<usize>,
-        null_handling: Option<&str>,
-    ) -> PyResult<PyQuantumLoader> {
-        self.create_file_loader_impl(
-            py,
-            path,
-            batch_size,
-            num_qubits,
-            encoding_method,
-            batch_limit,
-            null_handling,
-        )
-    }
-
-    #[cfg(target_os = "linux")]
-    #[allow(clippy::too_many_arguments)]
-    #[pyo3(signature = (path, batch_size, num_qubits, encoding_method, 
batch_limit=None, null_handling=None))]
-    fn create_streaming_file_loader(
-        &self,
-        py: Python<'_>,
-        path: &Bound<'_, PyAny>,
-        batch_size: usize,
-        num_qubits: u32,
-        encoding_method: &str,
-        batch_limit: Option<usize>,
-        null_handling: Option<&str>,
-    ) -> PyResult<PyQuantumLoader> {
-        self.create_streaming_file_loader_impl(
-            py,
-            path,
-            batch_size,
-            num_qubits,
-            encoding_method,
-            batch_limit,
-            null_handling,
-        )
-    }
-}
-
-impl QdpEngine {
-    fn core_engine(&self) -> PyResult<&CoreEngine> {
-        Ok(self.engine.engine())
-    }
-
-    fn encode_with_core(
-        &self,
-        data: &Bound<'_, PyAny>,
-        num_qubits: usize,
-        encoding_method: &str,
     ) -> PyResult<QuantumTensor> {
         // Check if it's a string path
         if let Ok(path) = data.extract::<String>() {
@@ -293,7 +150,7 @@ impl QdpEngine {
                     PyRuntimeError::new_err("NumPy array must be contiguous 
(C-order)")
                 })?;
                 let ptr = self
-                    .core_engine()?
+                    .engine
                     .encode(data_slice, num_qubits, encoding_method)
                     .map_err(|e| PyRuntimeError::new_err(format!("Encoding 
failed: {}", e)))?;
                 Ok(QuantumTensor {
@@ -315,7 +172,7 @@ impl QdpEngine {
                     PyRuntimeError::new_err("NumPy array must be contiguous 
(C-order)")
                 })?;
                 let ptr = self
-                    .core_engine()?
+                    .engine
                     .encode_batch(
                         data_slice,
                         num_samples,
@@ -345,7 +202,7 @@ impl QdpEngine {
             // Validate CUDA tensor for direct GPU encoding
             validate_cuda_tensor_for_encoding(
                 data,
-                self.core_engine()?.device().ordinal(),
+                self.engine.device().ordinal(),
                 encoding_method,
             )?;
 
@@ -374,7 +231,7 @@ impl QdpEngine {
                     // (held by Python's GIL), and we validated dtype/contiguity/device above.
                     // The DLPackTensorInfo RAII wrapper will call deleter when dropped.
                     let ptr = unsafe {
-                        self.core_engine()?
+                        self.engine
                             .encode_from_gpu_ptr(
                                 dlpack_info.data_ptr,
                                 input_len,
@@ -396,7 +253,7 @@ impl QdpEngine {
                     let sample_size = dlpack_info.shape[1] as usize;
                     // SAFETY: Same as above - pointer from validated DLPack tensor
                     let ptr = unsafe {
-                        self.core_engine()?
+                        self.engine
                             .encode_batch_from_gpu_ptr(
                                 dlpack_info.data_ptr,
                                 num_samples,
@@ -452,7 +309,7 @@ impl QdpEngine {
                     )
                 })?;
                 let ptr = self
-                    .core_engine()?
+                    .engine
                     .encode(data_slice, num_qubits, encoding_method)
                     .map_err(|e| PyRuntimeError::new_err(format!("Encoding failed: {}", e)))?;
                 Ok(QuantumTensor {
@@ -478,7 +335,7 @@ impl QdpEngine {
                     )
                 })?;
                 let ptr = self
-                    .core_engine()?
+                    .engine
                     .encode_batch(
                         data_slice,
                         num_samples,
@@ -509,7 +366,7 @@ impl QdpEngine {
             )
         })?;
         let ptr = self
-            .core_engine()?
+            .engine
             .encode(&vec_data, num_qubits, encoding_method)
             .map_err(|e| PyRuntimeError::new_err(format!("Encoding failed: {}", e)))?;
         Ok(QuantumTensor {
@@ -540,31 +397,31 @@ impl QdpEngine {
         };
 
         let ptr = if path.ends_with(".parquet") {
-            self.core_engine()?
+            self.engine
                 .encode_from_parquet(path, num_qubits, encoding_method)
                 .map_err(|e| {
                     PyRuntimeError::new_err(format!("Encoding from parquet failed: {}", e))
                 })?
         } else if path.ends_with(".arrow") || path.ends_with(".feather") {
-            self.core_engine()?
+            self.engine
                 .encode_from_arrow_ipc(path, num_qubits, encoding_method)
                 .map_err(|e| {
                     PyRuntimeError::new_err(format!("Encoding from Arrow IPC 
failed: {}", e))
                 })?
         } else if path.ends_with(".npy") {
-            self.core_engine()?
+            self.engine
                 .encode_from_numpy(path, num_qubits, encoding_method)
                 .map_err(|e| {
                     PyRuntimeError::new_err(format!("Encoding from NumPy 
failed: {}", e))
                 })?
         } else if path.ends_with(".pt") || path.ends_with(".pth") {
-            self.core_engine()?
+            self.engine
                 .encode_from_torch(path, num_qubits, encoding_method)
                 .map_err(|e| {
                     PyRuntimeError::new_err(format!("Encoding from PyTorch 
failed: {}", e))
                 })?
         } else if path.ends_with(".pb") {
-            self.core_engine()?
+            self.engine
                 .encode_from_tensorflow(path, num_qubits, encoding_method)
                 .map_err(|e| {
                     PyRuntimeError::new_err(format!("Encoding from TensorFlow 
failed: {}", e))
@@ -596,7 +453,6 @@ impl QdpEngine {
     ///     >>> engine = QdpEngine(device_id=0)
     ///     >>> batched = engine.encode_from_tensorflow("data.pb", 16, "amplitude")
     ///     >>> torch_tensor = torch.from_dlpack(batched)  # Shape: [200, 65536]
-    #[allow(dead_code)]
     fn encode_from_tensorflow(
         &self,
         path: &str,
@@ -604,7 +460,7 @@ impl QdpEngine {
         encoding_method: &str,
     ) -> PyResult<QuantumTensor> {
         let ptr = self
-            .core_engine()?
+            .engine
             .encode_from_tensorflow(path, num_qubits, encoding_method)
             .map_err(|e| {
                 PyRuntimeError::new_err(format!("Encoding from TensorFlow failed: {}", e))
@@ -625,21 +481,19 @@ impl QdpEngine {
         num_qubits: usize,
         encoding_method: &str,
     ) -> PyResult<QuantumTensor> {
-        validate_cuda_tensor_for_encoding(
+        let encoding = validate_cuda_tensor_for_encoding(
             data,
-            self.core_engine()?.device().ordinal(),
+            self.engine.device().ordinal(),
             encoding_method,
         )?;
-
         let dtype = data.getattr("dtype")?;
         let dtype_str: String = dtype.str()?.extract()?;
-        let dtype_str_lower = dtype_str.to_ascii_lowercase();
-        let is_f32 = dtype_str_lower.contains("float32");
-        let method = encoding_method.to_ascii_lowercase();
+        let is_f32 = dtype_str.to_ascii_lowercase().contains("float32");
         let ndim: usize = data.call_method0("dim")?.extract()?;
         let tensor_info = extract_cuda_tensor_info(data)?;
 
-        if is_f32 && matches!(method.as_str(), "amplitude" | "angle") {
+        let f32_fast_path = is_f32 && matches!(encoding, Encoding::Amplitude | Encoding::Angle);
+        if f32_fast_path {
             match ndim {
                 1 => {
                     let input_len: usize = data.call_method0("numel")?.extract()?;
@@ -648,9 +502,9 @@ impl QdpEngine {
                     let data_ptr = data_ptr_u64 as *const f32;
 
                     let ptr = unsafe {
-                        match method.as_str() {
-                            "amplitude" => self
-                                .core_engine()?
+                        match encoding {
+                            Encoding::Amplitude => self
+                                .engine
                                 .encode_from_gpu_ptr_f32_with_stream(
                                     data_ptr, input_len, num_qubits, 
stream_ptr,
                                 )
@@ -660,8 +514,8 @@ impl QdpEngine {
                                         e
                                     ))
                                 })?,
-                            "angle" => self
-                                .core_engine()?
+                            Encoding::Angle => self
+                                .engine
                                 .encode_angle_from_gpu_ptr_f32_with_stream(
                                     data_ptr, input_len, num_qubits, 
stream_ptr,
                                 )
@@ -671,7 +525,7 @@ impl QdpEngine {
                                         e
                                     ))
                                 })?,
-                            _ => unreachable!("unreachable: unhandled f32 encoding method"),
+                            _ => unreachable!("f32_fast_path guard allows only Amplitude or Angle"),
                         }
                     };
 
@@ -688,9 +542,9 @@ impl QdpEngine {
                     let data_ptr = data_ptr_u64 as *const f32;
 
                     let ptr = unsafe {
-                        match method.as_str() {
-                            "amplitude" => self
-                                .core_engine()?
+                        match encoding {
+                            Encoding::Amplitude => self
+                                .engine
                                 .encode_batch_from_gpu_ptr_f32_with_stream(
                                     data_ptr,
                                     num_samples,
@@ -704,8 +558,8 @@ impl QdpEngine {
                                         e
                                     ))
                                 })?,
-                            "angle" => self
-                                .core_engine()?
+                            Encoding::Angle => self
+                                .engine
                                 
.encode_angle_batch_from_gpu_ptr_f32_with_stream(
                                     data_ptr,
                                     num_samples,
@@ -719,7 +573,7 @@ impl QdpEngine {
                                         e
                                     ))
                                 })?,
-                            _ => unreachable!("unreachable: unhandled f32 batch encoding method"),
+                            _ => unreachable!("f32_fast_path guard allows only Amplitude or Angle"),
                         }
                     };
 
@@ -741,7 +595,7 @@ impl QdpEngine {
                 1 => {
                     let input_len = tensor_info.shape[0] as usize;
                     let ptr = unsafe {
-                        self.core_engine()?
+                        self.engine
                             .encode_from_gpu_ptr_with_stream(
                                 tensor_info.data_ptr as *const 
std::ffi::c_void,
                                 input_len,
@@ -762,7 +616,7 @@ impl QdpEngine {
                     let num_samples = tensor_info.shape[0] as usize;
                     let sample_size = tensor_info.shape[1] as usize;
                     let ptr = unsafe {
-                        self.core_engine()?
+                        self.engine
                             .encode_batch_from_gpu_ptr_with_stream(
                                 tensor_info.data_ptr as *const 
std::ffi::c_void,
                                 num_samples,
@@ -791,7 +645,9 @@ impl QdpEngine {
 
     // --- Loader factory methods (Linux only) ---
     #[cfg(target_os = "linux")]
-    fn create_synthetic_loader_impl(
+    /// Create a synthetic-data pipeline iterator (for 
QuantumDataLoader.source_synthetic()).
+    #[pyo3(signature = (total_batches, batch_size, num_qubits, 
encoding_method, seed=None, null_handling=None))]
+    fn create_synthetic_loader(
         &self,
         total_batches: usize,
         batch_size: usize,
@@ -800,27 +656,28 @@ impl QdpEngine {
         seed: Option<u64>,
         null_handling: Option<&str>,
     ) -> PyResult<PyQuantumLoader> {
-        let engine = self.core_engine()?.clone();
         let nh = parse_null_handling(null_handling)?;
         let config = config_from_args(
-            &engine,
+            &self.engine,
             batch_size,
             num_qubits,
             encoding_method,
             total_batches,
             seed,
             nh,
-            true,
-        );
-        let iter = qdp_core::PipelineIterator::new_synthetic(engine, 
config).map_err(|e| {
-            PyRuntimeError::new_err(format!("create_synthetic_loader failed: 
{}", e))
-        })?;
+            Dtype::Float32,
+        )?;
+        let iter = 
qdp_core::PipelineIterator::new_synthetic(self.engine.clone(), config).map_err(
+            |e| PyRuntimeError::new_err(format!("create_synthetic_loader 
failed: {}", e)),
+        )?;
         Ok(PyQuantumLoader::new(Some(iter)))
     }
 
     #[cfg(target_os = "linux")]
+    /// Create a file-backed pipeline iterator (full read then batch; for 
QuantumDataLoader.source_file(path)).
     #[allow(clippy::too_many_arguments)]
-    fn create_file_loader_impl(
+    #[pyo3(signature = (path, batch_size, num_qubits, encoding_method, 
batch_limit=None, null_handling=None))]
+    fn create_file_loader(
         &self,
         py: Python<'_>,
         path: &Bound<'_, PyAny>,
@@ -832,18 +689,18 @@ impl QdpEngine {
     ) -> PyResult<PyQuantumLoader> {
         let path_str = path_from_py(path)?;
         let batch_limit = batch_limit.unwrap_or(usize::MAX);
-        let engine = self.core_engine()?.clone();
         let nh = parse_null_handling(null_handling)?;
         let config = config_from_args(
-            &engine,
+            &self.engine,
             batch_size,
             num_qubits,
             encoding_method,
             0,
             None,
             nh,
-            true, // float32_pipeline
-        );
+            Dtype::Float32,
+        )?;
+        let engine = self.engine.clone();
         // Resolve remote URLs before detaching from GIL. The _resolved guard 
keeps the
         // temp file alive until after the file is fully read inside py.detach.
         #[cfg(feature = "remote-io")]
@@ -866,8 +723,10 @@ impl QdpEngine {
     }
 
     #[cfg(target_os = "linux")]
+    /// Create a streaming Parquet pipeline iterator (for 
QuantumDataLoader.source_file(path, streaming=True)).
     #[allow(clippy::too_many_arguments)]
-    fn create_streaming_file_loader_impl(
+    #[pyo3(signature = (path, batch_size, num_qubits, encoding_method, 
batch_limit=None, null_handling=None))]
+    fn create_streaming_file_loader(
         &self,
         py: Python<'_>,
         path: &Bound<'_, PyAny>,
@@ -879,18 +738,18 @@ impl QdpEngine {
     ) -> PyResult<PyQuantumLoader> {
         let path_str = path_from_py(path)?;
         let batch_limit = batch_limit.unwrap_or(usize::MAX);
-        let engine = self.core_engine()?.clone();
         let nh = parse_null_handling(null_handling)?;
         let config = config_from_args(
-            &engine,
+            &self.engine,
             batch_size,
             num_qubits,
             encoding_method,
             0,
             None,
             nh,
-            true, // float32_pipeline
-        );
+            Dtype::Float32,
+        )?;
+        let engine = self.engine.clone();
         // Resolve remote URLs before detaching from GIL. The _resolved guard 
keeps the
         // temp file alive; the streaming reader's open fd preserves data 
after drop.
         #[cfg(feature = "remote-io")]
diff --git a/qdp/qdp-python/src/lib.rs b/qdp/qdp-python/src/lib.rs
index 5348c3f4a..04d772a90 100644
--- a/qdp/qdp-python/src/lib.rs
+++ b/qdp/qdp-python/src/lib.rs
@@ -31,7 +31,7 @@ use loader::PyQuantumLoader;
 
 #[cfg(target_os = "linux")]
 #[pyfunction]
-#[pyo3(signature = (device_id, num_qubits, batch_size, total_batches, encoding_method, warmup_batches=0, seed=None, float32_pipeline=false))]
+#[pyo3(signature = (device_id, num_qubits, batch_size, total_batches, encoding_method, warmup_batches=0, seed=None, dtype="f64"))]
 #[allow(clippy::too_many_arguments)]
 fn run_throughput_pipeline_py(
     py: Python<'_>,
@@ -42,18 +42,20 @@ fn run_throughput_pipeline_py(
     encoding_method: String,
     warmup_batches: usize,
     seed: Option<u64>,
-    float32_pipeline: bool,
+    dtype: &str,
 ) -> PyResult<(f64, f64, f64)> {
     let config = qdp_core::PipelineConfig {
         device_id,
         num_qubits,
         batch_size,
         total_batches,
-        encoding_method,
+        encoding: qdp_core::Encoding::from_str_ci(&encoding_method)
+            .map_err(|e| PyRuntimeError::new_err(format!("Invalid encoding_method: {e}")))?,
         seed,
         warmup_batches,
         null_handling: qdp_core::NullHandling::default(),
-        float32_pipeline,
+        dtype: qdp_core::Dtype::from_str_ci(dtype)
+            .map_err(|e| PyRuntimeError::new_err(format!("Invalid dtype: {e}")))?,
         prefetch_depth: 16,
     };
     let result = py
diff --git a/qdp/qdp-python/src/loader.rs b/qdp/qdp-python/src/loader.rs
index 7ad7632cb..a43f94794 100644
--- a/qdp/qdp-python/src/loader.rs
+++ b/qdp/qdp-python/src/loader.rs
@@ -21,7 +21,7 @@ mod loader_impl {
     use pyo3::exceptions::PyRuntimeError;
     use pyo3::prelude::*;
     use qdp_core::reader::NullHandling;
-    use qdp_core::{PipelineConfig, PipelineIterator, QdpEngine as CoreEngine};
+    use qdp_core::{Dtype, Encoding, PipelineConfig, PipelineIterator, QdpEngine as CoreEngine};
 
     /// Rust-backed iterator yielding one QuantumTensor per batch; used by 
QuantumDataLoader.
     #[pyclass]
@@ -93,20 +93,22 @@ mod loader_impl {
         total_batches: usize,
         seed: Option<u64>,
         null_handling: NullHandling,
-        float32_pipeline: bool,
-    ) -> PipelineConfig {
-        PipelineConfig {
+        dtype: Dtype,
+    ) -> PyResult<PipelineConfig> {
+        let encoding = Encoding::from_str_ci(encoding_method)
+            .map_err(|e| PyRuntimeError::new_err(format!("Invalid encoding: {e}")))?;
+        Ok(PipelineConfig {
             device_id: 0,
             num_qubits,
             batch_size,
             total_batches,
-            encoding_method: encoding_method.to_string(),
+            encoding,
             seed,
             warmup_batches: 0,
             null_handling,
-            float32_pipeline,
+            dtype,
             prefetch_depth: 16,
-        }
+        })
     }
 
     /// Resolve path from Python str or pathlib.Path (__fspath__).
diff --git a/qdp/qdp-python/src/pytorch.rs b/qdp/qdp-python/src/pytorch.rs
index af014c3f3..e3bb0ca22 100644
--- a/qdp/qdp-python/src/pytorch.rs
+++ b/qdp/qdp-python/src/pytorch.rs
@@ -18,7 +18,8 @@ use pyo3::exceptions::PyRuntimeError;
 use pyo3::prelude::*;
 use std::ffi::c_void;
 
-use crate::constants::{CUDA_ENCODING_METHODS, format_supported_cuda_encoding_methods};
+use crate::constants::format_supported_cuda_encoding_methods;
+use qdp_core::Encoding;
 
 /// Helper to detect PyTorch tensor
 pub fn is_pytorch_tensor(obj: &Bound<'_, PyAny>) -> PyResult<bool> {
@@ -143,16 +144,21 @@ pub fn get_torch_cuda_stream_ptr(tensor: &Bound<'_, PyAny>) -> PyResult<*mut c_v
     })
 }
 
-/// Validate a CUDA tensor for direct GPU encoding
-/// Checks: dtype matches encoding method, contiguous, non-empty, device_id 
matches engine
+/// Validate a CUDA tensor for direct GPU encoding and return the parsed 
`Encoding`.
+///
+/// Checks dtype compatibility, contiguity, non-empty, and device match.
+/// Returns the parsed `Encoding` so the caller avoids re-parsing the same 
string.
 pub fn validate_cuda_tensor_for_encoding(
     tensor: &Bound<'_, PyAny>,
     expected_device_id: usize,
     encoding_method: &str,
-) -> PyResult<()> {
-    let method = encoding_method.to_ascii_lowercase();
+) -> PyResult<Encoding> {
+    let encoding = Encoding::from_str_ci(encoding_method)
+        .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
 
-    if !CUDA_ENCODING_METHODS.contains(&method.as_str()) {
+    // Phase encoding has no zero-copy CUDA tensor kernel yet; the user-facing
+    // error below tells callers to fall back to a CPU tensor.
+    if matches!(encoding, Encoding::Phase) {
         return Err(PyRuntimeError::new_err(format!(
             "CUDA tensor encoding currently only supports {} methods, got '{}'. \
              Use tensor.cpu() to convert to CPU tensor for other encoding methods.",
@@ -161,30 +167,31 @@ pub fn validate_cuda_tensor_for_encoding(
         )));
     }
 
-    // Check encoding method support and dtype (ASCII lowercase for 
case-insensitive match).
     let dtype = tensor.getattr("dtype")?;
     let dtype_str: String = dtype.str()?.extract()?;
     let dtype_str_lower = dtype_str.to_ascii_lowercase();
-    match method.as_str() {
-        "amplitude" | "angle" => {
+    match encoding {
+        Encoding::Amplitude | Encoding::Angle => {
             if !(dtype_str_lower.contains("float64") || 
dtype_str_lower.contains("float32")) {
                 return Err(PyRuntimeError::new_err(format!(
                     "CUDA tensor must have dtype float64 or float32 for {} 
encoding, got {}. \
                      Use tensor.to(torch.float64) or tensor.to(torch.float32)",
-                    method, dtype_str
+                    encoding.as_str(),
+                    dtype_str
                 )));
             }
         }
-        "iqp" | "iqp-z" => {
+        Encoding::Iqp | Encoding::IqpZ => {
             if !dtype_str_lower.contains("float64") {
                 return Err(PyRuntimeError::new_err(format!(
                     "CUDA tensor must have dtype float64 for {} encoding, got 
{}. \
                      Use tensor.to(torch.float64)",
-                    method, dtype_str
+                    encoding.as_str(),
+                    dtype_str
                 )));
             }
         }
-        "basis" => {
+        Encoding::Basis => {
             if !dtype_str_lower.contains("int64") {
                 return Err(PyRuntimeError::new_err(format!(
                     "CUDA tensor must have dtype int64 for basis encoding, got 
{}. \
@@ -193,12 +200,7 @@ pub fn validate_cuda_tensor_for_encoding(
                 )));
             }
         }
-        _ => {
-            return Err(PyRuntimeError::new_err(format!(
-                "Internal error: missing CUDA validation branch for supported 
method '{}'",
-                method
-            )));
-        }
+        Encoding::Phase => unreachable!("Phase filtered above"),
     }
 
     // Check contiguous
@@ -225,7 +227,7 @@ pub fn validate_cuda_tensor_for_encoding(
         )));
     }
 
-    Ok(())
+    Ok(encoding)
 }
 
 /// Minimal CUDA tensor metadata extracted via PyTorch APIs.
diff --git a/testing/qdp/test_bindings.py b/testing/qdp/test_bindings.py
index 66d1f26a5..b6a2b60f5 100644
--- a/testing/qdp/test_bindings.py
+++ b/testing/qdp/test_bindings.py
@@ -398,9 +398,7 @@ def test_encode_cuda_tensor_angle_float16_rejected():
     engine = QdpEngine(0)
     data = torch.tensor([0.0, torch.pi / 2], dtype=torch.float16, device="cuda:0")
 
-    with pytest.raises(
-        RuntimeError, match="float64 for angle encoding|supports only 1D"
-    ):
+    with pytest.raises(RuntimeError, match="float64 or float32 for angle encoding"):
         engine.encode(data, 2, "angle")
 
 
@@ -537,7 +535,7 @@ def test_encode_cuda_tensor_invalid_encoding_method():
 
     with pytest.raises(
         RuntimeError,
-        match="only supports .*amplitude.*angle.*basis.*iqp.*iqp-z.*Use tensor.cpu",
+        match="Unknown encoding: unknown-encoding",
     ):
         engine.encode(data, 2, "unknown-encoding")
 
diff --git a/testing/qdp_python/test_dlpack_validation.py b/testing/qdp_python/test_dlpack_validation.py
index 5ac462d5f..b093ee1cb 100644
--- a/testing/qdp_python/test_dlpack_validation.py
+++ b/testing/qdp_python/test_dlpack_validation.py
@@ -183,9 +183,7 @@ def test_cuda_float16_angle_rejected() -> None:
     engine = _engine()
     t = torch.tensor([0.0, torch.pi / 2], dtype=torch.float16, device="cuda")
 
-    with pytest.raises(
-        RuntimeError, match="float64 for angle encoding|supports only 1D"
-    ):
+    with pytest.raises(RuntimeError, match="float64 or float32 for angle encoding"):
         engine.encode(t, num_qubits=2, encoding_method="angle")

(mahout) branch main updated: feat(qdp): Encoding + Dtype enums, static encoder dispatch (#1276)

Reply via email to