rich7420 commented on code in PR #814:
URL: https://github.com/apache/mahout/pull/814#discussion_r2719829745


##########
qdp/qdp-python/src/lib.rs:
##########
@@ -358,237 +424,264 @@ impl QdpEngine {
 
         // Check if it's a NumPy array
         if data.hasattr("__array_interface__")? {
-            // Get the array's ndim for shape validation
-            let ndim: usize = data.getattr("ndim")?.extract()?;
-
-            match ndim {
-                1 => {
-                    // 1D array: single sample encoding (zero-copy if already 
contiguous)
-                    let array_1d = 
data.extract::<PyReadonlyArray1<f64>>().map_err(|_| {
-                        PyRuntimeError::new_err(
-                            "Failed to extract 1D NumPy array. Ensure dtype is 
float64.",
-                        )
-                    })?;
-                    let data_slice = array_1d.as_slice().map_err(|_| {
-                        PyRuntimeError::new_err("NumPy array must be 
contiguous (C-order)")
-                    })?;
-                    let ptr = self
-                        .engine
-                        .encode(data_slice, num_qubits, encoding_method)
-                        .map_err(|e| PyRuntimeError::new_err(format!("Encoding 
failed: {}", e)))?;
-                    return Ok(QuantumTensor {
-                        ptr,
-                        consumed: false,
-                    });
-                }
-                2 => {
-                    // 2D array: batch encoding (zero-copy if already 
contiguous)
-                    let array_2d = 
data.extract::<PyReadonlyArray2<f64>>().map_err(|_| {
-                        PyRuntimeError::new_err(
-                            "Failed to extract 2D NumPy array. Ensure dtype is 
float64.",
-                        )
-                    })?;
-                    let shape = array_2d.shape();
-                    let num_samples = shape[0];
-                    let sample_size = shape[1];
-                    let data_slice = array_2d.as_slice().map_err(|_| {
-                        PyRuntimeError::new_err("NumPy array must be 
contiguous (C-order)")
-                    })?;
-                    let ptr = self
-                        .engine
-                        .encode_batch(
-                            data_slice,
-                            num_samples,
-                            sample_size,
-                            num_qubits,
-                            encoding_method,
-                        )
-                        .map_err(|e| PyRuntimeError::new_err(format!("Encoding 
failed: {}", e)))?;
-                    return Ok(QuantumTensor {
-                        ptr,
-                        consumed: false,
-                    });
-                }
-                _ => {
-                    return Err(PyRuntimeError::new_err(format!(
-                        "Unsupported array shape: {}D. Expected 1D array for 
single sample \
-                         encoding or 2D array (batch_size, features) for batch 
encoding.",
-                        ndim
-                    )));
-                }
-            }
+            return self.encode_from_numpy(data, num_qubits, encoding_method);
         }
 
         // Check if it's a PyTorch tensor
         if is_pytorch_tensor(data)? {
-            // Check if it's a CUDA tensor - use zero-copy GPU encoding
-            if is_cuda_tensor(data)? {
-                // Validate CUDA tensor for direct GPU encoding
-                validate_cuda_tensor_for_encoding(
-                    data,
-                    self.engine.device().ordinal(),
-                    encoding_method,
-                )?;
-
-                // Extract GPU pointer directly from PyTorch tensor
-                let tensor_info = extract_cuda_tensor_info(data)?;
-
-                let ndim: usize = data.call_method0("dim")?.extract()?;
-
-                match ndim {
-                    1 => {
-                        // 1D CUDA tensor: single sample encoding
-                        let input_len = tensor_info.shape[0] as usize;
-                        // SAFETY: tensor_info.data_ptr was obtained via 
PyTorch's data_ptr() from a
-                        // valid CUDA tensor. The tensor remains alive during 
this call
-                        // (held by Python's GIL), and we validated 
dtype/contiguity/device above.
-                        let ptr = unsafe {
-                            self.engine
-                                .encode_from_gpu_ptr(
-                                    tensor_info.data_ptr,
-                                    input_len,
-                                    num_qubits,
-                                    encoding_method,
-                                )
-                                .map_err(|e| {
-                                    PyRuntimeError::new_err(format!("Encoding 
failed: {}", e))
-                                })?
-                        };
-                        return Ok(QuantumTensor {
-                            ptr,
-                            consumed: false,
-                        });
-                    }
-                    2 => {
-                        // 2D CUDA tensor: batch encoding
-                        let num_samples = tensor_info.shape[0] as usize;
-                        let sample_size = tensor_info.shape[1] as usize;
-                        // SAFETY: Same as above - pointer from validated 
PyTorch CUDA tensor
-                        let ptr = unsafe {
-                            self.engine
-                                .encode_batch_from_gpu_ptr(
-                                    tensor_info.data_ptr,
-                                    num_samples,
-                                    sample_size,
-                                    num_qubits,
-                                    encoding_method,
-                                )
-                                .map_err(|e| {
-                                    PyRuntimeError::new_err(format!("Encoding 
failed: {}", e))
-                                })?
-                        };
-                        return Ok(QuantumTensor {
-                            ptr,
-                            consumed: false,
-                        });
-                    }
-                    _ => {
-                        return Err(PyRuntimeError::new_err(format!(
-                            "Unsupported CUDA tensor shape: {}D. Expected 1D 
tensor for single \
-                             sample encoding or 2D tensor (batch_size, 
features) for batch encoding.",
-                            ndim
-                        )));
-                    }
-                }
-            }
+            return self.encode_from_pytorch(data, num_qubits, encoding_method);
+        }
 
-            // CPU tensor path (existing code)
-            validate_tensor(data)?;
-            // PERF: Avoid Tensor -> Python list -> Vec deep copies.
-            //
-            // For CPU tensors, `tensor.detach().numpy()` returns a NumPy view 
that shares the same
-            // underlying memory (zero-copy) when the tensor is C-contiguous. 
We can then borrow a
-            // `&[f64]` directly via pyo3-numpy.
-            let ndim: usize = data.call_method0("dim")?.extract()?;
-            let numpy_view = data
-                .call_method0("detach")?
-                .call_method0("numpy")
-                .map_err(|_| {
+        // Fallback: try to extract as Vec<f64> (Python list)
+        self.encode_from_list(data, num_qubits, encoding_method)
+    }
+
+    /// Encode from NumPy array (1D or 2D)
+    fn encode_from_numpy(

Review Comment:
   The shape validation logic is duplicated between `encode_from_numpy()` and 
`encode_from_pytorch()`. Consider extracting it into a helper:
   
   ```rust
   fn validate_shape(ndim: usize, context: &str) -> PyResult<()> {
       match ndim {
           1 | 2 => Ok(()),
           _ => Err(PyRuntimeError::new_err(format!(
               "Unsupported {} shape: {}D. Expected 1D or 2D.",
               context, ndim
           ))),
       }
   }
   ```



##########
qdp/qdp-python/src/lib.rs:
##########
@@ -238,37 +238,103 @@ fn validate_cuda_tensor_for_encoding(
     Ok(())
 }
 
-/// CUDA tensor information extracted directly from PyTorch tensor
-struct CudaTensorInfo {
+/// DLPack tensor information extracted from a PyCapsule
+///
+/// This struct owns the DLManagedTensor pointer and ensures proper cleanup
+/// via the DLPack deleter when dropped (RAII pattern).
+struct DLPackTensorInfo {
+    /// Raw DLManagedTensor pointer from PyTorch DLPack capsule
+    /// This is owned by this struct and will be freed via deleter on drop
+    managed_ptr: *mut DLManagedTensor,
+    /// Data pointer inside dl_tensor (GPU memory, owned by managed_ptr)
     data_ptr: *const f64,
     shape: Vec<i64>,
+    /// CUDA device ID from DLPack metadata.
+    /// Currently unused but kept for potential future device validation or 
multi-GPU support.
+    #[allow(dead_code)]
+    device_id: i32,

Review Comment:
   Since this is dead code that may be used in the future, I think we could add 
a defensive assertion verifying that the DLPack `device_id` matches the 
validated device ID.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to