(mahout) branch dev-qdp updated: [QDP] improve memory initialization (#668)

guanmingchiu Sun, 30 Nov 2025 09:20:03 -0800

This is an automated email from the ASF dual-hosted git repository.

guanmingchiu pushed a commit to branch dev-qdp
in repository https://gitbox.apache.org/repos/asf/mahout.git



The following commit(s) were added to refs/heads/dev-qdp by this push:
     new 957f56990 [QDP] improve memory initialization (#668)
957f56990 is described below

commit 957f569903d5ea628b074ffea7be5fe4f36e4848
Author: KUAN-HAO HUANG <[email protected]>
AuthorDate: Mon Dec 1 01:19:47 2025 +0800

    [QDP] improve memory initialization (#668)
---
 qdp/qdp-core/src/gpu/memory.rs | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/qdp/qdp-core/src/gpu/memory.rs b/qdp/qdp-core/src/gpu/memory.rs
index 38bb90e27..49f26602a 100644
--- a/qdp/qdp-core/src/gpu/memory.rs
+++ b/qdp/qdp-core/src/gpu/memory.rs
@@ -57,21 +57,17 @@ impl GpuStateVector {
     pub fn new(_device: &Arc<CudaDevice>, qubits: usize) -> Result<Self> {
         let _size_elements = 1 << qubits;
 
-        // Use device-side allocation (critical for performance):
-        // - No CPU RAM usage (avoids OOM for large states)
-        // - Fast: microseconds vs seconds for 30 qubits (16GB)
-        // TODO: Use uninitialized alloc() when kernel fully implements padding
         #[cfg(target_os = "linux")]
         {
-            // Allocate GPU memory (zero-initialized)
-            let zeros = vec![CuDoubleComplex { x: 0.0, y: 0.0 }; _size_elements];
-            let slice = _device.htod_sync_copy(&zeros)
-                .map_err(|e| MahoutError::MemoryAllocation(
-                    format!("Failed to allocate {} bytes of GPU memory (qubits={}): {:?}",
-                            _size_elements * std::mem::size_of::<CuDoubleComplex>(),
-                            qubits,
-                            e)
-                ))?;
+            // Use uninitialized allocation to avoid memory bandwidth waste.
+            let slice = unsafe {
+                _device.alloc::<CuDoubleComplex>(_size_elements)
+            }.map_err(|e| MahoutError::MemoryAllocation(
+                format!("Failed to allocate {} bytes of GPU memory (qubits={}): {:?}",
+                        _size_elements * std::mem::size_of::<CuDoubleComplex>(),
+                        qubits,
+                        e)
+            ))?;
 
             Ok(Self {
                 buffer: Arc::new(GpuBufferRaw { slice }),

(mahout) branch dev-qdp updated: [QDP] improve memory initialization (#668)

Reply via email to