This is an automated email from the ASF dual-hosted git repository.
guanmingchiu pushed a commit to branch dev-qdp
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/dev-qdp by this push:
new 957f56990 [QDP] improve memory initialization (#668)
957f56990 is described below
commit 957f569903d5ea628b074ffea7be5fe4f36e4848
Author: KUAN-HAO HUANG <[email protected]>
AuthorDate: Mon Dec 1 01:19:47 2025 +0800
[QDP] improve memory initialization (#668)
---
qdp/qdp-core/src/gpu/memory.rs | 22 +++++++++-------------
1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/qdp/qdp-core/src/gpu/memory.rs b/qdp/qdp-core/src/gpu/memory.rs
index 38bb90e27..49f26602a 100644
--- a/qdp/qdp-core/src/gpu/memory.rs
+++ b/qdp/qdp-core/src/gpu/memory.rs
@@ -57,21 +57,17 @@ impl GpuStateVector {
pub fn new(_device: &Arc<CudaDevice>, qubits: usize) -> Result<Self> {
let _size_elements = 1 << qubits;
- // Use device-side allocation (critical for performance):
- // - No CPU RAM usage (avoids OOM for large states)
- // - Fast: microseconds vs seconds for 30 qubits (16GB)
- // TODO: Use uninitialized alloc() when kernel fully implements padding
#[cfg(target_os = "linux")]
{
- // Allocate GPU memory (zero-initialized)
- let zeros = vec![CuDoubleComplex { x: 0.0, y: 0.0 }; _size_elements];
- let slice = _device.htod_sync_copy(&zeros)
- .map_err(|e| MahoutError::MemoryAllocation(
- format!("Failed to allocate {} bytes of GPU memory (qubits={}): {:?}",
- _size_elements * std::mem::size_of::<CuDoubleComplex>(),
- qubits,
- e)
- ))?;
+ // Use uninitialized allocation to avoid memory bandwidth waste.
+ let slice = unsafe {
+ _device.alloc::<CuDoubleComplex>(_size_elements)
+ }.map_err(|e| MahoutError::MemoryAllocation(
+ format!("Failed to allocate {} bytes of GPU memory (qubits={}): {:?}",
+ _size_elements * std::mem::size_of::<CuDoubleComplex>(),
+ qubits,
+ e)
+ ))?;
Ok(Self {
buffer: Arc::new(GpuBufferRaw { slice }),