This is an automated email from the ASF dual-hosted git repository.
guanmingchiu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/main by this push:
new a5428c0c3 [QDP] add document hardcoded NullHandling::FillZero in
stream_encode (#1120)
a5428c0c3 is described below
commit a5428c0c3e62262805f146d67e5fc2a42cfc6cc5
Author: ChenChen Lai <[email protected]>
AuthorDate: Thu Mar 5 16:05:01 2026 +0800
[QDP] add document hardcoded NullHandling::FillZero in stream_encode (#1120)
---
qdp/qdp-core/src/encoding/mod.rs | 8 ++++++++
qdp/qdp-core/src/lib.rs | 7 +++++++
2 files changed, 15 insertions(+)
diff --git a/qdp/qdp-core/src/encoding/mod.rs b/qdp/qdp-core/src/encoding/mod.rs
index 851ed6502..d795ca4a7 100644
--- a/qdp/qdp-core/src/encoding/mod.rs
+++ b/qdp/qdp-core/src/encoding/mod.rs
@@ -134,6 +134,14 @@ pub(crate) trait ChunkEncoder {
///
/// This function handles all the common IO, buffering, and GPU memory
/// management logic. The actual encoding is delegated to the `ChunkEncoder`.
+///
+/// # Null handling
+///
+/// The streaming Parquet path always uses [`crate::reader::NullHandling::FillZero`]
+/// when constructing the [`crate::io::ParquetBlockReader`]. This replaces any
+/// null values in the input with `0.0`, matching Mahout's historical behavior
+/// and keeping the API backward compatible. Callers that require stricter
+/// validation should ensure the input data contains no nulls.
pub(crate) fn stream_encode<E: ChunkEncoder>(
engine: &QdpEngine,
path: &str,
diff --git a/qdp/qdp-core/src/lib.rs b/qdp/qdp-core/src/lib.rs
index c8146003b..a58075aef 100644
--- a/qdp/qdp-core/src/lib.rs
+++ b/qdp/qdp-core/src/lib.rs
@@ -261,6 +261,13 @@ impl QdpEngine {
/// * `num_qubits` - Number of qubits
/// * `encoding_method` - Strategy: "amplitude", "angle", or "basis"
///
+ /// # Null handling
+ ///
+ /// When reading from Parquet, the streaming encoder always uses
+ /// [`NullHandling::FillZero`] for the underlying `ParquetBlockReader`. This
+ /// replaces any null values with `0.0`, matching the behavior of the batch
+ /// readers and preserving backward compatibility.
+ ///
/// # Returns
/// DLPack pointer to encoded states [num_samples, 2^num_qubits]
pub fn encode_from_parquet(