viiccwen commented on code in PR #1101:
URL: https://github.com/apache/mahout/pull/1101#discussion_r2867107563


##########
qdp/qdp-core/src/io.rs:
##########
@@ -33,38 +33,36 @@ use 
parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
 use parquet::file::properties::WriterProperties;
 
 use crate::error::{MahoutError, Result};
+use crate::reader::{NullHandling, handle_float64_nulls};
 
 /// Converts an Arrow Float64Array to Vec<f64>.
-pub fn arrow_to_vec(array: &Float64Array) -> Vec<f64> {
-    if array.null_count() == 0 {
-        array.values().to_vec()
-    } else {
-        array.iter().map(|opt| opt.unwrap_or(0.0)).collect()
-    }
+pub fn arrow_to_vec(array: &Float64Array, null_handling: NullHandling) -> 
Result<Vec<f64>> {
+    let mut result = Vec::with_capacity(array.len());
+    handle_float64_nulls(&mut result, array, null_handling)?;
+    Ok(result)
 }
 
 /// Flattens multiple Arrow Float64Arrays into a single Vec<f64>.
-pub fn arrow_to_vec_chunked(arrays: &[Float64Array]) -> Vec<f64> {
+pub fn arrow_to_vec_chunked(
+    arrays: &[Float64Array],
+    null_handling: NullHandling,
+) -> Result<Vec<f64>> {
     let total_len: usize = arrays.iter().map(|a| a.len()).sum();
     let mut result = Vec::with_capacity(total_len);
 
     for array in arrays {
-        if array.null_count() == 0 {
-            result.extend_from_slice(array.values());
-        } else {
-            result.extend(array.iter().map(|opt| opt.unwrap_or(0.0)));
-        }
+        handle_float64_nulls(&mut result, array, null_handling)?;
     }
 
-    result
+    Ok(result)
 }
 
 /// Reads Float64 data from a Parquet file.
 ///
 /// Expects a single Float64 column. For zero-copy access, use 
[`read_parquet_to_arrow`].
 pub fn read_parquet<P: AsRef<Path>>(path: P) -> Result<Vec<f64>> {

Review Comment:
   currently we specify `read_*` ( `read_parquet(...)` ) to use 
`NullHandling::FillZero`, should we adopt same strategy like `*Readers` 
(`ParquetReader`)?
   
   like this?
   ```rust
   pub fn read_parquet<P: AsRef<Path>>(path: P) -> Result<Vec<f64>>;
   // to
   pub fn read_parquet<P: AsRef<Path>>(
       path: P,
       null_handling: Option<NullHandling>,
   ) -> Result<Vec<f64>>;
   ```



##########
qdp/qdp-core/src/reader.rs:
##########
@@ -45,8 +45,48 @@
 //! }
 //! ```
 
+use arrow::array::{Array, Float64Array};

Review Comment:
   `Array` is not used in this file. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to