Matthew Franglen created ARROW-6901: ---------------------------------------
Summary: [Rust][Parquet] Rust Parquet SerializedFileWriter writes total_num_rows as zero
Key: ARROW-6901
URL: https://issues.apache.org/jira/browse/ARROW-6901
Project: Apache Arrow
Issue Type: Bug
Components: Rust
Affects Versions: 0.15.0, 0.14.1
Reporter: Matthew Franglen

The SerializedFileWriter does not update total_num_rows at any point. This results in consistently writing zero as the number of rows in the file.

This code will fail:

{code:java}
let data = vec![vec![1, 2, 3, 4, 5]];
let file = ...; // a file path here
let schema = Rc::new(
    types::Type::group_type_builder("schema")
        .with_fields(&mut vec![Rc::new(
            types::Type::primitive_type_builder("col1", Type::INT32)
                .with_repetition(Repetition::REQUIRED)
                .build()
                .unwrap(),
        )])
        .build()
        .unwrap(),
);
let props = Rc::new(WriterProperties::builder().build());
let mut file_writer =
    SerializedFileWriter::new(file.try_clone().unwrap(), schema, props).unwrap();
let mut rows: i64 = 0;
for subset in &data {
    let mut row_group_writer = file_writer.next_row_group().unwrap();
    let col_writer = row_group_writer.next_column().unwrap();
    if let Some(mut writer) = col_writer {
        match writer {
            ColumnWriter::Int32ColumnWriter(ref mut typed) => {
                rows += typed.write_batch(&subset[..], None, None).unwrap() as i64;
            }
            _ => {
                unimplemented!();
            }
        }
        row_group_writer.close_column(writer).unwrap();
    }
    file_writer.close_row_group(row_group_writer).unwrap();
}
file_writer.close().unwrap();
let reader = SerializedFileReader::new(file).unwrap();
assert_eq!(reader.num_row_groups(), data.len());
assert_eq!(reader.metadata().file_metadata().num_rows(), rows, "row count in metadata not equal to number of rows written");
{code}

--
This message was sent by Atlassian Jira (v8.3.4#803005)