jmfiaschi commented on a change in pull request #1468: URL: https://github.com/apache/arrow-rs/pull/1468#discussion_r835581814
########## File path: parquet/src/arrow/arrow_writer.rs ########## @@ -772,6 +813,74 @@ mod tests { } } + #[test] + fn arrow_writer_append_data_to_existing_file() { + let schema = Arc::new(Schema::new(vec![ + Field::new("a", DataType::Int32, false), + Field::new("b", DataType::Int64, true), + ])); + + let a = Int32Array::from(vec![1]); + let b = Int64Array::from(vec![Some(1)]); + + let batch = + RecordBatch::try_new(schema.clone(), vec![Arc::new(a), Arc::new(b)]).unwrap(); + let output_cursor = InMemoryWriteableCursor::default(); + + { + let mut writer = + ArrowWriter::try_new(output_cursor.clone(), schema.clone(), None) + .unwrap(); + writer.write(&batch).unwrap(); + writer.close().unwrap(); + } + + // Append new data to the chunk cursor + let chunk_cursor = SliceableCursor::new(output_cursor.into_inner().unwrap()); + + let a = Int32Array::from(vec![2]); + let b = Int64Array::from(vec![None]); + + let batch = + RecordBatch::try_new(schema.clone(), vec![Arc::new(a), Arc::new(b)]).unwrap(); + + let output_cursor = InMemoryWriteableCursor::default(); Review comment: Same explanation as before: one cursor holds the reader (the sliceable cursor), and the other is used to write the data: reader (file data with footer) => | header | data | footer | writer => | header | data | The next write() will append the data to the writer: writer => | header | data | next_row_group | ... etc. close() => | header | data | row_groups | footer | We don't touch the previous data. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org