jmfiaschi commented on a change in pull request #1468:
URL: https://github.com/apache/arrow-rs/pull/1468#discussion_r835581814



##########
File path: parquet/src/arrow/arrow_writer.rs
##########
@@ -772,6 +813,74 @@ mod tests {
         }
     }
 
+    #[test]
+    fn arrow_writer_append_data_to_existing_file() {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Int64, true),
+        ]));
+
+        let a = Int32Array::from(vec![1]);
+        let b = Int64Array::from(vec![Some(1)]);
+
+        let batch =
+            RecordBatch::try_new(schema.clone(), vec![Arc::new(a), 
Arc::new(b)]).unwrap();
+        let output_cursor = InMemoryWriteableCursor::default();
+
+        {
+            let mut writer =
+                ArrowWriter::try_new(output_cursor.clone(), schema.clone(), 
None)
+                    .unwrap();
+            writer.write(&batch).unwrap();
+            writer.close().unwrap();
+        }
+
+        // Append new data to the chunk cursor
+        let chunk_cursor = 
SliceableCursor::new(output_cursor.into_inner().unwrap());
+
+        let a = Int32Array::from(vec![2]);
+        let b = Int64Array::from(vec![None]);
+
+        let batch =
+            RecordBatch::try_new(schema.clone(), vec![Arc::new(a), 
Arc::new(b)]).unwrap();
+
+        let output_cursor = InMemoryWriteableCursor::default();

Review comment:
       Same explanation as before: one cursor (the slice cursor) serves as the 
reader, and the other is used to write the data:
   reader (file data with footer) => | header | data | footer |
   writer => | header | data |
   
   The next write() will append the data to the writer:
   writer => | header | data | next_row_group |
   ... etc 
   close() => | header | data | row_groups | footer |
   
   We don't touch the previous data.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to