This is an automated email from the ASF dual-hosted git repository.

nevime pushed a commit to branch rust-parquet-arrow-writer
in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 3fb8bfa60f82b20b89965df6f67acf08f98fc431
Author:     Carol (Nichols || Goulding) <carol.nich...@gmail.com>
AuthorDate: Fri Sep 25 17:54:11 2020 +0200

    ARROW-10095: [Rust] Update rust-parquet-arrow-writer branch's encode_arrow_schema with ipc changes

    Note that this PR is deliberately filed against the rust-parquet-arrow-writer branch, not master!!

    Hi! 👋 I'm looking to help out with the rust-parquet-arrow-writer branch, and I just pulled it
    down and it wasn't compiling, because in 75f804efbfe367175fef5a2238d9cd2d30ed3afe,
    `schema_to_bytes` was changed to take `IpcWriteOptions` and to return `EncodedData`. This
    updates `encode_arrow_schema` to use those changes, which should get this branch compiling and
    passing tests again.

    I'm kind of guessing which JIRA ticket this should be associated with; honestly I think this
    commit can just be squashed with
    https://github.com/apache/arrow/commit/8f0ed91469f2e569472edaa3b69ffde051088555 next time this
    branch gets rebased. Please let me know if I should change anything, I'm happy to!

    Closes #8274 from carols10cents/update-with-ipc-changes

    Authored-by: Carol (Nichols || Goulding) <carol.nich...@gmail.com>
    Signed-off-by: Neville Dipale <nevilled...@gmail.com>
---
 rust/parquet/src/arrow/arrow_writer.rs | 2 +-
 rust/parquet/src/arrow/schema.rs       | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs
index 1ca8d50..e0ad207 100644
--- a/rust/parquet/src/arrow/arrow_writer.rs
+++ b/rust/parquet/src/arrow/arrow_writer.rs
@@ -22,7 +22,7 @@ use std::rc::Rc;
 use arrow::array as arrow_array;
 use arrow::datatypes::{DataType as ArrowDataType, SchemaRef};
 use arrow::record_batch::RecordBatch;
-use arrow_array::Array;
+use arrow_array::{Array, PrimitiveArrayOps};
 
 use super::schema::add_encoded_arrow_schema_to_metadata;
 use crate::column::writer::ColumnWriter;
diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs
index d4cfe1f..d5a0ff9 100644
--- a/rust/parquet/src/arrow/schema.rs
+++ b/rust/parquet/src/arrow/schema.rs
@@ -27,6 +27,7 @@ use std::collections::{HashMap, HashSet};
 use std::rc::Rc;
 
 use arrow::datatypes::{DataType, DateUnit, Field, Schema, TimeUnit};
+use arrow::ipc::writer;
 
 use crate::basic::{LogicalType, Repetition, Type as PhysicalType};
 use crate::errors::{ParquetError::ArrowError, Result};
@@ -120,15 +121,16 @@ fn get_arrow_schema_from_metadata(encoded_meta: &str) -> Option<Schema> {
 
 /// Encodes the Arrow schema into the IPC format, and base64 encodes it
 fn encode_arrow_schema(schema: &Schema) -> String {
-    let mut serialized_schema = arrow::ipc::writer::schema_to_bytes(&schema);
+    let options = writer::IpcWriteOptions::default();
+    let mut serialized_schema = arrow::ipc::writer::schema_to_bytes(&schema, &options);
 
     // manually prepending the length to the schema as arrow uses the legacy IPC format
     // TODO: change after addressing ARROW-9777
-    let schema_len = serialized_schema.len();
+    let schema_len = serialized_schema.ipc_message.len();
     let mut len_prefix_schema = Vec::with_capacity(schema_len + 8);
     len_prefix_schema.append(&mut vec![255u8, 255, 255, 255]);
     len_prefix_schema.append((schema_len as u32).to_le_bytes().to_vec().as_mut());
-    len_prefix_schema.append(&mut serialized_schema);
+    len_prefix_schema.append(&mut serialized_schema.ipc_message);
 
     base64::encode(&len_prefix_schema)
 }
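
For reference, a minimal sketch (not part of the commit) of the call pattern the patch adapts to.
The names `schema_to_bytes`, `IpcWriteOptions`, and the `ipc_message` field follow the diff above;
the helper `schema_ipc_bytes` is hypothetical and only illustrates the assumed arrow crate API at
this revision:

    use arrow::datatypes::Schema;
    use arrow::ipc::writer;

    // Sketch only: schema_to_bytes now takes IpcWriteOptions and returns
    // EncodedData, whose ipc_message field holds the serialized schema bytes.
    fn schema_ipc_bytes(schema: &Schema) -> Vec<u8> {
        let options = writer::IpcWriteOptions::default();
        let encoded = writer::schema_to_bytes(schema, &options);
        encoded.ipc_message
    }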