This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 689897ea99 Support round trip reading / writing Arrow `Duration` type
to parquet (#7482)
689897ea99 is described below
commit 689897ea99b61e94bd80d55796f701879375bf23
Author: liyixin <[email protected]>
AuthorDate: Sat May 10 00:50:07 2025 +0800
Support round trip reading / writing Arrow `Duration` type to parquet
(#7482)
* fix duration conversion error
* implement duration type conversion in parquet writer
* Use existing tests, remove new test
---------
Co-authored-by: liyixin <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
---
parquet/src/arrow/arrow_writer/mod.rs | 4 ----
parquet/src/arrow/schema/mod.rs | 5 ++++-
parquet/src/arrow/schema/primitive.rs | 1 +
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/parquet/src/arrow/arrow_writer/mod.rs
b/parquet/src/arrow/arrow_writer/mod.rs
index 66e1b06fa7..652e12d45a 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -2519,25 +2519,21 @@ mod tests {
}
#[test]
- #[should_panic(expected = "Converting Duration to parquet not supported")]
fn duration_second_single_column() {
required_and_optional::<DurationSecondArray, _>(0..SMALL_SIZE as i64);
}
#[test]
- #[should_panic(expected = "Converting Duration to parquet not supported")]
fn duration_millisecond_single_column() {
required_and_optional::<DurationMillisecondArray, _>(0..SMALL_SIZE as
i64);
}
#[test]
- #[should_panic(expected = "Converting Duration to parquet not supported")]
fn duration_microsecond_single_column() {
required_and_optional::<DurationMicrosecondArray, _>(0..SMALL_SIZE as
i64);
}
#[test]
- #[should_panic(expected = "Converting Duration to parquet not supported")]
fn duration_nanosecond_single_column() {
required_and_optional::<DurationNanosecondArray, _>(0..SMALL_SIZE as
i64);
}
diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index 89c42f5eaf..d264e0cce8 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -586,7 +586,10 @@ fn arrow_to_parquet_type(field: &Field, coerce_types:
bool) -> Result<Type> {
.with_repetition(repetition)
.with_id(id)
.build(),
- DataType::Duration(_) => Err(arrow_err!("Converting Duration to
parquet not supported",)),
+ DataType::Duration(_) => Type::primitive_type_builder(name,
PhysicalType::INT64)
+ .with_repetition(repetition)
+ .with_id(id)
+ .build(),
DataType::Interval(_) => {
Type::primitive_type_builder(name,
PhysicalType::FIXED_LEN_BYTE_ARRAY)
.with_converted_type(ConvertedType::INTERVAL)
diff --git a/parquet/src/arrow/schema/primitive.rs
b/parquet/src/arrow/schema/primitive.rs
index f1fed8f2a5..cc276eb611 100644
--- a/parquet/src/arrow/schema/primitive.rs
+++ b/parquet/src/arrow/schema/primitive.rs
@@ -43,6 +43,7 @@ fn apply_hint(parquet: DataType, hint: DataType) -> DataType {
(DataType::Int32 | DataType::Int64, DataType::Timestamp(_, _)) => hint,
(DataType::Int32, DataType::Time32(_)) => hint,
(DataType::Int64, DataType::Time64(_)) => hint,
+ (DataType::Int64, DataType::Duration(_)) => hint,
// Date64 doesn't have a corresponding LogicalType / ConvertedType
(DataType::Int64, DataType::Date64) => hint,