This is an automated email from the ASF dual-hosted git repository.
mbrobbel pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 9709c097d4 Add into_builder method for WriterProperties (#8272)
9709c097d4 is described below
commit 9709c097d4477828b35052e64831eb1d09ecd19b
Author: Corwin Joy <[email protected]>
AuthorDate: Fri Sep 5 05:38:26 2025 -0700
Add into_builder method for WriterProperties (#8272)
# Which issue does this PR close?
- Closes #8273.
# Rationale for this change
When working with the library using encryption, we have sometimes found
it necessary to modify an existing set of `WriterProperties` on a
per-file basis to set specific encryption properties. More generally,
others may need to use an existing set of `WriterProperties` as a
template and modify the properties. I have implemented this feature by
adding an `into_builder` method, which appears to be the standard
approach in other parts of the library.
# Are these changes tested?
Yes, `test_writer_properties_builder` has been updated to add a
round-trip test for `into_builder`.
# Are there any user-facing changes?
Yes. `WriterProperties` now has a new `into_builder` method.
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
parquet/src/file/properties.rs | 124 +++++++++++++++++++++++++++--------------
1 file changed, 82 insertions(+), 42 deletions(-)
diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs
index 96e3706e27..603db6660f 100644
--- a/parquet/src/file/properties.rs
+++ b/parquet/src/file/properties.rs
@@ -193,6 +193,12 @@ impl WriterProperties {
WriterPropertiesBuilder::default()
}
+ /// Converts this [`WriterProperties`] into a [`WriterPropertiesBuilder`],
+ /// so that existing property settings can be modified and rebuilt.
+ pub fn into_builder(self) -> WriterPropertiesBuilder {
+ self.into()
+ }
+
/// Returns data page size limit.
///
/// Note: this is a best effort limit based on the write batch size
@@ -435,6 +441,7 @@ impl WriterProperties {
/// Builder for [`WriterProperties`] Parquet writer configuration.
///
/// See example on [`WriterProperties`]
+#[derive(Debug, Clone)]
pub struct WriterPropertiesBuilder {
data_page_size_limit: usize,
data_page_row_count_limit: usize,
@@ -934,6 +941,30 @@ impl WriterPropertiesBuilder {
}
}
+impl From<WriterProperties> for WriterPropertiesBuilder { // backs `WriterProperties::into_builder`
+ fn from(props: WriterProperties) -> Self { // consumes `props`; each field is moved across under the same name
+ WriterPropertiesBuilder {
+ data_page_size_limit: props.data_page_size_limit,
+ data_page_row_count_limit: props.data_page_row_count_limit,
+ write_batch_size: props.write_batch_size,
+ max_row_group_size: props.max_row_group_size,
+ bloom_filter_position: props.bloom_filter_position,
+ writer_version: props.writer_version,
+ created_by: props.created_by,
+ offset_index_disabled: props.offset_index_disabled,
+ key_value_metadata: props.key_value_metadata,
+ default_column_properties: props.default_column_properties,
+ column_properties: props.column_properties,
+ sorting_columns: props.sorting_columns,
+ column_index_truncate_length: props.column_index_truncate_length,
+ statistics_truncate_length: props.statistics_truncate_length,
+ coerce_types: props.coerce_types,
+ #[cfg(feature = "encryption")] // initializer below is compiled only with the `encryption` feature
+ file_encryption_properties: props.file_encryption_properties,
+ }
+ }
+}
+
/// Controls the level of statistics to be computed by the writer and stored in
/// the parquet file.
///
@@ -1377,50 +1408,59 @@ mod tests {
.set_column_bloom_filter_fpp(ColumnPath::from("col"), 0.1)
.build();
- assert_eq!(props.writer_version(), WriterVersion::PARQUET_2_0);
- assert_eq!(props.data_page_size_limit(), 10);
- assert_eq!(props.dictionary_page_size_limit(), 20);
- assert_eq!(props.write_batch_size(), 30);
- assert_eq!(props.max_row_group_size(), 40);
- assert_eq!(props.created_by(), "default");
- assert_eq!(
- props.key_value_metadata(),
- Some(&vec![
- KeyValue::new("key".to_string(), "value".to_string(),)
- ])
- );
+ fn test_props(props: &WriterProperties) {
+ assert_eq!(props.writer_version(), WriterVersion::PARQUET_2_0);
+ assert_eq!(props.data_page_size_limit(), 10);
+ assert_eq!(props.dictionary_page_size_limit(), 20);
+ assert_eq!(props.write_batch_size(), 30);
+ assert_eq!(props.max_row_group_size(), 40);
+ assert_eq!(props.created_by(), "default");
+ assert_eq!(
+ props.key_value_metadata(),
+ Some(&vec![
+ KeyValue::new("key".to_string(), "value".to_string(),)
+ ])
+ );
- assert_eq!(
- props.encoding(&ColumnPath::from("a")),
- Some(Encoding::DELTA_BINARY_PACKED)
- );
- assert_eq!(
- props.compression(&ColumnPath::from("a")),
- Compression::GZIP(Default::default())
- );
- assert!(!props.dictionary_enabled(&ColumnPath::from("a")));
- assert_eq!(
- props.statistics_enabled(&ColumnPath::from("a")),
- EnabledStatistics::None
- );
+ assert_eq!(
+ props.encoding(&ColumnPath::from("a")),
+ Some(Encoding::DELTA_BINARY_PACKED)
+ );
+ assert_eq!(
+ props.compression(&ColumnPath::from("a")),
+ Compression::GZIP(Default::default())
+ );
+ assert!(!props.dictionary_enabled(&ColumnPath::from("a")));
+ assert_eq!(
+ props.statistics_enabled(&ColumnPath::from("a")),
+ EnabledStatistics::None
+ );
- assert_eq!(
- props.encoding(&ColumnPath::from("col")),
- Some(Encoding::RLE)
- );
- assert_eq!(
- props.compression(&ColumnPath::from("col")),
- Compression::SNAPPY
- );
- assert!(props.dictionary_enabled(&ColumnPath::from("col")));
- assert_eq!(
- props.statistics_enabled(&ColumnPath::from("col")),
- EnabledStatistics::Chunk
- );
- assert_eq!(
- props.bloom_filter_properties(&ColumnPath::from("col")),
- Some(&BloomFilterProperties { fpp: 0.1, ndv: 100 })
- );
+ assert_eq!(
+ props.encoding(&ColumnPath::from("col")),
+ Some(Encoding::RLE)
+ );
+ assert_eq!(
+ props.compression(&ColumnPath::from("col")),
+ Compression::SNAPPY
+ );
+ assert!(props.dictionary_enabled(&ColumnPath::from("col")));
+ assert_eq!(
+ props.statistics_enabled(&ColumnPath::from("col")),
+ EnabledStatistics::Chunk
+ );
+ assert_eq!(
+ props.bloom_filter_properties(&ColumnPath::from("col")),
+ Some(&BloomFilterProperties { fpp: 0.1, ndv: 100 })
+ );
+ }
+
+ // Test direct build of properties
+ test_props(&props);
+
+ // Test that into_builder() gives the same result
+ let props_into_builder_and_back = props.into_builder().build();
+ test_props(&props_into_builder_and_back);
}
#[test]