yonipeleg33 commented on code in PR #9357:
URL: https://github.com/apache/arrow-rs/pull/9357#discussion_r2778930325


##########
parquet/src/file/properties.rs:
##########
@@ -573,9 +594,42 @@ impl WriterPropertiesBuilder {
     ///
     /// # Panics
     /// If the value is set to 0.
+    #[deprecated(
+        since = "57.3.0",

Review Comment:
   Thanks, done



##########
parquet/src/file/properties.rs:
##########
@@ -573,9 +594,42 @@ impl WriterPropertiesBuilder {
     ///
     /// # Panics
     /// If the value is set to 0.
+    #[deprecated(
+        since = "57.3.0",
+        note = "Use `set_max_row_group_row_count` instead",
+    )]
     pub fn set_max_row_group_size(mut self, value: usize) -> Self {
         assert!(value > 0, "Cannot have a 0 max row group size");
-        self.max_row_group_size = value;
+        self.max_row_group_row_count = Some(value);
+        self
+    }
+
+    /// Sets maximum number of rows in a row group, or `None` for unlimited.
+    ///
+    /// If both `max_row_group_row_count` and `max_row_group_bytes` are set,
+    /// the row group with the smallest limit will be applied.
+    ///
+    /// # Panics
+    /// If the value is `Some(0)`.
+    pub fn set_max_row_group_row_count(mut self, value: Option<usize>) -> Self 
{
+        assert_ne!(value, Some(0), "Cannot have a 0 max row group bytes");
+        self.max_row_group_row_count = value;
+        self
+    }
+
+    /// Sets maximum size of a row group in bytes, or `None` for unlimited.
+    ///
+    /// Row groups are flushed when their estimated encoded size exceeds this 
threshold.
+    /// This is similar to the official Java implementation for 
`parquet.block.size`'s behavior.
+    ///
+    /// If both `max_row_group_row_count` and `max_row_group_bytes` are set,
+    /// the row group with the smallest limit will be applied.

Review Comment:
   Done



##########
parquet/src/file/properties.rs:
##########
@@ -45,6 +45,9 @@ pub const DEFAULT_STATISTICS_ENABLED: EnabledStatistics = 
EnabledStatistics::Pag
 pub const DEFAULT_WRITE_PAGE_HEADER_STATISTICS: bool = false;
 /// Default value for [`WriterProperties::max_row_group_size`]
 pub const DEFAULT_MAX_ROW_GROUP_SIZE: usize = 1024 * 1024;
+/// Default value for [`WriterProperties::max_row_group_bytes`] (128 MB, same 
as the official Java
+/// implementation for `parquet.block.size`)
+pub const DEFAULT_MAX_ROW_GROUP_BYTES: usize = 128 * 1024 * 1024;

Review Comment:
   Removed. Good catch! (Leftover from previous implementations)



##########
parquet/src/file/properties.rs:
##########
@@ -573,9 +594,42 @@ impl WriterPropertiesBuilder {
     ///
     /// # Panics
     /// If the value is set to 0.
+    #[deprecated(
+        since = "57.3.0",
+        note = "Use `set_max_row_group_row_count` instead",
+    )]
     pub fn set_max_row_group_size(mut self, value: usize) -> Self {
         assert!(value > 0, "Cannot have a 0 max row group size");
-        self.max_row_group_size = value;
+        self.max_row_group_row_count = Some(value);
+        self
+    }
+
+    /// Sets maximum number of rows in a row group, or `None` for unlimited.
+    ///
+    /// If both `max_row_group_row_count` and `max_row_group_bytes` are set,
+    /// the row group with the smallest limit will be applied.
+    ///
+    /// # Panics
+    /// If the value is `Some(0)`.
+    pub fn set_max_row_group_row_count(mut self, value: Option<usize>) -> Self 
{
+        assert_ne!(value, Some(0), "Cannot have a 0 max row group bytes");

Review Comment:
   Done



##########
parquet/src/arrow/arrow_writer/mod.rs:
##########
@@ -314,8 +320,12 @@ impl<W: Write + Send> ArrowWriter<W> {
     /// Encodes the provided [`RecordBatch`]
     ///
     /// If this would cause the current row group to exceed 
[`WriterProperties::max_row_group_size`]

Review Comment:
   Indeed! Done



##########
parquet/src/file/properties.rs:
##########
@@ -573,9 +594,42 @@ impl WriterPropertiesBuilder {
     ///
     /// # Panics
     /// If the value is set to 0.
+    #[deprecated(
+        since = "57.3.0",
+        note = "Use `set_max_row_group_row_count` instead",
+    )]
     pub fn set_max_row_group_size(mut self, value: usize) -> Self {
         assert!(value > 0, "Cannot have a 0 max row group size");
-        self.max_row_group_size = value;
+        self.max_row_group_row_count = Some(value);
+        self
+    }
+
+    /// Sets maximum number of rows in a row group, or `None` for unlimited.
+    ///
+    /// If both `max_row_group_row_count` and `max_row_group_bytes` are set,
+    /// the row group with the smallest limit will be applied.

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to