tustvold commented on code in PR #5863:
URL: https://github.com/apache/arrow-rs/pull/5863#discussion_r1634807527


##########
parquet/src/file/properties.rs:
##########
@@ -657,8 +709,17 @@ impl WriterPropertiesBuilder {
         self
     }
 
-    /// Sets the max length of min/max value fields in statistics. Must be greater than 0.
-    /// If set to `None` - there's no effective limit.
+    /// Sets the max length of min/max value fields in row group level
+    /// [`Statistics`] (defaults to `None`).
+    ///
+    /// # Notes
+    /// Row group level [`Statistics`] are written when [`Self::set_statistics_enabled`] is
+    /// set to [`EnabledStatistics::Chunk`] or [`EnabledStatistics::Page`].
+    ///
+    /// * If `Some`, Must be greater than 0, otherwise will panic
+    /// * If `None` - there's no effective limit.

Review Comment:
   ```suggestion
       /// * If `Some`, must be greater than 0, otherwise will panic
       /// * If `None`, there's no effective limit.
   ```



##########
parquet/src/file/properties.rs:
##########
@@ -615,39 +653,53 @@ impl WriterPropertiesBuilder {
         self
     }
 
-    /// Sets max size for statistics for a column.
-    /// Takes precedence over globally defined settings.
+    /// Sets max size for statistics for a specific column.
+    ///
+    /// Takes precedence over [`Self::set_max_statistics_size`].
     pub fn set_column_max_statistics_size(mut self, col: ColumnPath, value: usize) -> Self {
         self.get_mut_props(col).set_max_statistics_size(value);
         self
     }
 
-    /// Sets whether a bloom filter should be created for a specific column.
-    /// The behavior is similar to [`set_bloom_filter_enabled`](Self::set_bloom_filter_enabled).
-    /// Takes precedence over globally defined settings.
+    /// Sets whether a bloom filter should be written for a specific column.
+    ///
+    /// Takes precedence over [`Self::set_bloom_filter_enabled`].
     pub fn set_column_bloom_filter_enabled(mut self, col: ColumnPath, value: bool) -> Self {
         self.get_mut_props(col).set_bloom_filter_enabled(value);
         self
     }
 
     /// Sets the false positive probability for bloom filter for a specific column.
-    /// The behavior is similar to [`set_bloom_filter_fpp`](Self::set_bloom_filter_fpp) but will
-    /// override the default.
+    ///
+    /// Takes precedence over [`Self::set_bloom_filter_fpp`].
     pub fn set_column_bloom_filter_fpp(mut self, col: ColumnPath, value: f64) -> Self {
         self.get_mut_props(col).set_bloom_filter_fpp(value);
         self
     }
 
     /// Sets the number of distinct values for bloom filter for a specific column.
-    /// The behavior is similar to [`set_bloom_filter_ndv`](Self::set_bloom_filter_ndv) but will
-    /// override the default.
+    ///
+    /// Takes precedence over [`Self::set_bloom_filter_ndv`].
     pub fn set_column_bloom_filter_ndv(mut self, col: ColumnPath, value: u64) -> Self {
         self.get_mut_props(col).set_bloom_filter_ndv(value);
         self
     }
 
-    /// Sets the max length of min/max value fields in the column index. Must be greater than 0.
-    /// If set to `None` - there's no effective limit.
+    /// Sets the max length of min/max value fields when writing the column
+    /// [`Index`] (defaults to `None`).
+    ///
+    /// This can be used to prevent columns with very long values (hundreds of
+    /// bytes long) from causing the parquet metadata to become huge.
+    ///
+    /// # Notes
+    ///
+    /// The column [`Index`] is written when [`Self::set_statistics_enabled`] is
+    /// set to [`EnabledStatistics::Page`].
+    ///
+    /// * If `Some`, Must be greater than 0, otherwise will panic
+    /// * If `None` - there's no effective limit.

Review Comment:
   ```suggestion
       /// * If `Some`, must be greater than 0, otherwise will panic
       /// * If `None`, there's no effective limit.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to