alamb commented on code in PR #9877:
URL: https://github.com/apache/arrow-rs/pull/9877#discussion_r3190361313
##########
parquet/src/file/properties.rs:
##########
@@ -1103,11 +1104,17 @@ impl WriterPropertiesBuilder {
/// been called.
///
/// [`set_bloom_filter_enabled`]: Self::set_bloom_filter_enabled
- pub fn set_bloom_filter_ndv(mut self, value: u64) -> Self {
+ pub fn set_bloom_filter_max_ndv(mut self, value: u64) -> Self {
self.default_column_properties.set_bloom_filter_ndv(value);
self
}
+ /// Deprecated alias for [`Self::set_bloom_filter_max_ndv`].
Review Comment:
👍
##########
parquet/src/file/properties.rs:
##########
@@ -1363,6 +1429,81 @@ impl Default for BloomFilterProperties {
}
}
+impl BloomFilterProperties {
+ /// Returns a new [`BloomFilterPropertiesBuilder`] for constructing
+ /// [`BloomFilterProperties`] with custom values.
+ pub fn builder() -> BloomFilterPropertiesBuilder {
+ BloomFilterPropertiesBuilder::new()
+ }
+}
+
+/// Builder for [`BloomFilterProperties`].
+///
+/// Use [`BloomFilterProperties::builder`] or
[`BloomFilterPropertiesBuilder::new`]
+/// as the entry point. Unset fields fall back to [`DEFAULT_BLOOM_FILTER_FPP`]
+/// and [`DEFAULT_BLOOM_FILTER_NDV`] at build time.
Review Comment:
the note about unset fields seems like an implmentation detail and is
redundant with the comments on with_fpp and with_ndv
I recommend removing this mention
```suggestion
/// as the entry point.
```
##########
parquet/src/file/properties.rs:
##########
@@ -1363,6 +1429,81 @@ impl Default for BloomFilterProperties {
}
}
+impl BloomFilterProperties {
+ /// Returns a new [`BloomFilterPropertiesBuilder`] for constructing
+ /// [`BloomFilterProperties`] with custom values.
+ pub fn builder() -> BloomFilterPropertiesBuilder {
+ BloomFilterPropertiesBuilder::new()
+ }
+}
+
+/// Builder for [`BloomFilterProperties`].
+///
+/// Use [`BloomFilterProperties::builder`] or
[`BloomFilterPropertiesBuilder::new`]
+/// as the entry point. Unset fields fall back to [`DEFAULT_BLOOM_FILTER_FPP`]
+/// and [`DEFAULT_BLOOM_FILTER_NDV`] at build time.
+#[derive(Debug, Clone, Default)]
+pub struct BloomFilterPropertiesBuilder {
+ fpp: Option<f64>,
+ ndv: Option<u64>,
+}
+
+impl BloomFilterPropertiesBuilder {
+ /// Returns a new builder with no fields set.
+ ///
+ /// Equivalent to [`BloomFilterProperties::builder`].
+ pub fn new() -> Self {
+ Self::default()
+ }
+
+ /// Sets the target false positive probability.
+ ///
+ /// The value must be in `(0.0, 1.0)` exclusively; this is validated at
+ /// build time by [`Self::build`] / [`Self::try_build`]. When unset,
+ /// [`DEFAULT_BLOOM_FILTER_FPP`] is used.
+ pub fn with_fpp(mut self, fpp: f64) -> Self {
+ self.fpp = Some(fpp);
+ self
+ }
+
+ /// Sets the maximum expected number of distinct values used to size the
+ /// bloom filter before folding. When unset, [`DEFAULT_BLOOM_FILTER_NDV`]
+ /// is used.
+ pub fn with_max_ndv(mut self, ndv: u64) -> Self {
+ self.ndv = Some(ndv);
+ self
+ }
+
+ /// Builds [`BloomFilterProperties`].
+ ///
+ /// Panics if the configured `fpp` is not in `(0.0, 1.0)` exclusive.
+ /// Use [`Self::try_build`] for a non-panicking alternative.
+ pub fn build(self) -> BloomFilterProperties {
+ self.try_build().unwrap_or_else(|e| panic!("{e}"))
Review Comment:
this is not a new panic (it just moved from `set_bloom_filter_fpp`)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]