This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new ff0db32e49 Enhance index and field config validation to block adding
bloomfilter on boolean column (#15283)
ff0db32e49 is described below
commit ff0db32e491aee6d89e4cdd500006f0532ec5ea5
Author: ayesheepatra07 <[email protected]>
AuthorDate: Wed Mar 26 17:34:21 2025 -0700
Enhance index and field config validation to block adding bloomfilter on
boolean column (#15283)
---
.../src/test/resources/TableIndexingTest.csv | 4 +--
.../segment/local/utils/TableConfigUtils.java | 11 ++++++++
.../segment/local/utils/TableConfigUtilsTest.java | 30 ++++++++++++++++++++++
3 files changed, 43 insertions(+), 2 deletions(-)
diff --git a/pinot-core/src/test/resources/TableIndexingTest.csv b/pinot-core/src/test/resources/TableIndexingTest.csv
index 0c3891be1c..d3b046a908 100644
--- a/pinot-core/src/test/resources/TableIndexingTest.csv
+++ b/pinot-core/src/test/resources/TableIndexingTest.csv
@@ -198,7 +198,7 @@ DECIMAL;sv_BIG;dict;range_index;true;
DECIMAL;sv_BIG;dict;startree_index;true;
DECIMAL;sv_BIG;dict;vector_index;false;Vector index is currently only
supported on float array columns
BOOLEAN;sv;raw;timestamp_index;true;
-BOOLEAN;sv;raw;bloom_filter;true;
+BOOLEAN;sv;raw;bloom_filter;false;Cannot create a bloom filter on boolean column col
BOOLEAN;sv;raw;fst_index;false;Cannot create FST index on column: col, it can
only be applied to dictionary encoded single value string columns
BOOLEAN;sv;raw;h3_index;false;H3 index is currently only supported on BYTES
columns
BOOLEAN;sv;raw;inverted_index;false;Cannot create inverted index for raw index
column: col
@@ -209,7 +209,7 @@ BOOLEAN;sv;raw;range_index;false;Unsupported data type
BOOLEAN for range index
BOOLEAN;sv;raw;startree_index;false;Dimension: col does not have dictionary
BOOLEAN;sv;raw;vector_index;false;Vector index is currently only supported on
float array columns
BOOLEAN;mv;raw;timestamp_index;false;Caught exception while reading data
-BOOLEAN;mv;raw;bloom_filter;true;
+BOOLEAN;mv;raw;bloom_filter;false;Cannot create a bloom filter on boolean column col
BOOLEAN;mv;raw;fst_index;false;Cannot create FST index on column: col, it can
only be applied to dictionary encoded single value string columns
BOOLEAN;mv;raw;h3_index;false;H3 index is currently only supported on
single-value columns
BOOLEAN;mv;raw;inverted_index;false;Cannot create inverted index for raw index
column: col
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
index 9b1c484003..8e17162312 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/TableConfigUtils.java
@@ -1094,6 +1094,11 @@ public final class TableConfigUtils {
}
}
+ for (String bloomFilterColumn : bloomFilterColumns) {
+
Preconditions.checkState(schema.getFieldSpecFor(bloomFilterColumn).getDataType()
!= FieldSpec.DataType.BOOLEAN,
+ "Cannot create bloom filter on BOOLEAN column: " +
bloomFilterColumn);
+ }
+
for (String jsonIndexColumn : jsonIndexColumns) {
FieldSpec fieldSpec = schema.getFieldSpecFor(jsonIndexColumn);
Preconditions.checkState(
@@ -1235,6 +1240,12 @@ public final class TableConfigUtils {
// Validate the forward index disabled compatibility with other indexes
if enabled for this column
validateForwardIndexDisabledIndexCompatibility(columnName, fieldConfig,
indexingConfig, schema, tableType);
+ // Validate bloom filter is not added to boolean column
+ if (fieldConfig.getIndexes() != null &&
fieldConfig.getIndexes().has("bloom")) {
+ Preconditions.checkState(fieldSpec.getDataType() !=
FieldSpec.DataType.BOOLEAN,
+ "Cannot create a bloom filter on boolean column " + columnName);
+ }
+
if (CollectionUtils.isNotEmpty(fieldConfig.getIndexTypes())) {
for (FieldConfig.IndexType indexType : fieldConfig.getIndexTypes()) {
switch (indexType) {
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
index b3bf9d3e1a..12489e38fd 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java
@@ -18,6 +18,8 @@
*/
package org.apache.pinot.segment.local.utils;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import java.util.Arrays;
@@ -30,6 +32,7 @@ import org.apache.pinot.common.tier.TierFactory;
import org.apache.pinot.segment.spi.AggregationFunctionType;
import org.apache.pinot.segment.spi.Constants;
import
org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair;
+import org.apache.pinot.spi.config.table.BloomFilterConfig;
import org.apache.pinot.spi.config.table.ColumnPartitionConfig;
import org.apache.pinot.spi.config.table.DedupConfig;
import org.apache.pinot.spi.config.table.FieldConfig;
@@ -73,6 +76,8 @@ import org.mockito.Mockito;
import org.testng.Assert;
import org.testng.annotations.Test;
+import static org.testng.Assert.assertThrows;
+
/**
* Tests for the validations in {@link TableConfigUtils}
@@ -1321,6 +1326,31 @@ public class TableConfigUtilsTest {
}
}
+  /**
+   * Verifies that {@link TableConfigUtils#validate} rejects a bloom filter on a BOOLEAN column for each of the three
+   * ways a bloom filter can be configured: the bloom filter column list, the bloom filter config map, and the
+   * "bloom" entry in a field config's indexes.
+   */
+  @Test
+  public void testValidateBFOnBoolean() {
+    // All three cases must reference the BOOLEAN column by the exact name declared in the schema. The previous
+    // version used "mycol" and "MyCol" for cases 1 and 3, which do not match the declared "myCol".
+    // NOTE(review): presumably Schema#getFieldSpecFor is case-sensitive, in which case those two cases never
+    // reached the BOOLEAN check - confirm.
+    String booleanColumn = "myCol";
+    Schema schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME)
+        .addSingleValueDimension(booleanColumn, FieldSpec.DataType.BOOLEAN)
+        .addSingleValueDimension("mycol2", FieldSpec.DataType.STRING).build();
+
+    // NOTE(review): assertThrows only pins the exception type. If other validations can also raise
+    // IllegalStateException for these table configs, consider asserting on the message as well - TODO confirm.
+
+    // Case 1: bloom filter declared through the bloom filter column list
+    TableConfig tableConfig1 = new TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME)
+        .setBloomFilterColumns(Arrays.asList(booleanColumn)).build();
+    assertThrows(IllegalStateException.class, () -> TableConfigUtils.validate(tableConfig1, schema));
+
+    // Case 2: bloom filter declared through the bloom filter config map
+    TableConfig tableConfig2 = new TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).build();
+    tableConfig2.getIndexingConfig().setBloomFilterConfigs(
+        Collections.singletonMap(booleanColumn, new BloomFilterConfig(0.01, 1000, true)));
+    assertThrows(IllegalStateException.class, () -> TableConfigUtils.validate(tableConfig2, schema));
+
+    // Case 3: bloom filter declared through the "bloom" key of a field config's indexes
+    TableConfig tableConfig3 = new TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).build();
+    ObjectNode indexesNode = JsonNodeFactory.instance.objectNode();
+    indexesNode.putObject("bloom");
+    FieldConfig fieldConfig = new FieldConfig(
+        booleanColumn, FieldConfig.EncodingType.DICTIONARY, null, null, null,
+        null, indexesNode, null, null);
+    tableConfig3.setFieldConfigList(Arrays.asList(fieldConfig));
+    assertThrows(IllegalStateException.class, () -> TableConfigUtils.validate(tableConfig3, schema));
+  }
+
@Test
public void testValidateIndexingConfig() {
Schema schema =
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]