This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new f5f1f8c5aab branch-3.0: [fix](ngram bloomfilter) fix narrow conversion
for ngram bf_size (#43645)
f5f1f8c5aab is described below
commit f5f1f8c5aab17bd44df01e8303ccd7d2c99da2ac
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 12 11:25:09 2024 +0800
branch-3.0: [fix](ngram bloomfilter) fix narrow conversion for ngram
bf_size (#43645)
Cherry-picked from #43480
Co-authored-by: airborne12 <[email protected]>
---
be/src/olap/rowset/segment_v2/segment_writer.cpp | 15 ++++++-
.../rowset/segment_v2/vertical_segment_writer.cpp | 15 ++++++-
.../java/org/apache/doris/analysis/IndexDef.java | 4 +-
.../trees/plans/commands/info/IndexDefinition.java | 4 +-
.../index_p0/test_ngram_bloomfilter_index.groovy | 47 ++++++++++++++++++++++
5 files changed, 77 insertions(+), 8 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 5957a555ba7..c532969baa4 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -192,8 +192,19 @@ Status SegmentWriter::_create_column_writer(uint32_t cid,
const TabletColumn& co
if (tablet_index) {
opts.need_bloom_filter = true;
opts.is_ngram_bf_index = true;
- opts.gram_size = tablet_index->get_gram_size();
- opts.gram_bf_size = tablet_index->get_gram_bf_size();
+ // Narrowing conversion from int32_t to uint8_t and uint16_t is dangerous, so range-check the values first.
+ auto gram_size = tablet_index->get_gram_size();
+ auto gram_bf_size = tablet_index->get_gram_bf_size();
+ if (gram_size > 256 || gram_size < 1) {
+ return Status::NotSupported("Do not support ngram bloom filter for
ngram_size: ",
+ gram_size);
+ }
+ if (gram_bf_size > 65535 || gram_bf_size < 64) {
+ return Status::NotSupported("Do not support ngram bloom filter for
bf_size: ",
+ gram_bf_size);
+ }
+ opts.gram_size = gram_size;
+ opts.gram_bf_size = gram_bf_size;
}
opts.need_bitmap_index = column.has_bitmap_index();
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 9ade9c1bfcc..2cea4c86c09 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -183,8 +183,19 @@ Status
VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
if (tablet_index) {
opts.need_bloom_filter = true;
opts.is_ngram_bf_index = true;
- opts.gram_size = tablet_index->get_gram_size();
- opts.gram_bf_size = tablet_index->get_gram_bf_size();
+ // Narrowing conversion from int32_t to uint8_t and uint16_t is dangerous, so range-check the values first.
+ auto gram_size = tablet_index->get_gram_size();
+ auto gram_bf_size = tablet_index->get_gram_bf_size();
+ if (gram_size > 256 || gram_size < 1) {
+ return Status::NotSupported("Do not support ngram bloom filter for
ngram_size: ",
+ gram_size);
+ }
+ if (gram_bf_size > 65535 || gram_bf_size < 64) {
+ return Status::NotSupported("Do not support ngram bloom filter for
bf_size: ",
+ gram_bf_size);
+ }
+ opts.gram_size = gram_size;
+ opts.gram_bf_size = gram_bf_size;
}
opts.need_bitmap_index = column.has_bitmap_index();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
index b2ee4537297..d98a3b93e45 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java
@@ -254,8 +254,8 @@ public class IndexDef {
if (ngramSize > 256 || ngramSize < 1) {
throw new AnalysisException("gram_size should be
integer and less than 256");
}
- if (bfSize > 65536 || bfSize < 64) {
- throw new AnalysisException("bf_size should be integer
and between 64 and 65536");
+ if (bfSize > 65535 || bfSize < 64) {
+ throw new AnalysisException("bf_size should be integer
and between 64 and 65535");
}
} catch (NumberFormatException e) {
throw new AnalysisException("invalid ngram properties:" +
e.getMessage(), e);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
index 340ea581504..61f2c874fd7 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java
@@ -141,9 +141,9 @@ public class IndexDefinition {
throw new AnalysisException(
"gram_size should be integer and less than
256");
}
- if (bfSize > 65536 || bfSize < 64) {
+ if (bfSize > 65535 || bfSize < 64) {
throw new AnalysisException(
- "bf_size should be integer and between 64 and
65536");
+ "bf_size should be integer and between 64 and
65535");
}
} catch (NumberFormatException e) {
throw new AnalysisException("invalid ngram properties:" +
e.getMessage(), e);
diff --git
a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
index c56eed967a0..e2ab9b9c117 100644
--- a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
+++ b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy
@@ -59,4 +59,51 @@ suite("test_ngram_bloomfilter_index") {
qt_select_eq_3 "SELECT * FROM ${tableName} WHERE http_url =
'/%/7212503657802320699%' ORDER BY key_id"
qt_select_in_3 "SELECT * FROM ${tableName} WHERE http_url IN
('/%/7212503657802320699%') ORDER BY key_id"
qt_select_like_3 "SELECT * FROM ${tableName} WHERE http_url like
'/%/7212503657802320699%' ORDER BY key_id"
+
+ //case for bf_size 65536
+ def tableName2 = 'test_ngram_bloomfilter_index2'
+ sql "DROP TABLE IF EXISTS ${tableName2}"
+ test {
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName2} (
+ `key_id` bigint(20) NULL COMMENT '',
+ `category` varchar(200) NULL COMMENT '',
+ `https_url` varchar(300) NULL COMMENT '',
+ `hostname` varchar(300) NULL,
+ `http_url` text NULL COMMENT '',
+ `url_path` varchar(2000) NULL COMMENT '',
+ `cnt` bigint(20) NULL COMMENT '',
+ `host_flag` boolean NULL COMMENT '',
+ INDEX idx_ngrambf (`http_url`) USING NGRAM_BF
PROPERTIES("gram_size" = "2", "bf_size" = "65536")
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`key_id`, `category`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
+ PROPERTIES("replication_num" = "1");
+ """
+ exception "bf_size should be integer and between 64 and 65535"
+ }
+
+ def tableName3 = 'test_ngram_bloomfilter_index3'
+ sql "DROP TABLE IF EXISTS ${tableName3}"
+ sql """
+ CREATE TABLE IF NOT EXISTS ${tableName3} (
+ `key_id` bigint(20) NULL COMMENT '',
+ `category` varchar(200) NULL COMMENT '',
+ `https_url` varchar(300) NULL COMMENT '',
+ `hostname` varchar(300) NULL,
+ `http_url` text NULL COMMENT '',
+ `url_path` varchar(2000) NULL COMMENT '',
+ `cnt` bigint(20) NULL COMMENT '',
+ `host_flag` boolean NULL COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`key_id`, `category`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
+ PROPERTIES("replication_num" = "1");
+ """
+ test {
+ sql """ALTER TABLE ${tableName3} ADD INDEX idx_http_url(http_url)
USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="65536") COMMENT 'http_url
ngram_bf index'"""
+ exception "bf_size should be integer and between 64 and 65535"
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]