This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 621944d4874 [InvertedIndex](Variant) support inverted index for array
type in variant (#48594)
621944d4874 is described below
commit 621944d4874e8475ba76baaf4f978a9a65cbe216
Author: lihangyu <[email protected]>
AuthorDate: Wed Mar 5 10:02:13 2025 +0800
[InvertedIndex](Variant) support inverted index for array type in variant
(#48594)
cherry-pick from #47688
---
.../rowset/segment_v2/inverted_index_writer.cpp | 8 +-
be/src/vec/exprs/vexpr.cpp | 4 +-
be/test/common/schema_util_test.cpp | 12 ++
.../data/variant_github_events_new_p2/load.out | Bin 152 -> 222 bytes
.../test_array_contains_with_inverted_index.out | Bin 0 -> 45214 bytes
.../test_array_inverted_index_profile.out | Bin 0 -> 2004 bytes
.../test_array_contains_with_inverted_index.groovy | 1 -
.../test_array_with_inverted_index_all_type.groovy | 1 -
.../variant_github_events_new_p2/load.groovy | 61 +++++++-
.../test_array_contains_with_inverted_index.groovy | 95 ++++++-------
.../with_index/test_array_index_write.groovy | 93 ++++++++++++
.../test_array_inverted_index_profile.groovy | 156 +++++++++++++++++++++
12 files changed, 376 insertions(+), 55 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 64c373db166..949a823055d 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -77,15 +77,19 @@ bool
InvertedIndexColumnWriter::check_support_inverted_index(const TabletColumn&
static std::set<FieldType> invalid_types = {
FieldType::OLAP_FIELD_TYPE_DOUBLE,
FieldType::OLAP_FIELD_TYPE_JSONB,
- FieldType::OLAP_FIELD_TYPE_ARRAY,
FieldType::OLAP_FIELD_TYPE_FLOAT,
};
- if (column.is_extracted_column() &&
(invalid_types.contains(column.type()))) {
+ if (invalid_types.contains(column.type())) {
return false;
}
if (column.is_variant_type()) {
return false;
}
+ if (column.is_array_type()) {
+ // only support one level array
+ const auto& subcolumn = column.get_sub_column(0);
+ return !subcolumn.is_array_type() &&
check_support_inverted_index(subcolumn);
+ }
return true;
}
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index 0035b2a292f..17985c57cf7 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -643,7 +643,7 @@ Status VExpr::_evaluate_inverted_index(VExprContext*
context, const FunctionBase
context->get_inverted_index_context()
->get_storage_name_and_type_by_column_id(column_id);
auto storage_type = remove_nullable(storage_name_type->second);
- auto target_type = cast_expr->get_target_type();
+ auto target_type =
remove_nullable(cast_expr->get_target_type());
auto origin_primitive_type =
storage_type->get_type_as_type_descriptor().type;
auto target_primitive_type =
target_type->get_type_as_type_descriptor().type;
if (is_complex_type(storage_type)) {
@@ -663,7 +663,7 @@ Status VExpr::_evaluate_inverted_index(VExprContext*
context, const FunctionBase
}
}
if (origin_primitive_type != TYPE_VARIANT &&
- (origin_primitive_type == target_primitive_type ||
+ (storage_type->equals(*target_type) ||
(is_string_type(target_primitive_type) &&
is_string_type(origin_primitive_type)))) {
children_exprs.emplace_back(expr_without_cast(child));
diff --git a/be/test/common/schema_util_test.cpp
b/be/test/common/schema_util_test.cpp
index 5fd157756bf..c747dd13e2f 100644
--- a/be/test/common/schema_util_test.cpp
+++ b/be/test/common/schema_util_test.cpp
@@ -48,6 +48,18 @@ void construct_subcolumn(TabletSchemaSPtr schema, const
FieldType& type, int32_t
vectorized::PathInData col_path(path);
subcol.set_path_info(col_path);
subcol.set_name(col_path.get_path());
+
+ if (type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
+ TabletColumn array_item_col;
+ // double not support inverted index
+ array_item_col.set_type(FieldType::OLAP_FIELD_TYPE_DOUBLE);
+ array_item_col.set_is_nullable(true);
+ array_item_col.set_unique_id(-1);
+ array_item_col.set_parent_unique_id(col_unique_id);
+
+ subcol.add_sub_column(array_item_col);
+ }
+
schema->append_column(subcol);
subcolumns->emplace_back(std::move(subcol));
}
diff --git a/regression-test/data/variant_github_events_new_p2/load.out
b/regression-test/data/variant_github_events_new_p2/load.out
index 0aeaaeed024..2b5aaa3d4b9 100644
Binary files a/regression-test/data/variant_github_events_new_p2/load.out and
b/regression-test/data/variant_github_events_new_p2/load.out differ
diff --git
a/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out
b/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out
new file mode 100644
index 00000000000..ff409b7405a
Binary files /dev/null and
b/regression-test/data/variant_p0/with_index/test_array_contains_with_inverted_index.out
differ
diff --git
a/regression-test/data/variant_p0/with_index/test_array_inverted_index_profile.out
b/regression-test/data/variant_p0/with_index/test_array_inverted_index_profile.out
new file mode 100644
index 00000000000..d2e04cd5b95
Binary files /dev/null and
b/regression-test/data/variant_p0/with_index/test_array_inverted_index_profile.out
differ
diff --git
a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy
b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy
index 83ff988e039..80b31b05131 100644
---
a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy
+++
b/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy
@@ -21,7 +21,6 @@ suite("test_array_contains_with_inverted_index"){
// If we use common expr pass to inverted index , we should set
enable_common_expr_pushdown = true
sql """ set enable_common_expr_pushdown = true; """
-// sql """ set enable_common_expr_pushdown_for_inverted_index = true; """
sql """ set enable_pipeline_x_engine = true;"""
sql """ set enable_profile = true;"""
diff --git
a/regression-test/suites/inverted_index_p0/test_array_with_inverted_index_all_type.groovy
b/regression-test/suites/inverted_index_p0/test_array_with_inverted_index_all_type.groovy
index 6b0c4b89f28..17a43ccdbfb 100644
---
a/regression-test/suites/inverted_index_p0/test_array_with_inverted_index_all_type.groovy
+++
b/regression-test/suites/inverted_index_p0/test_array_with_inverted_index_all_type.groovy
@@ -33,7 +33,6 @@ suite("test_array_with_inverted_index_all_type"){
sql """ set enable_profile = true;"""
// If we use common expr pass to inverted index , we should set
enable_common_expr_pushdown = true
sql """ set enable_common_expr_pushdown = true; """
-// sql """ set enable_common_expr_pushdown_for_inverted_index = true; """
// duplicate key table with all type using standard parser for inverted
index
sql "DROP TABLE IF EXISTS ${indexTblNames[0]}"
diff --git a/regression-test/suites/variant_github_events_new_p2/load.groovy
b/regression-test/suites/variant_github_events_new_p2/load.groovy
index 4291747f2d2..122e7001a3c 100644
--- a/regression-test/suites/variant_github_events_new_p2/load.groovy
+++ b/regression-test/suites/variant_github_events_new_p2/load.groovy
@@ -61,7 +61,19 @@ suite("regression_test_variant_github_events_p2",
"nonConcurrent,p2"){
CREATE TABLE IF NOT EXISTS ${table_name} (
k bigint,
v variant,
- INDEX idx_var(v) USING INVERTED PROPERTIES("parser" = "english")
COMMENT ''
+ INDEX idx_var(v) USING INVERTED COMMENT ''
+ )
+ DUPLICATE KEY(`k`)
+ DISTRIBUTED BY HASH(k) BUCKETS 4
+ properties("replication_num" = "1", "disable_auto_compaction" =
"false");
+ """
+
+ sql """DROP TABLE IF EXISTS github_events_arr"""
+ sql """
+ CREATE TABLE IF NOT EXISTS github_events_arr (
+ k bigint,
+ v array<text>,
+ INDEX idx_var(v) USING INVERTED COMMENT ''
)
DUPLICATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 4
@@ -69,14 +81,30 @@ suite("regression_test_variant_github_events_p2",
"nonConcurrent,p2"){
"""
// 2015
load_json_data.call(table_name, """${getS3Url() +
'/regression/gharchive.m/2015-01-01-0.json'}""")
+ sql """insert into github_events_arr select k,
cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>)
from github_events"""
load_json_data.call(table_name, """${getS3Url() +
'/regression/gharchive.m/2015-01-01-1.json'}""")
+ sql """insert into github_events_arr select k,
cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>)
from github_events"""
load_json_data.call(table_name, """${getS3Url() +
'/regression/gharchive.m/2015-01-01-2.json'}""")
+ sql """insert into github_events_arr select k,
cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>)
from github_events"""
load_json_data.call(table_name, """${getS3Url() +
'/regression/gharchive.m/2015-01-01-3.json'}""")
+ sql """insert into github_events_arr select k,
cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>)
from github_events"""
// 2022
load_json_data.call(table_name, """${getS3Url() +
'/regression/gharchive.m/2022-11-07-16.json'}""")
+ sql """insert into github_events_arr select k,
cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>)
from github_events"""
load_json_data.call(table_name, """${getS3Url() +
'/regression/gharchive.m/2022-11-07-10.json'}""")
+ sql """insert into github_events_arr select k,
cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>)
from github_events"""
load_json_data.call(table_name, """${getS3Url() +
'/regression/gharchive.m/2022-11-07-22.json'}""")
+ sql """insert into github_events_arr select k,
cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>)
from github_events"""
load_json_data.call(table_name, """${getS3Url() +
'/regression/gharchive.m/2022-11-07-23.json'}""")
+
+ // test array index
+ sql """insert into github_events_arr select k,
cast(v['payload']['pull_request']['head']['repo']['topics'] as array<text>)
from github_events"""
+ sql "set enable_common_expr_pushdown = true; "
+ qt_sql """select count() from github_events_arr where array_contains(v,
'css');"""
+ sql "set enable_common_expr_pushdown = false; "
+ qt_sql """select count() from github_events_arr where array_contains(v,
'css');"""
+ sql "set enable_common_expr_pushdown = true; "
+
// TODO fix compaction issue, this case could be stable
qt_sql """select cast(v["payload"]["pull_request"]["additions"] as int)
from github_events where cast(v["repo"]["name"] as string) =
'xpressengine/xe-core' order by 1;"""
// TODO add test case that some certain columns are materialized in some
file while others are not materilized(sparse)
@@ -127,4 +155,33 @@ suite("regression_test_variant_github_events_p2",
"nonConcurrent,p2"){
sql """DELETE FROM github_events where k >= 9223372036854775107"""
qt_sql_select_count """ select count(*) from github_events_2; """
-}
+
+ trigger_and_wait_compaction("github_events", "full")
+
+ // query and filterd by inverted index
+ profile("test_profile_1") {
+ sql """ set enable_common_expr_pushdown = true; """
+ sql """ set enable_pipeline_x_engine = true;"""
+ sql """ set enable_profile = true;"""
+ sql """ set profile_level = 2;"""
+ run {
+ qt_sql_inv """/* test_profile_1 */
+ select count() from github_events where
arrays_overlap(cast(v['payload']['pull_request']['head']['repo']['topics'] as
array<text>), ['javascript', 'css'] )
+ """
+ }
+
+ check { profileString, exception ->
+ log.info(profileString)
+ // Use a regular expression to match the numeric value inside
parentheses after "RowsInvertedIndexFiltered:"
+ def matcher = (profileString =~
/RowsInvertedIndexFiltered:\s+[^\(]+\((\d+)\)/)
+ def total = 0
+ while (matcher.find()) {
+ total += matcher.group(1).toInteger()
+ }
+ // Assert that the sum of all matched numbers equals 67677
+ assertEquals(67677, total)
+ }
+ }
+ sql """ set enable_common_expr_pushdown = true; """
+ qt_sql_inv """select count() from github_events where
arrays_overlap(cast(v['payload']['pull_request']['head']['repo']['topics'] as
array<text>), ['javascript', 'css'] )"""
+}
\ No newline at end of file
diff --git
a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy
b/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy
similarity index 56%
copy from
regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy
copy to
regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy
index 83ff988e039..e31ae973e91 100644
---
a/regression-test/suites/inverted_index_p0/test_array_contains_with_inverted_index.groovy
+++
b/regression-test/suites/variant_p0/with_index/test_array_contains_with_inverted_index.groovy
@@ -18,20 +18,20 @@
suite("test_array_contains_with_inverted_index"){
// prepare test table
def indexTblName = "tai"
+ sql "set disable_inverted_index_v1_for_variant = false"
// If we use common expr pass to inverted index , we should set
enable_common_expr_pushdown = true
sql """ set enable_common_expr_pushdown = true; """
-// sql """ set enable_common_expr_pushdown_for_inverted_index = true; """
- sql """ set enable_pipeline_x_engine = true;"""
sql """ set enable_profile = true;"""
sql "DROP TABLE IF EXISTS ${indexTblName}"
// create 1 replica table
+ def storageFormat = new Random().nextBoolean() ? "V1" : "V2"
sql """
CREATE TABLE IF NOT EXISTS `${indexTblName}` (
`apply_date` date NULL COMMENT '',
`id` varchar(60) NOT NULL COMMENT '',
- `inventors` array<text> NULL COMMENT '',
+ `inventors` variant NULL COMMENT '',
INDEX index_inverted_inventors(inventors) USING INVERTED COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`apply_date`, `id`)
@@ -43,67 +43,68 @@ suite("test_array_contains_with_inverted_index"){
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
- "enable_single_replica_compaction" = "false"
+ "enable_single_replica_compaction" = "false",
+ "inverted_index_storage_format" = "$storageFormat"
);
"""
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '[\"a\", \"b\",
\"c\"]'); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '[\"d\", \"e\",
\"f\", \"g\", \"h\", \"i\", \"j\", \"k\", \"l\"]'); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '48a33ec3453a28bce84b8f96fe161956', '[\"m\"]'); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '021603e7dcfe65d44af0efd0e5aee154', '[\"n\"]'); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '9fcb57ae675f0af4d613d9e6c0e8a2a2', '[\"o\"]'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '{"inventors":["a",
"b", "c"]}'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '{"inventors":["d",
"e", "f", "g", "h", "i", "j", "k", "l"]}'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '48a33ec3453a28bce84b8f96fe161956',
'{"inventors":["m"]}'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '021603e7dcfe65d44af0efd0e5aee154',
'{"inventors":["n"]}'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '9fcb57ae675f0af4d613d9e6c0e8a2a2',
'{"inventors":["o"]}'); """
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`) VALUES
('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a3'); """
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a4', NULL); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a5', '[]'); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a6', '[null,null,null]');
"""
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a7', [null,null,null]); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a8', []); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2019-01-01', 'a648a447b8f71522f11632eba4b4adde', '[\"p\", \"q\",
\"r\", \"s\", \"t\"]'); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2019-01-01', 'a9fb5c985c90bf05f3bee5ca3ae95260', '[\"u\", \"v\"]'); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2019-01-01', '0974e7a82e30d1af83205e474fadd0a2', '[\"w\"]'); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2019-01-01', '26823b3995ee38bd145ddd910b2f6300', '[\"x\"]'); """
- sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1', '[\"y\"]'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a5', '{"inventors":[]}');
"""
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a6',
'{"inventors":[null,null,null]}'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a7',
'{"inventors":[null,null,null]}'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a8', '{"inventors":[]}');
"""
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2019-01-01', 'a648a447b8f71522f11632eba4b4adde', '{"inventors":["p",
"q", "r", "s", "t"]}'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2019-01-01', 'a9fb5c985c90bf05f3bee5ca3ae95260', '{"inventors":["u",
"v"]}'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2019-01-01', '0974e7a82e30d1af83205e474fadd0a2',
'{"inventors":["w"]}'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2019-01-01', '26823b3995ee38bd145ddd910b2f6300',
'{"inventors":["x"]}'); """
+ sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`)
VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1',
'{"inventors":["y"]}'); """
sql """ set enable_common_expr_pushdown = true """
qt_sql """ select count() from ${indexTblName}"""
- def param_contains = ["\'s\'", "\'\'", null]
+ def param_contains = ["'s'", "''", null]
for (int i = 0 ; i < param_contains.size(); ++i) {
def p = param_contains[i]
log.info("param: ${p}")
- order_qt_sql """ select * from tai where array_contains(inventors,
${p}) order by id; """
- order_qt_sql """ select * from tai where array_contains(inventors,
${p}) and apply_date = '2017-01-01' order by id; """
- order_qt_sql """ select * from tai where array_contains(inventors,
${p}) and apply_date = '2019-01-01' order by id; """
- order_qt_sql """ select * from tai where array_contains(inventors,
${p}) or apply_date = '2017-01-01' order by id; """
- order_qt_sql """ select * from tai where !array_contains(inventors,
${p}) order by id; """
- order_qt_sql """ select * from tai where !array_contains(inventors,
${p}) and apply_date = '2017-01-01' order by id; """
- order_qt_sql """ select * from tai where !array_contains(inventors,
${p}) and apply_date = '2019-01-01' order by id; """
- order_qt_sql """ select * from tai where !array_contains(inventors,
${p}) or apply_date = '2017-01-01' order by id; """
- order_qt_sql """ select * from tai where (array_contains(inventors,
${p}) and apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id;
"""
+ order_qt_sql """ select * from tai where
array_contains(cast(inventors['inventors'] as array<text>), ${p}) order by id;
"""
+ order_qt_sql """ select * from tai where
array_contains(cast(inventors['inventors'] as array<text>), ${p}) and
apply_date = '2017-01-01' order by id; """
+ order_qt_sql """ select * from tai where
array_contains(cast(inventors['inventors'] as array<text>), ${p}) and
apply_date = '2019-01-01' order by id; """
+ order_qt_sql """ select * from tai where
array_contains(cast(inventors['inventors'] as array<text>), ${p}) or apply_date
= '2017-01-01' order by id; """
+ order_qt_sql """ select * from tai where
!array_contains(cast(inventors['inventors'] as array<text>), ${p}) order by id;
"""
+ order_qt_sql """ select * from tai where
!array_contains(cast(inventors['inventors'] as array<text>), ${p}) and
apply_date = '2017-01-01' order by id; """
+ order_qt_sql """ select * from tai where
!array_contains(cast(inventors['inventors'] as array<text>), ${p}) and
apply_date = '2019-01-01' order by id; """
+ order_qt_sql """ select * from tai where
!array_contains(cast(inventors['inventors'] as array<text>), ${p}) or
apply_date = '2017-01-01' order by id; """
+ order_qt_sql """ select * from tai where
(array_contains(cast(inventors['inventors'] as array<text>), ${p}) and
apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id; """
}
// test arrays_overlap with inverted index
// now if we use inverted index we will not eval exprs
- def param = [["\'s\'", "\'t\'"], [], null, ["\'s\'", "\'\'", "\'t\'"],
["\'s\'", null, "\'t\'"], [null, "\'\'"], ["\'s\'", null, "\'t\'", "\'\'"]] //
null for arrays_overlap will return null which in predicate will lead to return
empty set
+ def param = [["'s'", "'t'"], [], null, ["'s'", "''", "'t'"], ["'s'", null,
"'t'"], [null, "''"], ["'s'", null, "'t'", "''"]] // null for arrays_overlap
will return null which in predicate will lead to return empty set
for (int i = 0 ; i < param.size(); ++i) {
def p = param[i]
log.info("param: ${p}")
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where arrays_overlap(inventors, ${p}) order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where arrays_overlap(inventors, ${p}) order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where arrays_overlap(inventors, ${p}) and apply_date =
'2017-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where arrays_overlap(inventors, ${p}) and apply_date =
'2017-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where arrays_overlap(inventors, ${p}) and apply_date =
'2019-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where arrays_overlap(inventors, ${p}) and apply_date =
'2019-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where arrays_overlap(inventors, ${p}) or apply_date =
'2017-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where arrays_overlap(inventors, ${p}) or apply_date =
'2017-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where !arrays_overlap(inventors, ${p}) order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where !arrays_overlap(inventors, ${p}) order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where !arrays_overlap(inventors, ${p}) and apply_date =
'2017-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where !arrays_overlap(inventors, ${p}) and apply_date =
'2017-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where !arrays_overlap(inventors, ${p}) and apply_date =
'2019-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where !arrays_overlap(inventors, ${p}) and apply_date =
'2019-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where !arrays_overlap(inventors, ${p}) or apply_date =
'2017-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where !arrays_overlap(inventors, ${p}) or apply_date =
'2017-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where (arrays_overlap(inventors, ${p}) and apply_date =
'2017-01-01') or apply_date = '2019-01-01' order by id; """
- order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where (arrays_overlap(inventors, ${p}) and apply_date =
'2017-01-01') or apply_date = '2019-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) and apply_date = '2017-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) and apply_date = '2017-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) and apply_date = '2019-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) and apply_date = '2019-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) or apply_date = '2017-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) or apply_date = '2017-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) and apply_date = '2017-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) and apply_date = '2017-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) and apply_date = '2019-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) and apply_date = '2019-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) or apply_date = '2017-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where !arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) or apply_date = '2017-01-01' order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
true)*/ * from tai where (arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) and apply_date = '2017-01-01') or apply_date = '2019-01-01'
order by id; """
+ order_qt_sql """ select /*+SET_VAR(enable_common_expr_pushdown =
false)*/ * from tai where (arrays_overlap(cast(inventors['inventors'] as
array<text>), ${p}) and apply_date = '2017-01-01') or apply_date = '2019-01-01'
order by id; """
}
}
diff --git
a/regression-test/suites/variant_p0/with_index/test_array_index_write.groovy
b/regression-test/suites/variant_p0/with_index/test_array_index_write.groovy
new file mode 100644
index 00000000000..ba37537a8cc
--- /dev/null
+++ b/regression-test/suites/variant_p0/with_index/test_array_index_write.groovy
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_array_index_write", "nonConcurrent"){
+ sql "set disable_inverted_index_v1_for_variant = false"
+ def create_variant_index_table = { testTablex, parser ->
+ def stmt = "CREATE TABLE IF NOT EXISTS " + testTablex + "(\n" +
+ " k1 INT NULL,\n" +
+ " c_arr VARIANT NULL COMMENT '',\n"
+
+ String strTmp = parser == "" ? "INDEX index_inverted_c_arr(c_arr)
USING INVERTED COMMENT 'c_arr index',\n" :
+ "INDEX index_inverted_c_arr(c_arr) USING INVERTED
PROPERTIES( \"parser\"=\" " + parser + "\") COMMENT 'c_arr index',\n"
+
+ stmt += strTmp
+ stmt = stmt.substring(0, stmt.length()-2)
+ def storageFormat = new Random().nextBoolean() ? "V1" : "V2"
+ stmt += ") \nENGINE=OLAP\n" +
+ "DUPLICATE KEY(`k1`)\n" +
+ "COMMENT 'OLAP'\n" +
+ "DISTRIBUTED BY HASH(`k1`) BUCKETS 10\n" +
+ "PROPERTIES(\"replication_num\" = \"1\",
\"inverted_index_storage_format\" = \"$storageFormat\");"
+ return stmt
+ }
+
+ def indexTbName = "test_variant_index_parser_empty"
+ sql create_variant_index_table.call(indexTbName, "")
+
+ def checkpoints_name = "array_inverted_index.write_index"
+
+ try {
+ GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name,
[single_array_field_count: 3])
+ sql "insert into ${indexTbName} values(1, '{\"c_arr\": [\"amory\",
\"is\", \"committer\"]}')"
+ } finally {
+ GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
+ }
+
+ try {
+ GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name,
[single_array_field_count: 2])
+ sql "insert into ${indexTbName} values(2, '{\"c_arr\": [\"amory\",
\"better\"]}')"
+ } finally {
+ GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
+ }
+
+ try {
+ GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name,
[single_array_field_count: 1])
+ sql "insert into ${indexTbName} values(3, '{\"c_arr\": [\"amory\",
null]}')"
+ } finally {
+ GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
+ }
+
+ try {
+ GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name,
[single_array_field_count: 0])
+ sql "insert into ${indexTbName} values(4, '{\"c_arr\": [null, null]}')"
+ } finally {
+ GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
+ }
+
+ try {
+ GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name,
[single_array_field_count: 0])
+ sql "insert into ${indexTbName} values(5, '{\"c_arr\": []}')"
+ } finally {
+ GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
+ }
+
+ try {
+ GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name,
[single_array_field_count: 0])
+ sql "insert into ${indexTbName} values(6, null)"
+ } finally {
+ GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
+ }
+
+ // test multiple types
+ sql """insert into ${indexTbName} values(6, '{"c_arr" : ["text"]}')"""
+ sql """insert into ${indexTbName} values(6, '{"c_arr" : [1.1]}')"""
+ sql """insert into ${indexTbName} values(6, '{"c_arr" : [1.0]}')"""
+ sql """insert into ${indexTbName} values(6, '{"c_arr" : [90]}')"""
+ sql """insert into ${indexTbName} values(6, '{"c_arr" :
[90999999999999]}')"""
+
+}
diff --git
a/regression-test/suites/variant_p0/with_index/test_array_inverted_index_profile.groovy
b/regression-test/suites/variant_p0/with_index/test_array_inverted_index_profile.groovy
new file mode 100644
index 00000000000..fb0af622b2a
--- /dev/null
+++
b/regression-test/suites/variant_p0/with_index/test_array_inverted_index_profile.groovy
@@ -0,0 +1,156 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import groovy.json.JsonSlurper
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+suite("test_variant_arrayInvertedIdx_profile", "nonConcurrent"){
+ // prepare test table
+ def indexTblName = "var_arr_idx"
+ def httpGet = { url ->
+ def dst = 'http://' + context.config.feHttpAddress
+ def conn = new URL(dst + url).openConnection()
+ conn.setRequestMethod("GET")
+ def encoding =
Base64.getEncoder().encodeToString((context.config.feHttpUser + ":" +
+ (context.config.feHttpPassword == null ? "" :
context.config.feHttpPassword)).getBytes("UTF-8"))
+ conn.setRequestProperty("Authorization", "Basic ${encoding}")
+ return conn.getInputStream().getText()
+ }
+
+ def checkRowsInvertedIndexFilter = { sql,
expectedRowsInvertedIndexFiltered ->
+ order_qt_sql sql
+ def profileUrl = '/rest/v1/query_profile/'
+ def profiles = httpGet(profileUrl)
+ log.debug("profiles:{}", profiles);
+ profiles = new JsonSlurper().parseText(profiles)
+ assertEquals(0, profiles.code)
+
+ def profileId = null;
+ for (def profile in profiles["data"]["rows"]) {
+ if (profile["Sql Statement"].contains(sql)) {
+ profileId = profile["Profile ID"]
+ break;
+ }
+ }
+ log.info("profileId:{}", profileId);
+ def profileDetail = httpGet("/rest/v1/query_profile/" + profileId)
+ String regex = "RowsInvertedIndexFiltered:.*(\\d+)"
+ Pattern pattern = Pattern.compile(regex)
+ Matcher matcher = pattern.matcher(profileDetail)
+ log.info("profileDetail:{}", profileDetail);
+ while (matcher.find()) {
+ int number = Integer.parseInt(matcher.group(1))
+ log.info("filter number:{}", number)
+ assertEquals(expectedRowsInvertedIndexFiltered, number)
+ }
+ }
+
+    // To let common expressions be pushed down to the inverted index, we must set
+    // enable_common_expr_pushdown = true
+ sql """ set enable_common_expr_pushdown = true; """
+ sql """ set enable_profile = true;"""
+ sql """ set profile_level = 2;"""
+ sql "set disable_inverted_index_v1_for_variant = false"
+
+ sql "DROP TABLE IF EXISTS ${indexTblName}"
+ def storageFormat = new Random().nextBoolean() ? "V1" : "V2"
+ // create 1 replica table
+ sql """
+ CREATE TABLE IF NOT EXISTS `${indexTblName}` (
+ `apply_date` date NULL COMMENT '',
+ `id` varchar(60) NOT NULL COMMENT '',
+ `inventors` variant NULL COMMENT '',
+ INDEX index_inverted_inventors(inventors) USING INVERTED COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`apply_date`, `id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "is_being_synced" = "false",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false",
+ "inverted_index_storage_format" = "$storageFormat"
+ );
+ """
+
+ sql """ INSERT INTO `var_arr_idx` (`apply_date`, `id`, `inventors`) VALUES
+ ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '{"inventors":["a",
"b", "c"]}'),
+ ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a3', '{"inventors":[]}'),
+ ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '{"inventors":
["d", "e", "f", "g", "h", "i", "j", "k", "l"]}'),
+ ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a4', NULL),
+ ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332e', '{"inventors":
["m", "n", "o", "p", "q", "r", "s", "t", "u"]}'),
+ ('2017-01-01', '8fcb57ae675f0af4d613d9e6c0e8a2a6', '{"inventors":
[null,null,null]}'),
+ ('2019-01-01', 'd93d942d985a8fb7547c72dada8d332f', '{"inventors":
["v", "w", "x", "y", "z"]}'); """
+
+
+ qt_sql1 """ select count() from ${indexTblName}"""
+ def checkpoints_name = "array_func.array_contains"
+ try {
+ GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name,
[result_bitmap: 1])
+ order_qt_sql2 "select apply_date,id, inventors['inventors'] from
var_arr_idx where array_contains(cast(inventors['inventors'] as array<text>),
'w') order by id;"
+ } finally {
+ GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
+ }
+
+ int randomInt = new Random().nextInt(10)
+
+ if (randomInt % 2) {
+ profile("test_profile_time_${randomInt}") {
+ run {
+ sql "/* test_profile_time_${randomInt} */ select
apply_date,id, inventors['inventors'] from var_arr_idx where
array_contains(cast(inventors['inventors'] as array<text>), 'w') order by id"
+ }
+
+ check { profileString, exception ->
+ log.info(profileString)
+ assertTrue(profileString.contains("RowsInvertedIndexFiltered:
6"))
+ }
+ }
+ } else {
+ profile("test_profile_time_${randomInt}") {
+ run {
+ sql "/* test_profile_time_${randomInt} */ select
apply_date,id, inventors['inventors'] from var_arr_idx where
array_contains(cast(inventors['inventors'] as array<text>), 's') and apply_date
= '2017-01-01' order by id"
+ }
+
+ check { profileString, exception ->
+ log.info(profileString)
+ assertTrue(profileString.contains("RowsInvertedIndexFiltered:
5"))
+ }
+ }
+ }
+
+
+ // checkRowsInvertedIndexFilter.call("select apply_date,id,
inventors['inventors'] from var_arr_idx where
array_contains(cast(inventors['inventors'] as array<text>), 'w') order by id;",
6)
+
+ try {
+ GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name,
[result_bitmap: 1])
+ order_qt_sql3 """ select apply_date,id, inventors['inventors'] from
var_arr_idx where array_contains(cast(inventors['inventors'] as array<text>),
's') and apply_date = '2017-01-01' order by id; """
+ } finally {
+ GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
+ }
+    // the apply_date predicate is evaluated by the vectorized filter, leaving 6 rows
+    // for the inverted index to process
+ order_qt_sql4 """ select apply_date,id, inventors['inventors'] from
var_arr_idx where array_contains(cast(inventors['inventors'] as array<text>),
's') and apply_date = '2019-01-01' order by id; """
+
+ order_qt_sql5 """ select apply_date,id, inventors['inventors'] from
var_arr_idx where array_contains(cast(inventors['inventors'] as array<text>),
's') or apply_date = '2017-01-01' order by id; """
+ order_qt_sql6 """ select apply_date,id, inventors['inventors'] from
var_arr_idx where !array_contains(cast(inventors['inventors'] as array<text>),
's') order by id; """
+ order_qt_sql7 """ select apply_date,id, inventors['inventors'] from
var_arr_idx where !array_contains(cast(inventors['inventors'] as array<text>),
's') and apply_date = '2017-01-01' order by id; """
+ order_qt_sql8 """ select apply_date,id, inventors['inventors'] from
var_arr_idx where !array_contains(cast(inventors['inventors'] as array<text>),
's') and apply_date = '2019-01-01' order by id; """
+ order_qt_sql9 """ select apply_date,id, inventors['inventors'] from
var_arr_idx where !array_contains(cast(inventors['inventors'] as array<text>),
's') or apply_date = '2017-01-01' order by id; """
+ order_qt_sql10 """ select apply_date,id, inventors['inventors'] from
var_arr_idx where (array_contains(cast(inventors['inventors'] as array<text>),
's') and apply_date = '2017-01-01') or apply_date = '2019-01-01' order by id;
"""
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]