This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 5982b01bc30 [improve](ip-function)improve is_ip_address_in_range for
inverted index speed
5982b01bc30 is described below
commit 5982b01bc301e77f995408aa05c8e58325f3fd35
Author: amory <[email protected]>
AuthorDate: Thu Oct 17 20:47:52 2024 +0800
[improve](ip-function)improve is_ip_address_in_range for inverted index
speed
pick #41768
---
.../olap/rowset/segment_v2/inverted_index_reader.h | 2 +
be/src/vec/functions/function_ip.h | 252 ++++++++++++---------
be/src/vec/runtime/ip_address_cidr.h | 27 +++
.../functions/scalar/IsIpAddressInRange.java | 6 +
.../test_ip_cidr_search_with_inverted_index.out | 70 ++++++
.../test_ip_cidr_search_with_inverted_index.groovy | 134 +++++++++++
6 files changed, 385 insertions(+), 106 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index ab143da5838..f37516bc40f 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -409,6 +409,8 @@ public:
M(PrimitiveType::TYPE_STRING)
M(PrimitiveType::TYPE_DATEV2)
M(PrimitiveType::TYPE_DATETIMEV2)
+ M(PrimitiveType::TYPE_IPV4)
+ M(PrimitiveType::TYPE_IPV6)
#undef M
default:
return Status::NotSupported("Unsupported primitive type {} for
inverted index reader",
diff --git a/be/src/vec/functions/function_ip.h
b/be/src/vec/functions/function_ip.h
index 30b901b4459..ddb99d80a1b 100644
--- a/be/src/vec/functions/function_ip.h
+++ b/be/src/vec/functions/function_ip.h
@@ -606,110 +606,177 @@ public:
return std::make_shared<DataTypeUInt8>();
}
- Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
- size_t result, size_t input_rows_count) const override
{
- const auto& addr_column_with_type_and_name =
block.get_by_position(arguments[0]);
- const auto& cidr_column_with_type_and_name =
block.get_by_position(arguments[1]);
- WhichDataType addr_type(addr_column_with_type_and_name.type);
- WhichDataType cidr_type(cidr_column_with_type_and_name.type);
- const auto& [addr_column, addr_const] =
- unpack_if_const(addr_column_with_type_and_name.column);
- const auto& [cidr_column, cidr_const] =
- unpack_if_const(cidr_column_with_type_and_name.column);
- const auto* str_addr_column = assert_cast<const
ColumnString*>(addr_column.get());
- const auto* str_cidr_column = assert_cast<const
ColumnString*>(cidr_column.get());
-
- auto col_res = ColumnUInt8::create(input_rows_count, 0);
+ template <PrimitiveType PT, typename ColumnType>
+ void execute_impl_with_ip(size_t input_rows_count, bool addr_const, bool
cidr_const,
+ const ColumnString* str_cidr_column, const
ColumnPtr addr_column,
+ ColumnUInt8* col_res) const {
auto& col_res_data = col_res->get_data();
-
+ const auto& ip_data = assert_cast<const
ColumnType*>(addr_column.get())->get_data();
for (size_t i = 0; i < input_rows_count; ++i) {
auto addr_idx = index_check_const(i, addr_const);
auto cidr_idx = index_check_const(i, cidr_const);
-
- const auto addr =
-
IPAddressVariant(str_addr_column->get_data_at(addr_idx).to_string_view());
const auto cidr =
parse_ip_with_cidr(str_cidr_column->get_data_at(cidr_idx).to_string_view());
- col_res_data[i] = is_address_in_range(addr, cidr) ? 1 : 0;
+ if constexpr (PT == PrimitiveType::TYPE_IPV4) {
+ if (cidr._address.as_v4()) {
+ col_res_data[i] = match_ipv4_subnet(ip_data[addr_idx],
cidr._address.as_v4(),
+ cidr._prefix)
+ ? 1
+ : 0;
+ } else {
+ col_res_data[i] = 0;
+ }
+ } else if constexpr (PT == PrimitiveType::TYPE_IPV6) {
+ if (cidr._address.as_v6()) {
+ col_res_data[i] =
match_ipv6_subnet((uint8*)(&ip_data[addr_idx]),
+ cidr._address.as_v6(),
cidr._prefix)
+ ? 1
+ : 0;
+ } else {
+ col_res_data[i] = 0;
+ }
+ }
}
-
- block.replace_by_position(result, std::move(col_res));
- return Status::OK();
}
-};
-
-// old version throw exception when meet null value
-class FunctionIsIPAddressInRangeOld : public IFunction {
-public:
- static constexpr auto name = "is_ip_address_in_range";
- static FunctionPtr create() { return
std::make_shared<FunctionIsIPAddressInRange>(); }
-
- String get_name() const override { return name; }
- size_t get_number_of_arguments() const override { return 2; }
+ Status evaluate_inverted_index(
+ const ColumnsWithTypeAndName& arguments,
+ const std::vector<vectorized::IndexFieldNameAndTypePair>&
data_type_with_names,
+ std::vector<segment_v2::InvertedIndexIterator*> iterators,
uint32_t num_rows,
+ segment_v2::InvertedIndexResultBitmap& bitmap_result) const
override {
+ DCHECK(arguments.size() == 1);
+ DCHECK(data_type_with_names.size() == 1);
+ DCHECK(iterators.size() == 1);
+ auto* iter = iterators[0];
+ auto data_type_with_name = data_type_with_names[0];
+ if (iter == nullptr) {
+ return Status::OK();
+ }
- DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
- return std::make_shared<DataTypeUInt8>();
+ if (iter->get_inverted_index_reader_type() !=
segment_v2::InvertedIndexReaderType::BKD) {
+ // Not support only bkd index
+ return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>(
+ "Inverted index evaluate skipped, ip range reader can only
support by bkd "
+ "reader");
+ }
+ // Get the is_ip_address_in_range from the arguments: cidr
+ const auto& cidr_column_with_type_and_name = arguments[0];
+ // in is_ip_address_in_range param is const Field
+ ColumnPtr arg_column = cidr_column_with_type_and_name.column;
+ DataTypePtr arg_type = cidr_column_with_type_and_name.type;
+ if ((is_column_nullable(*arg_column) &&
!is_column_const(*remove_nullable(arg_column))) ||
+ (!is_column_nullable(*arg_column) &&
!is_column_const(*arg_column))) {
+ // if not we should skip inverted index and evaluate in expression
+ return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>(
+ "Inverted index evaluate skipped, is_ip_address_in_range
only support const "
+ "value");
+ }
+ // check param type is string
+ if (!WhichDataType(*arg_type).is_string()) {
+ return Status::Error<ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED>(
+ "Inverted index evaluate skipped, is_ip_address_in_range
only support string "
+ "type");
+ }
+ // min && max ip address
+ Field min_ip, max_ip;
+ IPAddressCIDR cidr = parse_ip_with_cidr(arg_column->get_data_at(0));
+ if
(WhichDataType(remove_nullable(data_type_with_name.second)).is_ipv4() &&
+ cidr._address.as_v4()) {
+ auto range = apply_cidr_mask(cidr._address.as_v4(), cidr._prefix);
+ min_ip = range.first;
+ max_ip = range.second;
+ } else if
(WhichDataType(remove_nullable(data_type_with_name.second)).is_ipv6() &&
+ cidr._address.as_v6()) {
+ auto cidr_range_ipv6_col = ColumnIPv6::create(2, 0);
+ auto& cidr_range_ipv6_data = cidr_range_ipv6_col->get_data();
+ apply_cidr_mask(reinterpret_cast<const
char*>(cidr._address.as_v6()),
+ reinterpret_cast<char*>(&cidr_range_ipv6_data[0]),
+ reinterpret_cast<char*>(&cidr_range_ipv6_data[1]),
cidr._prefix);
+ min_ip = cidr_range_ipv6_data[0];
+ max_ip = cidr_range_ipv6_data[1];
+ }
+ // apply for inverted index
+ std::shared_ptr<roaring::Roaring> res_roaring =
std::make_shared<roaring::Roaring>();
+ std::shared_ptr<roaring::Roaring> max_roaring =
std::make_shared<roaring::Roaring>();
+ std::shared_ptr<roaring::Roaring> null_bitmap =
std::make_shared<roaring::Roaring>();
+
+ auto param_type =
data_type_with_name.second->get_type_as_type_descriptor().type;
+ std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory>
query_param = nullptr;
+ // >= min ip
+
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
+ param_type, &min_ip, query_param));
+ RETURN_IF_ERROR(iter->read_from_inverted_index(
+ data_type_with_name.first, query_param->get_value(),
+ segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY,
num_rows, res_roaring));
+ // <= max ip
+
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
+ param_type, &max_ip, query_param));
+ RETURN_IF_ERROR(iter->read_from_inverted_index(
+ data_type_with_name.first, query_param->get_value(),
+ segment_v2::InvertedIndexQueryType::LESS_EQUAL_QUERY,
num_rows, max_roaring));
+
+ DBUG_EXECUTE_IF("ip.inverted_index_filtered", {
+ auto req_id =
DebugPoints::instance()->get_debug_param_or_default<int32_t>(
+ "ip.inverted_index_filtered", "req_id", 0);
+ LOG(INFO) << "execute inverted index req_id: " << req_id
+ << " min: " << res_roaring->cardinality();
+ });
+ *res_roaring &= *max_roaring;
+ DBUG_EXECUTE_IF("ip.inverted_index_filtered", {
+ auto req_id =
DebugPoints::instance()->get_debug_param_or_default<int32_t>(
+ "ip.inverted_index_filtered", "req_id", 0);
+ LOG(INFO) << "execute inverted index req_id: " << req_id
+ << " max: " << max_roaring->cardinality()
+ << " result: " << res_roaring->cardinality();
+ });
+ segment_v2::InvertedIndexResultBitmap result(res_roaring, null_bitmap);
+ bitmap_result = result;
+ bitmap_result.mask_out_null();
+ return Status::OK();
}
- bool use_default_implementation_for_nulls() const override { return false;
}
-
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override
{
+ DBUG_EXECUTE_IF("ip.inverted_index_filtered", {
+ auto req_id =
DebugPoints::instance()->get_debug_param_or_default<int32_t>(
+ "ip.inverted_index_filtered", "req_id", 0);
+ return Status::Error<ErrorCode::INTERNAL_ERROR>(
+ "{} has already execute inverted index req_id {} , should
not execute expr "
+ "with rows: {}",
+ get_name(), req_id, input_rows_count);
+ });
const auto& addr_column_with_type_and_name =
block.get_by_position(arguments[0]);
const auto& cidr_column_with_type_and_name =
block.get_by_position(arguments[1]);
- WhichDataType addr_type(addr_column_with_type_and_name.type);
- WhichDataType cidr_type(cidr_column_with_type_and_name.type);
const auto& [addr_column, addr_const] =
unpack_if_const(addr_column_with_type_and_name.column);
const auto& [cidr_column, cidr_const] =
unpack_if_const(cidr_column_with_type_and_name.column);
- const ColumnString* str_addr_column = nullptr;
- const ColumnString* str_cidr_column = nullptr;
- const NullMap* null_map_addr = nullptr;
- const NullMap* null_map_cidr = nullptr;
-
- if (addr_type.is_nullable()) {
- const auto* addr_column_nullable =
- assert_cast<const ColumnNullable*>(addr_column.get());
- str_addr_column = assert_cast<const ColumnString*>(
- addr_column_nullable->get_nested_column_ptr().get());
- null_map_addr = &addr_column_nullable->get_null_map_data();
- } else {
- str_addr_column = assert_cast<const
ColumnString*>(addr_column.get());
- }
-
- if (cidr_type.is_nullable()) {
- const auto* cidr_column_nullable =
- assert_cast<const ColumnNullable*>(cidr_column.get());
- str_cidr_column = assert_cast<const ColumnString*>(
- cidr_column_nullable->get_nested_column_ptr().get());
- null_map_cidr = &cidr_column_nullable->get_null_map_data();
- } else {
- str_cidr_column = assert_cast<const
ColumnString*>(cidr_column.get());
- }
auto col_res = ColumnUInt8::create(input_rows_count, 0);
auto& col_res_data = col_res->get_data();
- for (size_t i = 0; i < input_rows_count; ++i) {
- auto addr_idx = index_check_const(i, addr_const);
- auto cidr_idx = index_check_const(i, cidr_const);
- if (null_map_addr && (*null_map_addr)[addr_idx]) [[unlikely]] {
- throw Exception(ErrorCode::INVALID_ARGUMENT,
- "The arguments of function {} must be String,
not NULL",
- get_name());
- }
- if (null_map_cidr && (*null_map_cidr)[cidr_idx]) [[unlikely]] {
- throw Exception(ErrorCode::INVALID_ARGUMENT,
- "The arguments of function {} must be String,
not NULL",
- get_name());
+ if (is_ipv4(addr_column_with_type_and_name.type)) {
+ execute_impl_with_ip<PrimitiveType::TYPE_IPV4, ColumnIPv4>(
+ input_rows_count, addr_const, cidr_const,
+ assert_cast<const ColumnString*>(cidr_column.get()),
addr_column, col_res);
+ } else if (is_ipv6(addr_column_with_type_and_name.type)) {
+ execute_impl_with_ip<PrimitiveType::TYPE_IPV6, ColumnIPv6>(
+ input_rows_count, addr_const, cidr_const,
+ assert_cast<const ColumnString*>(cidr_column.get()),
addr_column, col_res);
+ } else {
+ const auto* str_addr_column = assert_cast<const
ColumnString*>(addr_column.get());
+ const auto* str_cidr_column = assert_cast<const
ColumnString*>(cidr_column.get());
+
+ for (size_t i = 0; i < input_rows_count; ++i) {
+ auto addr_idx = index_check_const(i, addr_const);
+ auto cidr_idx = index_check_const(i, cidr_const);
+
+ const auto addr =
+
IPAddressVariant(str_addr_column->get_data_at(addr_idx).to_string_view());
+ const auto cidr =
+
parse_ip_with_cidr(str_cidr_column->get_data_at(cidr_idx).to_string_view());
+ col_res_data[i] = is_address_in_range(addr, cidr) ? 1 : 0;
}
- const auto addr =
-
IPAddressVariant(str_addr_column->get_data_at(addr_idx).to_string_view());
- const auto cidr =
-
parse_ip_with_cidr(str_cidr_column->get_data_at(cidr_idx).to_string_view());
- col_res_data[i] = is_address_in_range(addr, cidr) ? 1 : 0;
}
block.replace_by_position(result, std::move(col_res));
@@ -797,21 +864,6 @@ public:
std::move(col_upper_range_output)}));
return Status::OK();
}
-
-private:
- static inline std::pair<UInt32, UInt32> apply_cidr_mask(UInt32 src, UInt8
bits_to_keep) {
- if (bits_to_keep >= 8 * sizeof(UInt32)) {
- return {src, src};
- }
- if (bits_to_keep == 0) {
- return {static_cast<UInt32>(0), static_cast<UInt32>(-1)};
- }
- UInt32 mask = static_cast<UInt32>(-1) << (8 * sizeof(UInt32) -
bits_to_keep);
- UInt32 lower = src & mask;
- UInt32 upper = lower | ~mask;
-
- return {lower, upper};
- }
};
class FunctionIPv6CIDRToRange : public IFunction {
@@ -936,18 +988,6 @@ public:
return ColumnStruct::create(
Columns {std::move(col_res_lower_range),
std::move(col_res_upper_range)});
}
-
-private:
- static void apply_cidr_mask(const char* __restrict src, char* __restrict
dst_lower,
- char* __restrict dst_upper, UInt8
bits_to_keep) {
- // little-endian mask
- const auto& mask = get_cidr_mask_ipv6(bits_to_keep);
-
- for (int8_t i = IPV6_BINARY_LENGTH - 1; i >= 0; --i) {
- dst_lower[i] = src[i] & mask[i];
- dst_upper[i] = dst_lower[i] | ~mask[i];
- }
- }
};
class FunctionIsIPv4Compat : public IFunction {
diff --git a/be/src/vec/runtime/ip_address_cidr.h
b/be/src/vec/runtime/ip_address_cidr.h
index de4c0050982..9e6ac1fd57b 100644
--- a/be/src/vec/runtime/ip_address_cidr.h
+++ b/be/src/vec/runtime/ip_address_cidr.h
@@ -24,6 +24,33 @@
#include "vec/common/format_ip.h"
namespace doris {
+namespace vectorized {
+static inline std::pair<UInt32, UInt32> apply_cidr_mask(UInt32 src, UInt8
bits_to_keep) {
+ if (bits_to_keep >= 8 * sizeof(UInt32)) {
+ return {src, src};
+ }
+ if (bits_to_keep == 0) {
+ return {static_cast<UInt32>(0), static_cast<UInt32>(-1)};
+ }
+ UInt32 mask = static_cast<UInt32>(-1) << (8 * sizeof(UInt32) -
bits_to_keep);
+ UInt32 lower = src & mask;
+ UInt32 upper = lower | ~mask;
+
+ return {lower, upper};
+}
+
+static inline void apply_cidr_mask(const char* __restrict src, char*
__restrict dst_lower,
+ char* __restrict dst_upper, UInt8
bits_to_keep) {
+ // little-endian mask
+ const auto& mask = get_cidr_mask_ipv6(bits_to_keep);
+
+ for (int8_t i = IPV6_BINARY_LENGTH - 1; i >= 0; --i) {
+ dst_lower[i] = src[i] & mask[i];
+ dst_upper[i] = dst_lower[i] | ~mask[i];
+ }
+}
+} // namespace vectorized
+
class IPAddressVariant {
public:
explicit IPAddressVariant(std::string_view address_str) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java
index 85fe4dcab78..a29c1502223 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/IsIpAddressInRange.java
@@ -24,6 +24,8 @@ import
org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.BooleanType;
+import org.apache.doris.nereids.types.IPv4Type;
+import org.apache.doris.nereids.types.IPv6Type;
import org.apache.doris.nereids.types.StringType;
import org.apache.doris.nereids.types.VarcharType;
@@ -39,6 +41,10 @@ public class IsIpAddressInRange extends ScalarFunction
implements BinaryExpression, ExplicitlyCastableSignature,
PropagateNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
+
FunctionSignature.ret(BooleanType.INSTANCE).args(IPv4Type.INSTANCE,
VarcharType.SYSTEM_DEFAULT),
+
FunctionSignature.ret(BooleanType.INSTANCE).args(IPv4Type.INSTANCE,
StringType.INSTANCE),
+
FunctionSignature.ret(BooleanType.INSTANCE).args(IPv6Type.INSTANCE,
VarcharType.SYSTEM_DEFAULT),
+
FunctionSignature.ret(BooleanType.INSTANCE).args(IPv6Type.INSTANCE,
StringType.INSTANCE),
FunctionSignature.ret(BooleanType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT,
VarcharType.SYSTEM_DEFAULT),
FunctionSignature.ret(BooleanType.INSTANCE).args(StringType.INSTANCE,
StringType.INSTANCE));
diff --git
a/regression-test/data/inverted_index_p0/test_ip_cidr_search_with_inverted_index.out
b/regression-test/data/inverted_index_p0/test_ip_cidr_search_with_inverted_index.out
new file mode 100644
index 00000000000..f365449b9e5
--- /dev/null
+++
b/regression-test/data/inverted_index_p0/test_ip_cidr_search_with_inverted_index.out
@@ -0,0 +1,70 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+5
+
+-- !sql_without_ii_0 --
+
+-- !sql_without_ii_1 --
+
+-- !sql_without_ii_2 --
+
+-- !sql_without_ii_3 --
+2 42.117.228.166 2001:16a0:2:200a::2 true
+
+-- !sql_without_ii_4 --
+
+-- !sql_without_ii_5 --
+5 255.255.255.255 \N \N
+
+-- !sql_without_ii_6 --
+
+-- !sql_without_ii_7 --
+
+-- !sql_without_ii_8 --
+
+-- !sql_without_ii_9 --
+
+-- !sql_with_ii_0 --
+
+-- !sql_with_ii_1 --
+
+-- !sql_with_ii_2 --
+
+-- !sql_with_ii_3 --
+2 42.117.228.166 2001:16a0:2:200a::2 true
+
+-- !sql_with_ii_4 --
+
+-- !sql_with_ii_5 --
+5 255.255.255.255 \N \N
+
+-- !sql_with_ii_6 --
+
+-- !sql_with_ii_7 --
+
+-- !sql_with_ii_8 --
+
+-- !sql_with_ii_9 --
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+2 42.117.228.166 2001:16a0:2:200a::2
+
+-- !sql --
+
+-- !sql --
+5 255.255.255.255 \N
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+
+-- !sql --
+
diff --git
a/regression-test/suites/inverted_index_p0/test_ip_cidr_search_with_inverted_index.groovy
b/regression-test/suites/inverted_index_p0/test_ip_cidr_search_with_inverted_index.groovy
new file mode 100644
index 00000000000..61751f2404f
--- /dev/null
+++
b/regression-test/suites/inverted_index_p0/test_ip_cidr_search_with_inverted_index.groovy
@@ -0,0 +1,134 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_ip_cidr_search_with_inverted_index", "nonConcurrent"){
+ // prepare test table
+ sql "DROP TABLE IF EXISTS tc_ip_cidr_search_with_inverted_index"
+ // create 1 replica table
+ sql """
+ CREATE TABLE IF NOT EXISTS `tc_ip_cidr_search_with_inverted_index` (
+ `id` INT NULL,
+ `ipv4` IPV4 NULL,
+ `ipv6` IPV6 NULL,
+ `cidr` TEXT NULL,
+ INDEX v4_index (`ipv4`) USING INVERTED,
+ INDEX v6_index (`ipv6`) USING INVERTED
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 4
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "is_being_synced" = "false",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false"
+ );
+ """
+ sql """ set enable_profile = true;"""
+
+ sql """ insert into tc_ip_cidr_search_with_inverted_index values(1,
'59.50.185.152', '2a02:e980:83:5b09:ecb8:c669:b336:650e', '127.0.0.0/8'),(3,
'119.36.22.147', '2001:4888:1f:e891:161:26::', '127.0.0.0/8'),(2,
'42.117.228.166', '2001:16a0:2:200a::2', null); """
+ sql """ insert into tc_ip_cidr_search_with_inverted_index values(4, '.',
'2001:1b70:a1:610::b102:2', null); """
+ sql """ insert into tc_ip_cidr_search_with_inverted_index values(5,
'255.255.255.255', 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffffg', null) """
+
+ qt_sql """ select count() from tc_ip_cidr_search_with_inverted_index"""
+ // without inverted index query
+ sql """ set enable_common_expr_pushdown = false; """
+ sql """ set enable_inverted_index_query=false; """
+ // select ipv6 in ipv4 cidr
+ qt_sql_without_ii_0 """ select id, ipv4, ipv6,
is_ip_address_in_range(ipv6, '255.255.255.255/12') from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6,
'255.255.255.255/12') order by id; """
+ // select ipv6 in ipv6 cidr
+ qt_sql_without_ii_1 """ select id, ipv4, ipv6,
is_ip_address_in_range(ipv6, 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6,
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id; """
+ qt_sql_without_ii_2 """ select id, ipv4, ipv6,
is_ip_address_in_range(ipv6, '::ffff:192.168.0.4/128') from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6,
'::ffff:192.168.0.4/128') order by id; """
+ qt_sql_without_ii_3 """ select id, ipv4, ipv6,
is_ip_address_in_range(ipv6, '2001:16a0:2:200a::2/64') from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6,
'2001:16a0:2:200a::2/64') order by id; """
+
+ // select ipv4 in ipv6 cidr
+ qt_sql_without_ii_4 """ select id, ipv4, ipv6,
is_ip_address_in_range(ipv4, 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4,
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id; """
+ // select ipv4 in ipv4 cidr
+ qt_sql_without_ii_5 """ select id, ipv4, ipv6,
is_ip_address_in_range(ipv6, '255.255.255.255/12') from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4,
'255.255.255.255/12') order by id; """
+ qt_sql_without_ii_6 """ select id, ipv4, ipv6,
is_ip_address_in_range(ipv4, '127.0.0.0/8') from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4,
'127.0.0.0/8') order by id; """
+ qt_sql_without_ii_7 """ select id, ipv4, ipv6,
is_ip_address_in_range(ipv4, '192.168.100.0/24') from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4,
'192.168.100.0/24') order by id; """
+
+ // select in null cidr
+ qt_sql_without_ii_8 """ select id, ipv4, ipv6,
is_ip_address_in_range(ipv4, null) from tc_ip_cidr_search_with_inverted_index
where is_ip_address_in_range(ipv4, null) order by id; """
+ qt_sql_without_ii_9 """ select id, ipv4, ipv6,
is_ip_address_in_range(ipv6, null) from tc_ip_cidr_search_with_inverted_index
where is_ip_address_in_range(ipv6, null) order by id; """
+
+ // with inverted index query
+ // If we use common expr pass to inverted index , we should set
enable_common_expr_pushdown = true
+ sql """ set enable_common_expr_pushdown = true; """
+ sql """ set enable_inverted_index_query=true; """
+ sql """ set inverted_index_skip_threshold = 0; """ // set skip threshold
to 0
+
+ // select ipv6 in ipv4 cidr
+ qt_sql_with_ii_0 """ select id, ipv4, ipv6, is_ip_address_in_range(ipv6,
'255.255.255.255/12') from tc_ip_cidr_search_with_inverted_index where
is_ip_address_in_range(ipv6, '255.255.255.255/12') order by id; """
+ // select ipv6 in ipv6 cidr
+ qt_sql_with_ii_1 """ select id, ipv4, ipv6, is_ip_address_in_range(ipv6,
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6,
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id; """
+ qt_sql_with_ii_2 """ select id, ipv4, ipv6, is_ip_address_in_range(ipv6,
'::ffff:192.168.0.4/128') from tc_ip_cidr_search_with_inverted_index where
is_ip_address_in_range(ipv6, '::ffff:192.168.0.4/128') order by id; """
+ qt_sql_with_ii_3 """ select id, ipv4, ipv6, is_ip_address_in_range(ipv6,
'2001:16a0:2:200a::2/64') from tc_ip_cidr_search_with_inverted_index where
is_ip_address_in_range(ipv6, '2001:16a0:2:200a::2/64') order by id; """
+
+ // select ipv4 in ipv6 cidr
+ qt_sql_with_ii_4 """ select id, ipv4, ipv6, is_ip_address_in_range(ipv4,
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4,
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id; """
+ // select ipv4 in ipv4 cidr
+ qt_sql_with_ii_5 """ select id, ipv4, ipv6, is_ip_address_in_range(ipv6,
'255.255.255.255/12') from tc_ip_cidr_search_with_inverted_index where
is_ip_address_in_range(ipv4, '255.255.255.255/12') order by id; """
+ qt_sql_with_ii_6 """ select id, ipv4, ipv6, is_ip_address_in_range(ipv4,
'127.0.0.0/8') from tc_ip_cidr_search_with_inverted_index where
is_ip_address_in_range(ipv4, '127.0.0.0/8') order by id; """
+ qt_sql_with_ii_7 """ select id, ipv4, ipv6, is_ip_address_in_range(ipv4,
'192.168.100.0/24') from tc_ip_cidr_search_with_inverted_index where
is_ip_address_in_range(ipv4, '192.168.100.0/24') order by id; """
+
+ // select in null cidr
+ qt_sql_with_ii_8 """ select id, ipv4, ipv6, is_ip_address_in_range(ipv4,
null) from tc_ip_cidr_search_with_inverted_index where
is_ip_address_in_range(ipv4, null) order by id; """
+ qt_sql_with_ii_9 """ select id, ipv4, ipv6, is_ip_address_in_range(ipv6,
null) from tc_ip_cidr_search_with_inverted_index where
is_ip_address_in_range(ipv6, null) order by id; """
+
+
+
+ def create_sql = {
+ List<String> list = new ArrayList<>()
+ // select ipv6 in ipv4 cidr
+ list.add("select id, ipv4, ipv6 from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6,
'255.255.255.255/12') order by id;")
+ // select ipv6 in ipv6 cidr
+ list.add("select id, ipv4, ipv6 from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6,
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id;")
+ list.add("select id, ipv4, ipv6 from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6,
'::ffff:192.168.0.4/128') order by id;")
+ list.add("select id, ipv4, ipv6 from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6,
'2001:16a0:2:200a::2/64') order by id;")
+ // select ipv4 in ipv6 cidr
+ list.add("select id, ipv4, ipv6 from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4,
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff/12') order by id;")
+ // select ipv4 in ipv4 cidr
+ list.add("select id, ipv4, ipv6 from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4,
'255.255.255.255/12') order by id;")
+ list.add("select id, ipv4, ipv6 from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4,
'127.0.0.0/8') order by id;")
+ list.add("select id, ipv4, ipv6 from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4,
'192.168.100.0/24') order by id;")
+ // select in null cidr
+ list.add("select id, ipv4, ipv6 from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv4, null)
order by id;")
+ list.add("select id, ipv4, ipv6 from
tc_ip_cidr_search_with_inverted_index where is_ip_address_in_range(ipv6, null)
order by id;")
+ return list;
+ }
+
+ def checkpoints_name = "ip.inverted_index_filtered"
+ def execute_sql = { sqlList ->
+ def i = 0
+ for (sqlStr in sqlList) {
+ try {
+ log.info("execute sql: i")
+ GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name,
[req_id: i])
+ order_qt_sql """ ${sqlStr} """
+ } finally {
+ GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
+ }
+ ++i
+ }
+ }
+
+ execute_sql.call(create_sql.call())
+
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]