This is an automated email from the ASF dual-hosted git repository.
starocean999 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 0c9c32c52d7 [Feature](datatype) update be ut codes and fix bugs for IPv4/v6 (#28670)
0c9c32c52d7 is described below
commit 0c9c32c52d7605f1bd23b9f24eadecd809813714
Author: yangshijie <[email protected]>
AuthorDate: Wed Dec 20 14:38:46 2023 +0800
[Feature](datatype) update be ut codes and fix bugs for IPv4/v6 (#28670)
---
be/src/olap/rowset/segment_v2/encoding_info.cpp | 2 +
be/src/olap/types.h | 4 +-
be/src/vec/data_types/data_type_ipv4.cpp | 43 ++++----------------
be/src/vec/data_types/data_type_ipv4.h | 3 --
be/src/vec/data_types/data_type_ipv6.cpp | 23 ++++-------
be/src/vec/data_types/data_type_ipv6.h | 5 +--
be/test/vec/data_types/from_string_test.cpp | 53 +++++++++++++++++++++++++
7 files changed, 74 insertions(+), 59 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp
index ecf127e27a1..f10aba5cd3b 100644
--- a/be/src/olap/rowset/segment_v2/encoding_info.cpp
+++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp
@@ -330,6 +330,8 @@ EncodingInfoResolver::EncodingInfoResolver() {
_add_map<FieldType::OLAP_FIELD_TYPE_DECIMAL256, BIT_SHUFFLE, true>();
_add_map<FieldType::OLAP_FIELD_TYPE_IPV4, BIT_SHUFFLE>();
+ _add_map<FieldType::OLAP_FIELD_TYPE_IPV4, PLAIN_ENCODING>();
+ _add_map<FieldType::OLAP_FIELD_TYPE_IPV4, BIT_SHUFFLE, true>();
_add_map<FieldType::OLAP_FIELD_TYPE_IPV6, BIT_SHUFFLE>();
_add_map<FieldType::OLAP_FIELD_TYPE_IPV6, PLAIN_ENCODING>();
diff --git a/be/src/olap/types.h b/be/src/olap/types.h
index 0701aca675a..0b2be2c38bb 100644
--- a/be/src/olap/types.h
+++ b/be/src/olap/types.h
@@ -791,7 +791,7 @@ struct BaseFieldTypeTraits : public CppTypeTraits<field_type> {
if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_LARGEINT) {
return get_int128_from_unalign(address);
} else if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_IPV6) {
- return get_uint128_from_unalign(address);
+ return get_int128_from_unalign(address);
}
return *reinterpret_cast<const CppType*>(address);
}
@@ -1016,7 +1016,7 @@ struct FieldTypeTraits<FieldType::OLAP_FIELD_TYPE_IPV6>
return Status::Error<ErrorCode::INVALID_ARGUMENT>(
"FieldTypeTraits<OLAP_FIELD_TYPE_IPV6>::from_string meet
PARSE_FAILURE");
}
- *reinterpret_cast<int128_t*>(buf) = value;
+ memcpy(buf, &value, sizeof(int128_t));
return Status::OK();
}
diff --git a/be/src/vec/data_types/data_type_ipv4.cpp b/be/src/vec/data_types/data_type_ipv4.cpp
index 90a88aa6fc7..963a1adf82e 100644
--- a/be/src/vec/data_types/data_type_ipv4.cpp
+++ b/be/src/vec/data_types/data_type_ipv4.cpp
@@ -37,8 +37,9 @@ std::string DataTypeIPv4::to_string(const IColumn& column, size_t row_num) const
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
- IPv4 value = assert_cast<const ColumnIPv4&>(*ptr).get_element(row_num);
- return convert_ipv4_to_string(value);
+ IPv4 ipv4_val = assert_cast<const ColumnIPv4&>(*ptr).get_element(row_num);
+ auto value = IPv4Value(ipv4_val);
+ return value.to_string();
}
void DataTypeIPv4::to_string(const IColumn& column, size_t row_num,
BufferWritable& ostr) const {
@@ -48,43 +49,15 @@ void DataTypeIPv4::to_string(const IColumn& column, size_t row_num, BufferWritab
Status DataTypeIPv4::from_string(ReadBuffer& rb, IColumn* column) const {
auto* column_data = static_cast<ColumnIPv4*>(column);
- StringParser::ParseResult result;
- IPv4 val = StringParser::string_to_unsigned_int<IPv4>(rb.position(),
rb.count(), &result);
+ IPv4 val = 0;
+ if (!read_ipv4_text_impl<IPv4>(val, rb)) {
+ return Status::InvalidArgument("parse ipv4 fail, string: '{}'",
+ std::string(rb.position(),
rb.count()).c_str());
+ }
column_data->insert_value(val);
return Status::OK();
}
-std::string DataTypeIPv4::convert_ipv4_to_string(IPv4 ipv4) {
- std::stringstream ss;
- ss << ((ipv4 >> 24) & 0xFF) << '.' << ((ipv4 >> 16) & 0xFF) << '.' <<
((ipv4 >> 8) & 0xFF)
- << '.' << (ipv4 & 0xFF);
- return ss.str();
-}
-
-bool DataTypeIPv4::convert_string_to_ipv4(IPv4& x, std::string ipv4) {
- const static int IPV4_PARTS_NUM = 4;
- IPv4 parts[IPV4_PARTS_NUM];
- int part_index = 0;
- std::stringstream ss(ipv4);
- std::string part;
- StringParser::ParseResult result;
-
- while (std::getline(ss, part, '.')) {
- IPv4 val = StringParser::string_to_unsigned_int<IPv4>(part.data(),
part.size(), &result);
- if (UNLIKELY(result != StringParser::PARSE_SUCCESS) || val > 255) {
- return false;
- }
- parts[part_index++] = val;
- }
-
- if (part_index != 4) {
- return false;
- }
-
- x = (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3];
- return true;
-}
-
MutableColumnPtr DataTypeIPv4::create_column() const {
return ColumnIPv4::create();
}
diff --git a/be/src/vec/data_types/data_type_ipv4.h b/be/src/vec/data_types/data_type_ipv4.h
index d2bd3e487c9..89ac8e18447 100644
--- a/be/src/vec/data_types/data_type_ipv4.h
+++ b/be/src/vec/data_types/data_type_ipv4.h
@@ -57,9 +57,6 @@ public:
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
Status from_string(ReadBuffer& rb, IColumn* column) const override;
- static std::string convert_ipv4_to_string(IPv4 ipv4);
- static bool convert_string_to_ipv4(IPv4& x, std::string ipv4);
-
Field get_field(const TExprNode& node) const override { return
(IPv4)node.ipv4_literal.value; }
MutableColumnPtr create_column() const override;
diff --git a/be/src/vec/data_types/data_type_ipv6.cpp b/be/src/vec/data_types/data_type_ipv6.cpp
index d54a0f48464..78b8e8e07d7 100755
--- a/be/src/vec/data_types/data_type_ipv6.cpp
+++ b/be/src/vec/data_types/data_type_ipv6.cpp
@@ -37,8 +37,9 @@ std::string DataTypeIPv6::to_string(const IColumn& column, size_t row_num) const
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
- IPv6 value = assert_cast<const ColumnIPv6&>(*ptr).get_element(row_num);
- return convert_ipv6_to_string(value);
+ IPv6 ipv6_val = assert_cast<const ColumnIPv6&>(*ptr).get_element(row_num);
+ auto value = IPv6Value(ipv6_val);
+ return value.to_string();
}
void DataTypeIPv6::to_string(const IColumn& column, size_t row_num,
BufferWritable& ostr) const {
@@ -48,23 +49,15 @@ void DataTypeIPv6::to_string(const IColumn& column, size_t row_num, BufferWritab
Status DataTypeIPv6::from_string(ReadBuffer& rb, IColumn* column) const {
auto* column_data = static_cast<ColumnIPv6*>(column);
- IPv6 value;
- if (!convert_string_to_ipv6(value, rb.to_string())) {
- throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
- "Invalid value: {} for type IPv6",
rb.to_string());
+ IPv6 val = 0;
+ if (!read_ipv6_text_impl<IPv6>(val, rb)) {
+ return Status::InvalidArgument("parse ipv6 fail, string: '{}'",
+ std::string(rb.position(),
rb.count()).c_str());
}
- column_data->insert_value(value);
+ column_data->insert_value(val);
return Status::OK();
}
-std::string DataTypeIPv6::convert_ipv6_to_string(IPv6 ipv6) {
- return IPv6Value::to_string(ipv6);
-}
-
-bool DataTypeIPv6::convert_string_to_ipv6(IPv6& x, std::string ipv6) {
- return IPv6Value::from_string(x, ipv6);
-}
-
MutableColumnPtr DataTypeIPv6::create_column() const {
return ColumnIPv6::create();
}
diff --git a/be/src/vec/data_types/data_type_ipv6.h b/be/src/vec/data_types/data_type_ipv6.h
index f849dab98ac..87236c5592f 100755
--- a/be/src/vec/data_types/data_type_ipv6.h
+++ b/be/src/vec/data_types/data_type_ipv6.h
@@ -56,12 +56,9 @@ public:
void to_string(const IColumn& column, size_t row_num, BufferWritable&
ostr) const override;
Status from_string(ReadBuffer& rb, IColumn* column) const override;
- static std::string convert_ipv6_to_string(IPv6 ipv6);
- static bool convert_string_to_ipv6(IPv6& x, std::string ipv6);
-
Field get_field(const TExprNode& node) const override {
IPv6 value;
- if (!convert_string_to_ipv6(value, node.ipv6_literal.value)) {
+ if (!IPv6Value::from_string(value, node.ipv6_literal.value)) {
throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT,
"Invalid value: {} for type IPv6",
node.ipv6_literal.value);
}
diff --git a/be/test/vec/data_types/from_string_test.cpp b/be/test/vec/data_types/from_string_test.cpp
index bbfb7da92a4..05015fcb9ca 100644
--- a/be/test/vec/data_types/from_string_test.cpp
+++ b/be/test/vec/data_types/from_string_test.cpp
@@ -283,6 +283,59 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
}
}
+ // ipv4 and ipv6 type
+ {
+ typedef std::pair<FieldType, string> FieldType_RandStr;
+ std::vector<FieldType_RandStr> ip_scala_field_types = {
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4, "0.0.0.0"),
// min case
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4,
"127.0.0.1"), // rand case
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4,
"255.255.255.255"), // max case
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6, "::"),
// min case
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6,
+ "2405:9800:9800:66::2"), // rand case
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6,
+ "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"),
// max case
+ };
+ std::vector<FieldType_RandStr> error_scala_field_types = {
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4,
"255.255.255.256"), // error case
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV4,
"255.255.255."), // error case
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6,
+ "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffg"),
// error case
+ FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_IPV6,
+ "ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffff"),
// error case
+ };
+ for (auto pair : ip_scala_field_types) {
+ auto type = pair.first;
+ DataTypePtr data_type_ptr =
DataTypeFactory::instance().create_data_type(type, 0, 0);
+ std::cout << "this type is " << data_type_ptr->get_name() << ": "
+ << fmt::format("{}", type) << std::endl;
+ std::unique_ptr<WrapperField>
rand_wf(WrapperField::create_by_type(type));
+ Status st = rand_wf->from_string(pair.second, 0, 0);
+ string rand_ip = rand_wf->to_string();
+ ReadBuffer rand_rb(rand_ip.data(), rand_ip.size());
+ auto col = data_type_ptr->create_column();
+ st = data_type_ptr->from_string(rand_rb, col);
+ EXPECT_EQ(st.ok(), true);
+ string rand_s_d = data_type_ptr->to_string(*col, 0);
+ rtrim(rand_ip);
+ std::cout << "rand(" << rand_ip << ") with data_type_str:" <<
rand_s_d << std::endl;
+ EXPECT_EQ(rand_ip, rand_s_d);
+ }
+ for (auto pair : error_scala_field_types) {
+ auto type = pair.first;
+ DataTypePtr data_type_ptr =
DataTypeFactory::instance().create_data_type(type, 0, 0);
+ std::cout << "this type is " << data_type_ptr->get_name() << ": "
+ << fmt::format("{}", type) << std::endl;
+ std::unique_ptr<WrapperField>
rand_wf(WrapperField::create_by_type(type));
+ Status st = rand_wf->from_string(pair.second, 0, 0);
+ EXPECT_EQ(st.ok(), false);
+ ReadBuffer rand_rb(pair.second.data(), pair.second.size());
+ auto col = data_type_ptr->create_column();
+ st = data_type_ptr->from_string(rand_rb, col);
+ EXPECT_EQ(st.ok(), false);
+ }
+ }
+
// null data type
{
DataTypePtr data_type_ptr =
DataTypeFactory::instance().create_data_type(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]