This is an automated email from the ASF dual-hosted git repository.
ashingau pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2b4c4bb4424 [Fix][Opt](parquet-reader) Fix filter push down with
decimal types in parquet reader. (#27897)
2b4c4bb4424 is described below
commit 2b4c4bb44247fb10165bab5b0c98daae7f269c0e
Author: Qi Chen <[email protected]>
AuthorDate: Mon Dec 4 22:25:39 2023 +0800
[Fix][Opt](parquet-reader) Fix filter push down with decimal types in
parquet reader. (#27897)
Fix the filter push-down regression with decimal types in the parquet reader
that was introduced by #22842.
---
be/src/vec/exec/format/format_common.h | 11 +++++++----
be/src/vec/exec/format/orc/vorc_reader.cpp | 16 ++++++++--------
be/src/vec/exec/format/orc/vorc_reader.h | 4 ++--
.../exec/format/parquet/parquet_column_convert.h | 19 +++++++++----------
be/src/vec/exec/format/parquet/parquet_pred_cmp.h | 21 +++++++++++----------
5 files changed, 37 insertions(+), 34 deletions(-)
diff --git a/be/src/vec/exec/format/format_common.h
b/be/src/vec/exec/format/format_common.h
index 8d1b651c7b0..9e8ce4fe238 100644
--- a/be/src/vec/exec/format/format_common.h
+++ b/be/src/vec/exec/format/format_common.h
@@ -34,14 +34,17 @@ struct DecimalScaleParams {
template <typename DecimalPrimitiveType>
static inline constexpr DecimalPrimitiveType get_scale_factor(int32_t n) {
- if constexpr (std::is_same_v<DecimalPrimitiveType, Int32>) {
+ if constexpr (std::is_same_v<DecimalPrimitiveType, Decimal32>) {
return common::exp10_i32(n);
- } else if constexpr (std::is_same_v<DecimalPrimitiveType, Int64>) {
+ } else if constexpr (std::is_same_v<DecimalPrimitiveType, Decimal64>) {
return common::exp10_i64(n);
- } else if constexpr (std::is_same_v<DecimalPrimitiveType, Int128>) {
+ } else if constexpr (std::is_same_v<DecimalPrimitiveType, Decimal128>)
{
+ return common::exp10_i128(n);
+ } else if constexpr (std::is_same_v<DecimalPrimitiveType,
Decimal128I>) {
return common::exp10_i128(n);
} else {
- return DecimalPrimitiveType(1);
+ static_assert(!sizeof(DecimalPrimitiveType),
+ "All types must be matched with if constexpr.");
}
}
};
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 7f016e0f938..aa790d45170 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -1281,17 +1281,17 @@ Status OrcReader::_orc_column_to_doris_column(const
std::string& col_name,
case TypeIndex::Int32:
return _decode_int32_column<is_filter>(col_name, data_column, cvb,
num_values);
case TypeIndex::Decimal32:
- return _decode_decimal_column<Int32, is_filter>(col_name, data_column,
data_type, cvb,
- num_values);
+ return _decode_decimal_column<Decimal32, is_filter>(col_name,
data_column, data_type, cvb,
+ num_values);
case TypeIndex::Decimal64:
- return _decode_decimal_column<Int64, is_filter>(col_name, data_column,
data_type, cvb,
- num_values);
+ return _decode_decimal_column<Decimal64, is_filter>(col_name,
data_column, data_type, cvb,
+ num_values);
case TypeIndex::Decimal128:
- return _decode_decimal_column<Int128, is_filter>(col_name,
data_column, data_type, cvb,
- num_values);
+ return _decode_decimal_column<Decimal128, is_filter>(col_name,
data_column, data_type, cvb,
+ num_values);
case TypeIndex::Decimal128I:
- return _decode_decimal_column<Int128, is_filter>(col_name,
data_column, data_type, cvb,
- num_values);
+ return _decode_decimal_column<Decimal128I, is_filter>(col_name,
data_column, data_type, cvb,
+ num_values);
case TypeIndex::Date:
return _decode_time_column<VecDateTimeValue, Int64,
orc::LongVectorBatch, is_filter>(
col_name, data_column, cvb, num_values);
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h
b/be/src/vec/exec/format/orc/vorc_reader.h
index 0384b0b9793..ca699831d66 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -287,7 +287,7 @@ private:
if (scale_params.scale_type != DecimalScaleParams::NOT_INIT) {
return;
}
- auto* decimal_type =
reinterpret_cast<DataTypeDecimal<Decimal<DecimalPrimitiveType>>*>(
+ auto* decimal_type =
reinterpret_cast<DataTypeDecimal<DecimalPrimitiveType>*>(
const_cast<IDataType*>(remove_nullable(data_type).get()));
auto dest_scale = decimal_type->get_scale();
if (dest_scale > orc_decimal_scale) {
@@ -324,7 +324,7 @@ private:
auto* cvb_data = data->values.data();
auto& column_data =
-
static_cast<ColumnDecimal<Decimal<DecimalPrimitiveType>>&>(*data_column).get_data();
+
static_cast<ColumnDecimal<DecimalPrimitiveType>&>(*data_column).get_data();
auto origin_size = column_data.size();
column_data.resize(origin_size + num_values);
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h
b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index 9273d7a1aa2..65f5270face 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -108,11 +108,11 @@ struct PhysicalTypeTraits<tparquet::Type::INT96> {
M(TypeIndex::Float32, Float32, Float32) \
M(TypeIndex::Float64, Float64, Float64)
-#define FOR_LOGICAL_DECIMAL_TYPES(M) \
- M(TypeIndex::Decimal32, Decimal32, Int32) \
- M(TypeIndex::Decimal64, Decimal64, Int64) \
- M(TypeIndex::Decimal128, Decimal128, Int128) \
- M(TypeIndex::Decimal128I, Decimal128, Int128)
+#define FOR_LOGICAL_DECIMAL_TYPES(M) \
+ M(TypeIndex::Decimal32, Decimal32, Decimal32) \
+ M(TypeIndex::Decimal64, Decimal64, Decimal64) \
+ M(TypeIndex::Decimal128, Decimal128, Decimal128) \
+ M(TypeIndex::Decimal128I, Decimal128I, Decimal128I)
struct ConvertParams {
// schema.logicalType.TIMESTAMP.isAdjustedToUTC == false
@@ -174,7 +174,7 @@ struct ConvertParams {
return;
}
auto scale = field_schema->parquet_schema.scale;
- auto* decimal_type =
static_cast<DataTypeDecimal<Decimal<DecimalPrimitiveType>>*>(
+ auto* decimal_type =
static_cast<DataTypeDecimal<DecimalPrimitiveType>*>(
const_cast<IDataType*>(remove_nullable(data_type).get()));
auto dest_scale = decimal_type->get_scale();
if (dest_scale > scale) {
@@ -400,9 +400,8 @@ public:
dst_col->resize(_convert_params->start_idx + rows);
DecimalScaleParams& scale_params = _convert_params->decimal_scale;
- auto* data =
static_cast<ColumnDecimal<Decimal<DecimalPhysicalType>>*>(dst_col.get())
- ->get_data()
- .data();
+ auto* data =
+
static_cast<ColumnDecimal<DecimalPhysicalType>*>(dst_col.get())->get_data().data();
for (int i = 0; i < rows; i++) {
ValueCopyType value = src_data[i];
@@ -678,4 +677,4 @@ inline Status get_converter(tparquet::Type::type
parquet_physical_type, Primitiv
}; // namespace ParquetConvert
-}; // namespace doris::vectorized
\ No newline at end of file
+}; // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
index b993370a159..8158fb9e9f1 100644
--- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
+++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
@@ -163,7 +163,7 @@ private:
break;
case TYPE_DECIMALV2:
if constexpr (std::is_same_v<CppType, DecimalV2Value>) {
- size_t max_precision =
max_decimal_precision<Decimal<__int128_t>>();
+ size_t max_precision = max_decimal_precision<Decimal128>();
if (col_schema->parquet_schema.precision < 1 ||
col_schema->parquet_schema.precision > max_precision ||
col_schema->parquet_schema.scale > max_precision) {
@@ -172,18 +172,18 @@ private:
int v2_scale = DecimalV2Value::SCALE;
if (physical_type == tparquet::Type::FIXED_LEN_BYTE_ARRAY) {
min_value = DecimalV2Value(
- _decode_binary_decimal<Int128>(col_schema,
encoded_min, v2_scale));
+ _decode_binary_decimal<Decimal128>(col_schema,
encoded_min, v2_scale));
max_value = DecimalV2Value(
- _decode_binary_decimal<Int128>(col_schema,
encoded_max, v2_scale));
+ _decode_binary_decimal<Decimal128>(col_schema,
encoded_max, v2_scale));
} else if (physical_type == tparquet::Type::INT32) {
- min_value =
DecimalV2Value(_decode_primitive_decimal<Int128, Int32>(
+ min_value =
DecimalV2Value(_decode_primitive_decimal<Decimal128, Int32>(
col_schema, encoded_min, v2_scale));
- max_value =
DecimalV2Value(_decode_primitive_decimal<Int128, Int32>(
+ max_value =
DecimalV2Value(_decode_primitive_decimal<Decimal128, Int32>(
col_schema, encoded_max, v2_scale));
} else if (physical_type == tparquet::Type::INT64) {
- min_value =
DecimalV2Value(_decode_primitive_decimal<Int128, Int64>(
+ min_value =
DecimalV2Value(_decode_primitive_decimal<Decimal128, Int64>(
col_schema, encoded_min, v2_scale));
- max_value =
DecimalV2Value(_decode_primitive_decimal<Int128, Int64>(
+ max_value =
DecimalV2Value(_decode_primitive_decimal<Decimal128, Int64>(
col_schema, encoded_max, v2_scale));
} else {
return false;
@@ -197,9 +197,10 @@ private:
case TYPE_DECIMAL64:
[[fallthrough]];
case TYPE_DECIMAL128I:
- if constexpr (std::is_same_v<CppType, int32_t> ||
std::is_same_v<CppType, int64_t> ||
- std::is_same_v<CppType, __int128_t>) {
- size_t max_precision =
max_decimal_precision<Decimal<CppType>>();
+ if constexpr (std::is_same_v<CppType, Decimal32> ||
+ std::is_same_v<CppType, Decimal64> ||
+ std::is_same_v<CppType, Decimal128I>) {
+ size_t max_precision = max_decimal_precision<CppType>();
if (col_schema->parquet_schema.precision < 1 ||
col_schema->parquet_schema.precision > max_precision ||
col_schema->parquet_schema.scale > max_precision) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]