This is an automated email from the ASF dual-hosted git repository.

zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new ac52ba01a [GLUTEN-3582][CH] Support FLBAType and BOOLEAN (#5962)
ac52ba01a is described below

commit ac52ba01a687cc219bf0fe5ff68153101678e2bc
Author: Chang chen <[email protected]>
AuthorDate: Tue Jun 4 09:47:09 2024 +0800

    [GLUTEN-3582][CH] Support FLBAType and BOOLEAN (#5962)
    
    [CH] Support FLBAType and BOOLEAN
---
 .../parquet/GlutenParquetColumnIndexSuite.scala    |  34 ++++++-
 .../Storages/Parquet/ColumnIndexFilter.cpp         |  32 +++---
 .../Storages/Parquet/ParquetConverter.h            | 108 ++++++++++++++++++---
 .../tests/gtest_parquet_columnindex.cpp            | 101 ++++++++++++++++---
 4 files changed, 233 insertions(+), 42 deletions(-)

diff --git 
a/backends-clickhouse/src/test/scala/org/apache/spark/sql/gluten/parquet/GlutenParquetColumnIndexSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/spark/sql/gluten/parquet/GlutenParquetColumnIndexSuite.scala
index bc2372852..05ed7ed6b 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/spark/sql/gluten/parquet/GlutenParquetColumnIndexSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/spark/sql/gluten/parquet/GlutenParquetColumnIndexSuite.scala
@@ -25,7 +25,12 @@ import org.apache.spark.sql.DataFrame
 import org.apache.spark.sql.gluten.test.GlutenSQLTestUtils
 import org.apache.spark.sql.internal.SQLConf
 
-case class ParquetData(parquetDir: String, filter: String, scanOutput: Long)
+case class ParquetData(
+    column: String,
+    parquetDir: String,
+    filter: String,
+    scanOutput: Long,
+    title: Option[String] = None)
 
 class GlutenParquetColumnIndexSuite
   extends GlutenClickHouseWholeStageTransformerSuite
@@ -39,20 +44,41 @@ class GlutenParquetColumnIndexSuite
   //  both gluten and vanilla spark dataframe
   private val parquetData = Seq(
     ParquetData(
+      "count(*)",
       "index/tpch/20003",
       "`27` <> '1-URGENT' and `9` >= '1995-01-01' and `9` < '1996-01-01' ",
       140000),
     ParquetData(
+      "count(*)",
       "index/tpch/upper_case",
       "c_comment = '! requests wake. (...)ructions. furiousl'",
-      12853)
+      12853),
+    ParquetData(
+      "*",
+      "index/pageindex/query102",
+      "`198` = 'Crafts' or `198` = 'Computers' or `198`= 'a' or `198`= ''",
+      45),
+    ParquetData(
+      "count(*)",
+      "index/pageindex/query102",
+      "`100001` < 30000  and `100001` > 1000.004",
+      45,
+      Some("push down Decimal filter")),
+    ParquetData(
+      "count(*)",
+      "index/pageindex/query102",
+      "`100001` in (30000, 1000.004, 45000, 2323445, 4235423.6, 4546677.245, 
56677.5)",
+      45,
+      Some("push down Decimal filter In")
+    ),
+    ParquetData("count(*)", "index/pageindex/query05", "`142` = true", 9896)
   )
 
   parquetData.foreach {
     data =>
-      test(s"${data.parquetDir}") {
+      test(data.title.getOrElse(data.parquetDir)) {
         val parquetDir = s"$testPath/${data.parquetDir}"
-        val sql1 = s"""|select count(*) from $fileFormat.`$parquetDir`
+        val sql1 = s"""|select ${data.column} from $fileFormat.`$parquetDir`
                        |where ${data.filter}
                        |""".stripMargin
         compareResultsAgainstVanillaSpark(
diff --git a/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp 
b/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp
index 0d3b07e47..817de7f27 100644
--- a/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp
+++ b/cpp-ch/local-engine/Storages/Parquet/ColumnIndexFilter.cpp
@@ -547,7 +547,8 @@ PageIndexs TypedColumnIndexImpl<DType, ORDER>::notEq(const 
DB::Field & value) co
     }
 
     // Merging value filtering with pages containing nulls
-    auto real_value{parquetCast<DType>(value)};
+    ToParquet<DType> to_parquet;
+    auto real_value{to_parquet.as(value, *descr_)};
     TypedComparator<DType> typed_comparator{real_value, *column_index_, 
*comparator_};
     auto pages = ORDER::notEq(typed_comparator);
     const std::set<size_t> matchingIndexes(pages.begin(), pages.end());
@@ -573,7 +574,8 @@ PageIndexs TypedColumnIndexImpl<DType, ORDER>::eq(const 
DB::Field & value) const
             return {PageIndexsBuilder::ALL_PAGES};
         }
     }
-    auto real_value = parquetCast<DType>(value);
+    ToParquet<DType> to_parquet;
+    auto real_value{to_parquet.as(value, *descr_)};
     TypedComparator<DType> typed_comparator{real_value, *column_index_, 
*comparator_};
     return ORDER::eq(typed_comparator);
 }
@@ -581,7 +583,8 @@ PageIndexs TypedColumnIndexImpl<DType, ORDER>::eq(const 
DB::Field & value) const
 template <typename DType, Derived<BoundaryOrder> ORDER>
 PageIndexs TypedColumnIndexImpl<DType, ORDER>::gt(const DB::Field & value) 
const
 {
-    auto real_value{parquetCast<DType>(value)};
+    ToParquet<DType> to_parquet;
+    auto real_value{to_parquet.as(value, *descr_)};
     TypedComparator<DType> typed_comparator{real_value, *column_index_, 
*comparator_};
     return ORDER::gt(typed_comparator);
 }
@@ -589,7 +592,8 @@ PageIndexs TypedColumnIndexImpl<DType, ORDER>::gt(const 
DB::Field & value) const
 template <typename DType, Derived<BoundaryOrder> ORDER>
 PageIndexs TypedColumnIndexImpl<DType, ORDER>::gtEg(const DB::Field & value) 
const
 {
-    auto real_value{parquetCast<DType>(value)};
+    ToParquet<DType> to_parquet;
+    auto real_value{to_parquet.as(value, *descr_)};
     TypedComparator<DType> typed_comparator{real_value, *column_index_, 
*comparator_};
     return ORDER::gtEq(typed_comparator);
 }
@@ -597,7 +601,8 @@ PageIndexs TypedColumnIndexImpl<DType, ORDER>::gtEg(const 
DB::Field & value) con
 template <typename DType, Derived<BoundaryOrder> ORDER>
 PageIndexs TypedColumnIndexImpl<DType, ORDER>::lt(const DB::Field & value) 
const
 {
-    auto real_value{parquetCast<DType>(value)};
+    ToParquet<DType> to_parquet;
+    auto real_value{to_parquet.as(value, *descr_)};
     TypedComparator<DType> typed_comparator{real_value, *column_index_, 
*comparator_};
     return ORDER::lt(typed_comparator);
 }
@@ -605,7 +610,8 @@ PageIndexs TypedColumnIndexImpl<DType, ORDER>::lt(const 
DB::Field & value) const
 template <typename DType, Derived<BoundaryOrder> ORDER>
 PageIndexs TypedColumnIndexImpl<DType, ORDER>::ltEg(const DB::Field & value) 
const
 {
-    auto real_value{parquetCast<DType>(value)};
+    ToParquet<DType> to_parquet;
+    auto real_value{to_parquet.as(value, *descr_)};
     TypedComparator<DType> typed_comparator{real_value, *column_index_, 
*comparator_};
     return ORDER::ltEq(typed_comparator);
 }
@@ -615,7 +621,7 @@ PageIndexs TypedColumnIndexImpl<DType, ORDER>::in(const 
DB::ColumnPtr & column)
 {
     /// TODO: handle null
     ///
-    std::shared_ptr<ParquetConverter<DType>> converter = 
ParquetConverter<DType>::Make(column);
+    std::shared_ptr<ParquetConverter<DType>> converter = 
ParquetConverter<DType>::Make(column, *descr_);
     const auto * value = converter->getBatch(0, column->size());
     T min, max;
     std::tie(min, max) = comparator_->GetMinMax(value, column->size());
@@ -659,7 +665,8 @@ ColumnIndexPtr internalMakeColumnIndex(
     switch (physical_type)
     {
         case parquet::Type::BOOLEAN:
-            break;
+            return std::make_unique<TypedColumnIndexImpl<parquet::BooleanType, 
ORDER>>(
+                descr, 
dynamic_pointer_cast<parquet::BoolColumnIndex>(column_index), offset_index);
         case parquet::Type::INT32:
             return std::make_unique<TypedColumnIndexImpl<parquet::Int32Type, 
ORDER>>(
                 descr, 
dynamic_pointer_cast<parquet::Int32ColumnIndex>(column_index), offset_index);
@@ -669,20 +676,21 @@ ColumnIndexPtr internalMakeColumnIndex(
         case parquet::Type::INT96:
             break;
         case parquet::Type::FLOAT:
-            break;
+            return std::make_unique<TypedColumnIndexImpl<parquet::FloatType, 
ORDER>>(
+                descr, 
dynamic_pointer_cast<parquet::FloatColumnIndex>(column_index), offset_index);
         case parquet::Type::DOUBLE:
             return std::make_unique<TypedColumnIndexImpl<parquet::DoubleType, 
ORDER>>(
                 descr, 
dynamic_pointer_cast<parquet::DoubleColumnIndex>(column_index), offset_index);
-            break;
         case parquet::Type::BYTE_ARRAY:
             return 
std::make_unique<TypedColumnIndexImpl<parquet::ByteArrayType, ORDER>>(
                 descr, 
dynamic_pointer_cast<parquet::ByteArrayColumnIndex>(column_index), 
offset_index);
         case parquet::Type::FIXED_LEN_BYTE_ARRAY:
-            break;
+            return std::make_unique<TypedColumnIndexImpl<parquet::FLBAType, 
ORDER>>(
+                descr, 
dynamic_pointer_cast<parquet::FLBAColumnIndex>(column_index), offset_index);
         case parquet::Type::UNDEFINED:
             break;
     }
-    throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unsupported physical 
type {}", physical_type);
+    throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unsupported physical 
type {}", TypeToString(physical_type));
 }
 
 ColumnIndexPtr ColumnIndex::create(
diff --git a/cpp-ch/local-engine/Storages/Parquet/ParquetConverter.h 
b/cpp-ch/local-engine/Storages/Parquet/ParquetConverter.h
index ac7b2479a..89e83e668 100644
--- a/cpp-ch/local-engine/Storages/Parquet/ParquetConverter.h
+++ b/cpp-ch/local-engine/Storages/Parquet/ParquetConverter.h
@@ -15,33 +15,72 @@
  * limitations under the License.
  */
 #pragma once
+#include <Columns/ColumnDecimal.h>
 #include <Core/Field.h>
+#include <base/Decimal_fwd.h>
+#include <parquet/schema.h>
 #include <parquet/statistics.h>
 #include <parquet/types.h>
 #include <Common/PODArray.h>
 
+namespace DB::ErrorCodes
+{
+extern const int LOGICAL_ERROR;
+}
+
 namespace local_engine
 {
+
 template <typename PhysicalType>
-auto parquetCast(const DB::Field & value) -> typename PhysicalType::c_type
+struct ToParquet
 {
     using T = typename PhysicalType::c_type;
-    if constexpr (std::is_same_v<PhysicalType, parquet::Int32Type>)
-        return static_cast<T>(value.get<Int64>());
-    else if constexpr (std::is_same_v<PhysicalType, parquet::ByteArrayType>)
+    T as(const DB::Field & value, const parquet::ColumnDescriptor &)
+    {
+        if constexpr (std::is_same_v<PhysicalType, parquet::Int32Type>)
+            return static_cast<T>(value.get<Int64>());
+        // parquet::BooleanType, parquet::Int64Type, parquet::FloatType, 
parquet::DoubleType
+        return value.get<T>(); // FLOAT, DOUBLE, INT64
+    }
+};
+
+template <>
+struct ToParquet<parquet::ByteArrayType>
+{
+    using T = parquet::ByteArray;
+    T as(const DB::Field & value, const parquet::ColumnDescriptor &)
     {
         assert(value.getType() == DB::Field::Types::String);
         const std::string & s = value.get<std::string>();
         const auto * const ptr = reinterpret_cast<const uint8_t *>(s.data());
         return parquet::ByteArray(static_cast<uint32_t>(s.size()), ptr);
     }
-    else if constexpr (std::is_same_v<PhysicalType, parquet::FLBAType>)
+};
+
+template <>
+struct ToParquet<parquet::FLBAType>
+{
+    uint8_t buf[256];
+    using T = parquet::FixedLenByteArray;
+    T as(const DB::Field & value, const parquet::ColumnDescriptor & descriptor)
     {
-        abort();
+        if (value.getType() != DB::Field::Types::Decimal128)
+            throw DB::Exception(
+                DB::ErrorCodes::LOGICAL_ERROR, "Field type '{}' for 
FIXED_LEN_BYTE_ARRAY is not supported", value.getTypeName());
+        static_assert(sizeof(Int128) <= sizeof(buf));
+        if (descriptor.type_length() > sizeof(Int128))
+            throw DB::Exception(
+                DB::ErrorCodes::LOGICAL_ERROR,
+                "descriptor.type_length() = {} , which is > {}, e.g. 
sizeof(Int128)",
+                descriptor.type_length(),
+                sizeof(Int128));
+        Int128 val = value.get<DB::DecimalField<DB::Decimal128>>().getValue();
+        std::reverse(reinterpret_cast<char *>(&val), reinterpret_cast<char 
*>(&val) + sizeof(val));
+        const int offset = sizeof(Int128) - descriptor.type_length();
+        memcpy(buf, reinterpret_cast<char *>(&val) + offset, 
descriptor.type_length());
+        return parquet::FixedLenByteArray(buf);
     }
-    else
-        return value.get<T>(); // FLOAT, DOUBLE, INT64
-}
+};
 
 // Int32 Int64 Float Double
 template <typename DType, typename Col>
@@ -100,6 +139,42 @@ struct ConverterString
     }
 };
 
+/// Like ConverterNumberAsFixedString, but converts to big-endian. Because 
that's the byte order
+/// Parquet uses for decimal types and literally nothing else, for some reason.
+template <typename T>
+struct ConverterDecimal
+{
+    const parquet::ColumnDescriptor & descriptor;
+    const DB::ColumnDecimal<T> & column;
+    DB::PODArray<uint8_t> data_buf;
+    DB::PODArray<parquet::FixedLenByteArray> ptr_buf;
+
+    explicit ConverterDecimal(const DB::ColumnPtr & c, const 
parquet::ColumnDescriptor & desc)
+        : descriptor(desc), column(assert_cast<const DB::ColumnDecimal<T> 
&>(*c))
+    {
+        if (descriptor.type_length() > sizeof(T))
+            throw DB::Exception(
+                DB::ErrorCodes::LOGICAL_ERROR,
+                "descriptor.type_length() = {} , which is > {}, e.g. 
sizeof(T)",
+                descriptor.type_length(),
+                sizeof(T));
+    }
+
+    const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count)
+    {
+        data_buf.resize(count * sizeof(T));
+        ptr_buf.resize(count);
+        memcpy(data_buf.data(), reinterpret_cast<const char 
*>(column.getData().data() + offset), count * sizeof(T));
+        const size_t offset_in_buf = sizeof(Int128) - descriptor.type_length();
+        ;
+        for (size_t i = 0; i < count; ++i)
+        {
+            std::reverse(data_buf.data() + i * sizeof(T), data_buf.data() + (i 
+ 1) * sizeof(T));
+            ptr_buf[i].ptr = data_buf.data() + i * sizeof(T) + offset_in_buf;
+        }
+        return ptr_buf.data();
+    }
+};
 
 class BaseConverter
 {
@@ -115,7 +190,7 @@ protected:
 
 public:
     virtual const T * getBatch(size_t offset, size_t count) = 0;
-    static std::shared_ptr<ParquetConverter<DType>> Make(const DB::ColumnPtr & 
c);
+    static std::shared_ptr<ParquetConverter<DType>> Make(const DB::ColumnPtr & 
c, const parquet::ColumnDescriptor & desc);
 };
 
 template <typename DType, typename CONVERT>
@@ -134,7 +209,7 @@ private:
 
 
 template <typename DType>
-std::shared_ptr<ParquetConverter<DType>> ParquetConverter<DType>::Make(const 
DB::ColumnPtr & c)
+std::shared_ptr<ParquetConverter<DType>> ParquetConverter<DType>::Make(const 
DB::ColumnPtr & c, const parquet::ColumnDescriptor & desc)
 {
     std::shared_ptr<BaseConverter> result;
 
@@ -204,6 +279,17 @@ std::shared_ptr<ParquetConverter<DType>> 
ParquetConverter<DType>::Make(const DB:
                     break;
             }
             break;
+        case parquet::Type::FIXED_LEN_BYTE_ARRAY:
+            switch (c->getDataType())
+            {
+                case TypeIndex::Decimal128:
+                    result = 
std::make_shared<ParquetConverterImpl<parquet::FLBAType, 
ConverterDecimal<Decimal128>>>(
+                        ConverterDecimal<Decimal128>(c, desc));
+                    break;
+                default:
+                    break;
+            }
+            break;
         default:
             break;
     }
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp 
b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
index ba09b21b2..ea3bd41e4 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_columnindex.cpp
@@ -15,12 +15,14 @@
  * limitations under the License.
  */
 
+#include <charconv>
+
+
 #include "config.h"
 #if USE_PARQUET
 #include <ranges>
 #include <string>
 #include <DataTypes/DataTypeString.h>
-#include <DataTypes/DataTypesNumber.h>
 #include <Interpreters/ActionsVisitor.h>
 #include <Interpreters/ExpressionActions.h>
 #include <Parser/SerializedPlanParser.h>
@@ -59,6 +61,9 @@ class PrimitiveNodeBuilder
     parquet::Repetition::type repetition_ = parquet::Repetition::UNDEFINED;
     parquet::ConvertedType::type converted_type_ = 
parquet::ConvertedType::NONE;
     parquet::Type::type physical_type_ = parquet::Type::UNDEFINED;
+    int length_ = -1;
+    int precision_ = -1;
+    int scale_ = -1;
 
 public:
     PrimitiveNodeBuilder & as(parquet::ConvertedType::type converted_type)
@@ -67,13 +72,25 @@ public:
         return *this;
     }
 
+    PrimitiveNodeBuilder & with_length(int length)
+    {
+        length_ = length;
+        return *this;
+    }
+    PrimitiveNodeBuilder & asDecimal(int precision, int scale)
+    {
+        converted_type_ = parquet::ConvertedType::DECIMAL;
+        precision_ = precision;
+        scale_ = scale;
+        return *this;
+    }
     parquet::schema::NodePtr named(const std::string & name) const
     {
         assert(!name.empty());
         if (physical_type_ == parquet::Type::UNDEFINED)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Unsupported physical 
type");
         return parquet::schema::PrimitiveNode::Make(
-            name, repetition_, physical_type_, converted_type_, /*length=*/-1, 
/*precision=*/-1, /*scale=*/-1, /*field_id*/ -1);
+            name, repetition_, physical_type_, converted_type_, length_, 
precision_, scale_, /*field_id*/ -1);
     }
     parquet::ColumnDescriptor descriptor(const std::string & name) const { 
return {named(name), /*max_definition_level=*/1, 0}; }
     static PrimitiveNodeBuilder optional(parquet::Type::type physical_type)
@@ -483,13 +500,22 @@ using ParquetValue = std::variant<
     parquet::DoubleType::c_type,
     parquet::ByteArrayType::c_type>;
 
-ParquetValue to(const DB::Field & value, const parquet::ColumnDescriptor & 
desc)
+template <typename PhysicalType>
+void doComapre(
+    const parquet::ColumnDescriptor & descriptor, const DB::Field & value, 
const std::function<void(const ParquetValue &)> & compare)
+{
+    local_engine::ToParquet<PhysicalType> to_parquet;
+    compare({to_parquet.as(value, descriptor)});
+}
+
+void with_actual(const DB::Field & value, const parquet::ColumnDescriptor & 
desc, const std::function<void(const ParquetValue &)> & compare)
 {
     using namespace local_engine;
     switch (desc.physical_type())
     {
         case parquet::Type::BOOLEAN:
-            break;
+            doComapre<parquet::BooleanType>(desc, value, compare);
+            return;
         case parquet::Type::INT32: {
             switch (desc.converted_type())
             {
@@ -500,7 +526,8 @@ ParquetValue to(const DB::Field & value, const 
parquet::ColumnDescriptor & desc)
                 case parquet::ConvertedType::INT_16:
                 case parquet::ConvertedType::INT_32:
                 case parquet::ConvertedType::NONE:
-                    return {parquetCast<parquet::Int32Type>(value)};
+                    doComapre<parquet::Int32Type>(desc, value, compare);
+                    return;
                 default:
                     break;
             }
@@ -512,35 +539,81 @@ ParquetValue to(const DB::Field & value, const 
parquet::ColumnDescriptor & desc)
                 case parquet::ConvertedType::INT_64:
                 case parquet::ConvertedType::UINT_64:
                 case parquet::ConvertedType::NONE:
-                    return {parquetCast<parquet::Int64Type>(value)};
+                    doComapre<parquet::Int64Type>(desc, value, compare);
+                    return;
                 default:
                     break;
             }
             break;
         case parquet::Type::INT96:
+            // doComapre<parquet::Int96Type>(desc, value, compare);
             break;
         case parquet::Type::FLOAT:
-            return {value.get<Float32>()};
+            doComapre<parquet::FloatType>(desc, value, compare);
+            return;
         case parquet::Type::DOUBLE:
-            return {value.get<Float64>()};
-            break;
+            doComapre<parquet::DoubleType>(desc, value, compare);
+            return;
         case parquet::Type::BYTE_ARRAY:
             switch (desc.converted_type())
             {
                 case parquet::ConvertedType::UTF8:
-                    return parquetCast<parquet::ByteArrayType>(value);
+                    doComapre<parquet::ByteArrayType>(desc, value, compare);
+                    return;
                 default:
                     break;
             }
             break;
         case parquet::Type::FIXED_LEN_BYTE_ARRAY:
+            // doComapre<parquet::FLBAType>(desc, value, compare);
             break;
         case parquet::Type::UNDEFINED:
             break;
     }
-    abort();
+    ASSERT_TRUE(false) << "Unsupported physical type: [" << 
TypeToString(desc.physical_type()) << "] with logical type: ["
+                       << desc.logical_type()->ToString() << "] with converted 
type: [" << ConvertedTypeToString(desc.converted_type())
+                       << "]";
 }
 
+// for gtest
+namespace parquet
+{
+void PrintTo(const ByteArray & val, std::ostream * os)
+{
+    *os << '[' << std::hex;
+
+    for (size_t i = 0; i < val.len; ++i)
+    {
+        *os << std::setw(2) << std::setfill('0') << 
static_cast<int>(val.ptr[i]);
+        if (i != val.len - 1)
+            *os << ", ";
+    }
+    *os << ']';
+}
+}
+TEST(ColumnIndex, DecimalField)
+{
+    // we can't define `operator==` for parquet::FLBAType
+    Field value = DecimalField<Decimal128>(Int128(300000000), 4);
+    local_engine::ToParquet<parquet::FLBAType> to_parquet;
+    const parquet::ColumnDescriptor desc
+        = PNB::optional(parquet::Type::FIXED_LEN_BYTE_ARRAY).asDecimal(38, 
4).with_length(13).descriptor("column1");
+    uint8_t expected_a[13]{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x11, 
0xE1, 0xA3, 0x0};
+    const parquet::ByteArray expected{13, expected_a};
+    const parquet::ByteArray actual{13, to_parquet.as(value, desc).ptr};
+    ASSERT_EQ(actual, expected);
+
+
+    /// Exception test
+    Field unsupport = DecimalField<Decimal256>(Int256(300000000), 4);
+    EXPECT_THROW(to_parquet.as(unsupport, desc), DB::Exception);
+
+    const parquet::ColumnDescriptor error
+        = PNB::optional(parquet::Type::FIXED_LEN_BYTE_ARRAY).asDecimal(38, 
4).with_length(18).descriptor("column1");
+    EXPECT_THROW(to_parquet.as(value, error), DB::Exception);
+}
+
+
 TEST(ColumnIndex, Field)
 {
     std::string s_tmp = "hello world";
@@ -551,7 +624,6 @@ TEST(ColumnIndex, Field)
         parquet::ColumnDescriptor, //desc
         ParquetValue //expected value
         >;
-    using PNB = test_utils::PrimitiveNodeBuilder;
     const std::vector<TESTDATA> datas{
         {"int32_UINT_8",
          static_cast<UInt8>(1),
@@ -579,8 +651,7 @@ TEST(ColumnIndex, Field)
             const auto & value = std::get<1>(data);
             const auto & desc = std::get<2>(data);
             const auto & expected = std::get<3>(data);
-            const auto actual = to(value, desc);
-            ASSERT_EQ(actual, expected) << name;
+            with_actual(value, desc, [&](const ParquetValue & actual) { 
ASSERT_EQ(actual, expected) << name; });
         });
 
     const std::vector<std::pair<String, Field>> primitive_fields{
@@ -612,7 +683,7 @@ struct ReadStatesParam
     ReadStatesParam() = default;
 
     ReadStatesParam(local_engine::RowRanges ranges, 
std::shared_ptr<local_engine::ColumnReadState> states)
-        : row_ranges(std::move(ranges)), read_states(std::move(states)){};
+        : row_ranges(std::move(ranges)), read_states(std::move(states)) {};
 
     local_engine::RowRanges row_ranges;
     std::shared_ptr<local_engine::ColumnReadState> read_states;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to