This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d991bcf60ae [fix](ParquetReader) Fix Parquet Reader problem when reading the `int96` parquet type (#32394)
d991bcf60ae is described below

commit d991bcf60aeac694dfc18f3f6a18262cb17612b0
Author: Tiewei Fang <[email protected]>
AuthorDate: Tue Mar 19 17:26:50 2024 +0800

    [fix](ParquetReader) Fix Parquet Reader problem when reading the `int96` parquet type (#32394)
    
    `hi - JULIAN_EPOCH_OFFSET_DAYS` can be negative, so the `ParquetInt96` fields
    and constants cannot all be unsigned integers; they are changed to signed types.
---
 be/src/vec/exec/format/parquet/parquet_column_convert.h | 13 +++++++------
 be/src/vec/exec/format/parquet/parquet_common.cpp       |  6 +++---
 be/src/vec/exec/format/parquet/parquet_common.h         | 12 ++++++------
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h 
b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index 39ee29f663f..1d6cfd63515 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -318,12 +318,13 @@ public:
         auto& data = 
static_cast<ColumnVector<UInt64>*>(dst_col.get())->get_data();
 
         for (int i = 0; i < rows; i++) {
-            ParquetInt96 x = ParquetInt96_data[i];
-            auto& num = data[start_idx + i];
-            auto& value = 
reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num);
-            int64_t micros = x.to_timestamp_micros();
-            value.from_unixtime(micros / 1000000, *_convert_params->ctz);
-            value.set_microsecond(micros % 1000000);
+            ParquetInt96 src_cell_data = ParquetInt96_data[i];
+            auto& dst_value =
+                    
reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(data[start_idx + i]);
+
+            int64_t timestamp_with_micros = 
src_cell_data.to_timestamp_micros();
+            dst_value.from_unixtime(timestamp_with_micros / 1000000, 
*_convert_params->ctz);
+            dst_value.set_microsecond(timestamp_with_micros % 1000000);
         }
         return Status::OK();
     }
diff --git a/be/src/vec/exec/format/parquet/parquet_common.cpp 
b/be/src/vec/exec/format/parquet/parquet_common.cpp
index cbef2a0f286..33e9f11242b 100644
--- a/be/src/vec/exec/format/parquet/parquet_common.cpp
+++ b/be/src/vec/exec/format/parquet/parquet_common.cpp
@@ -24,9 +24,9 @@
 
 namespace doris::vectorized {
 
-const uint32_t ParquetInt96::JULIAN_EPOCH_OFFSET_DAYS = 2440588;
-const uint64_t ParquetInt96::MICROS_IN_DAY = 86400000000;
-const uint64_t ParquetInt96::NANOS_PER_MICROSECOND = 1000;
+const int32_t ParquetInt96::JULIAN_EPOCH_OFFSET_DAYS = 2440588;
+const int64_t ParquetInt96::MICROS_IN_DAY = 86400000000;
+const int64_t ParquetInt96::NANOS_PER_MICROSECOND = 1000;
 
 ColumnSelectVector::ColumnSelectVector(const uint8_t* filter_map, size_t 
filter_map_size,
                                        bool filter_all) {
diff --git a/be/src/vec/exec/format/parquet/parquet_common.h 
b/be/src/vec/exec/format/parquet/parquet_common.h
index 6667ab2c101..2cf745882ee 100644
--- a/be/src/vec/exec/format/parquet/parquet_common.h
+++ b/be/src/vec/exec/format/parquet/parquet_common.h
@@ -48,10 +48,10 @@ struct RowRange {
 
 #pragma pack(1)
 struct ParquetInt96 {
-    uint64_t lo; // time of nanoseconds in a day
-    uint32_t hi; // days from julian epoch
+    int64_t lo; // time of nanoseconds in a day
+    int32_t hi; // days from julian epoch
 
-    inline uint64_t to_timestamp_micros() const {
+    inline int64_t to_timestamp_micros() const {
         return (hi - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY + lo / 
NANOS_PER_MICROSECOND;
     }
     inline __int128 to_int128() const {
@@ -60,9 +60,9 @@ struct ParquetInt96 {
         return ans;
     }
 
-    static const uint32_t JULIAN_EPOCH_OFFSET_DAYS;
-    static const uint64_t MICROS_IN_DAY;
-    static const uint64_t NANOS_PER_MICROSECOND;
+    static const int32_t JULIAN_EPOCH_OFFSET_DAYS;
+    static const int64_t MICROS_IN_DAY;
+    static const int64_t NANOS_PER_MICROSECOND;
 };
 #pragma pack()
 static_assert(sizeof(ParquetInt96) == 12, "The size of ParquetInt96 is not 
12.");


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to