github-actions[bot] commented on code in PR #63192:
URL: https://github.com/apache/doris/pull/63192#discussion_r3253487495


##########
be/src/format/parquet/vparquet_column_reader.cpp:
##########
@@ -103,6 +127,1608 @@ static void fill_array_offset(FieldSchema* field, ColumnArray::Offsets64& offset
     }
 }
 
+static constexpr int64_t UNIX_EPOCH_DAYNR = 719528;
+static constexpr int64_t MICROS_PER_SECOND = 1000000;
+
+static int64_t variant_date_value(const VecDateTimeValue& value) {
+    return value.daynr() - UNIX_EPOCH_DAYNR;
+}
+
+static int64_t variant_date_value(const DateV2Value<DateV2ValueType>& value) {
+    return value.daynr() - UNIX_EPOCH_DAYNR;
+}
+
+static int64_t variant_datetime_value(const VecDateTimeValue& value) {
+    int64_t timestamp = 0;
+    value.unix_timestamp(&timestamp, cctz::utc_time_zone());
+    return timestamp * MICROS_PER_SECOND;
+}
+
+static int64_t variant_datetime_value(const DateV2Value<DateTimeV2ValueType>& value) {
+    int64_t timestamp = 0;
+    value.unix_timestamp(&timestamp, cctz::utc_time_zone());
+    return timestamp * MICROS_PER_SECOND + value.microsecond();
+}
+
+static int64_t variant_datetime_value(const TimestampTzValue& value) {
+    int64_t timestamp = 0;
+    value.unix_timestamp(&timestamp, cctz::utc_time_zone());
+    return timestamp * MICROS_PER_SECOND + value.microsecond();
+}
+
+static int find_child_idx(const FieldSchema& field, std::string_view name) {
+    for (int i = 0; i < field.children.size(); ++i) {
+        if (field.children[i].lower_case_name == name) {
+            return i;
+        }
+    }
+    return -1;
+}
+
+static bool is_variant_wrapper_typed_value_child(const FieldSchema& field) {
+    auto type = remove_nullable(field.data_type);
+    return type->get_primitive_type() == TYPE_STRUCT || type->get_primitive_type() == TYPE_ARRAY;
+}
+
+static bool is_variant_wrapper_field(const FieldSchema& field,
+                                     bool allow_scalar_typed_value_only_wrapper) {
+    auto type = remove_nullable(field.data_type);
+    if (type->get_primitive_type() != TYPE_STRUCT && type->get_primitive_type() != TYPE_VARIANT) {
+        return false;
+    }
+
+    bool has_metadata = false;
+    bool has_value = false;
+    const FieldSchema* typed_value = nullptr;
+    for (const auto& child : field.children) {
+        if (child.lower_case_name == "metadata") {
+            if (child.physical_type != tparquet::Type::BYTE_ARRAY) {
+                return false;
+            }
+            has_metadata = true;
+            continue;
+        }
+        if (child.lower_case_name == "value") {
+            if (child.physical_type != tparquet::Type::BYTE_ARRAY) {
+                return false;
+            }
+            has_value = true;
+            continue;
+        }
+        if (child.lower_case_name == "typed_value") {
+            typed_value = &child;
+            continue;
+        }
+        return false;
+    }
+    if (has_metadata && has_value) {
+        return type->get_primitive_type() == TYPE_VARIANT || typed_value != nullptr;
+    }
+    if (has_value) {
+        return typed_value != nullptr;
+    }
+    return typed_value != nullptr && (allow_scalar_typed_value_only_wrapper ||
+                                      is_variant_wrapper_typed_value_child(*typed_value));
+}
+
+static bool is_value_only_variant_wrapper_candidate(const FieldSchema& field) {

Review Comment:
   This nullable guard also filters out valid value-only shredded residual 
wrappers. A shredded field under `typed_value` is typically optional to 
represent the field being absent; for a layout like `v.typed_value.metric { 
optional binary value }`, `metric.data_type` is `Nullable(Struct<value>)`. 
Row-wise reconstruction then skips the residual-wrapper path here and falls 
through to `typed_value_to_variant_map()`, which treats the bytes as an 
ordinary `metric.value` typed binary child instead of decoding the residual 
VARIANT payload with the inherited metadata. The new 
`RowWiseReadsValueOnlyNestedResidualField` test does not catch this because it 
builds `metric_field.data_type` as a non-nullable `DataTypeStruct`, while the 
pruning helper tests build the same residual field as nullable. Please 
distinguish user `value` structs from real shredded residual wrappers without 
rejecting optional wrapper groups, and add coverage with a nullable value-only 
residual field.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to