eldenmoon commented on code in PR #63192:
URL: https://github.com/apache/doris/pull/63192#discussion_r3230283458


##########
be/src/format/parquet/vparquet_column_reader.cpp:
##########
@@ -1001,6 +1091,368 @@ Status StructColumnReader::read_column_data(
     return Status::OK();
 }
 
// Initializes the variant reader: builds a struct-typed copy of `field` and creates the
// nested reader (_struct_reader) that is driven by that copy.
//
// Steps visible in this hunk:
//   1. Remember `field` in _field_schema and deep-copy it into _variant_struct_field.
//   2. Build a DataTypeStruct whose members are the children of `field`, each wrapped with
//      make_nullable(); the struct itself is wrapped as nullable when field->data_type is.
//   3. Overwrite the copy's data_type with that struct type and hand the copy to
//      ParquetColumnReader::create(), which fills _struct_reader.
//   4. Mark the created reader via set_column_in_nested().
//
// Returns Status::OK() on success. A failure from ParquetColumnReader::create() is
// presumably propagated by RETURN_IF_ERROR (macro defined outside this hunk).
+Status VariantColumnReader::init(io::FileReaderSPtr file, FieldSchema* field,
+                                 const tparquet::RowGroup& row_group, size_t 
max_buf_size,
+                                 std::unordered_map<int, 
tparquet::OffsetIndex>& col_offsets,
+                                 RuntimeState* state, bool in_collection,
+                                 const std::set<uint64_t>& column_ids,
+                                 const std::set<uint64_t>& filter_column_ids) {
+    _field_schema = field;
     // Work on a private copy so the caller's schema keeps its original data_type.
+    _variant_struct_field = std::make_unique<FieldSchema>(*field);
+
+    DataTypes child_types;
+    Strings child_names;
+    child_types.reserve(field->children.size());
+    child_names.reserve(field->children.size());
     // Every child becomes a nullable struct member, regardless of its declared nullability.
+    for (const auto& child : field->children) {
+        child_types.push_back(make_nullable(child.data_type));
+        child_names.push_back(child.name);
+    }
+    _variant_struct_type = std::make_shared<DataTypeStruct>(child_types, 
child_names);
     // Mirror the nullability of the original field on the synthesized struct type.
+    if (field->data_type->is_nullable()) {
+        _variant_struct_type = make_nullable(_variant_struct_type);
+    }
+    _variant_struct_field->data_type = _variant_struct_type;
+
     // The reader is created from the struct-typed copy, not from `field` itself.
+    RETURN_IF_ERROR(ParquetColumnReader::create(file, 
_variant_struct_field.get(), row_group,
+                                                _row_ranges, _ctz, _io_ctx, 
_struct_reader,
+                                                max_buf_size, col_offsets, 
state, in_collection,
+                                                column_ids, 
filter_column_ids));
+    _struct_reader->set_column_in_nested();
+    return Status::OK();
+}
+
// Extracts the byte payload of `field` into `*value`.
//
// - Null field: sets *present = false, leaves *value untouched, returns OK.
// - TYPE_STRING / TYPE_CHAR / TYPE_VARCHAR: assigns the field's value to *value.
// - TYPE_VARBINARY: copies the referenced bytes (data, size) into *value.
// - Any other type: returns Status::Corruption naming the unexpected type.
//
// Note that *present is set to true before the type switch, so it is already true when
// the Corruption status is returned; callers should check the Status first.
+Status VariantColumnReader::_get_binary_field(const Field& field, std::string* 
value,
+                                              bool* present) const {
+    if (field.is_null()) {
+        *present = false;
+        return Status::OK();
+    }
+    *present = true;
+    switch (field.get_type()) {
+    case TYPE_STRING:
+        *value = field.get<TYPE_STRING>();
+        return Status::OK();
+    case TYPE_CHAR:
+        *value = field.get<TYPE_CHAR>();
+        return Status::OK();
+    case TYPE_VARCHAR:
+        *value = field.get<TYPE_VARCHAR>();
+        return Status::OK();
+    case TYPE_VARBINARY: {
         // Copy by explicit (pointer, length) so the full referenced range is preserved.
+        auto ref = field.get<TYPE_VARBINARY>().to_string_ref();
+        value->assign(ref.data, ref.size);
+        return Status::OK();
+    }
+    default:
+        return Status::Corruption("Parquet VARIANT binary field has unexpected 
Doris type {}",
+                                  field.get_type_name());
+    }
+}
+
+Status VariantColumnReader::_field_to_json(const FieldSchema& field_schema, 
const Field& field,
+                                           std::string* json, bool* present) 
const {
+    if (field.is_null()) {
+        *present = false;
+        return Status::OK();
+    }
+    *present = true;
+    const DataTypePtr& type = remove_nullable(field_schema.data_type);
+    switch (type->get_primitive_type()) {
+    case TYPE_BOOLEAN:
+    case TYPE_TINYINT:
+    case TYPE_SMALLINT:
+    case TYPE_INT:
+    case TYPE_BIGINT:
+    case TYPE_LARGEINT:
+    case TYPE_FLOAT:
+    case TYPE_DOUBLE:
+    case TYPE_DECIMALV2:
+    case TYPE_DECIMAL32:
+    case TYPE_DECIMAL64:
+    case TYPE_DECIMAL128I:
+    case TYPE_DECIMAL256:
+        json->append(field.to_debug_string(type->get_scale()));
+        return Status::OK();

Review Comment:
   Handled in the latest head. The shredded typed-value path now checks 
TYPE_FLOAT/TYPE_DOUBLE values with std::isfinite() and emits JSON null for 
non-finite values (NaN, ±Infinity), matching the unshredded decoder. The 
typed-only regression test asserts that cast(v[f] as string) is null in the 
non-finite case.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to