This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d7ad299154 [fix](NestedType) throw error when reading complex nested
type in orc&parquet (#19489)
d7ad299154 is described below
commit d7ad299154f25aa0ba50c1de696fcbc11f666c4b
Author: Ashin Gau <[email protected]>
AuthorDate: Thu May 11 07:51:02 2023 +0800
[fix](NestedType) throw error when reading complex nested type in
orc&parquet (#19489)
Doris blocks do not yet support complex nested types, but the ORC and Parquet
readers were still generating complex nested columns,
which made the MySQL client output wrong and confused users.
---
be/src/vec/exec/format/orc/vorc_reader.cpp | 20 ++++++++++++++++++++
.../exec/format/parquet/vparquet_column_reader.cpp | 17 +++++++++++++++++
2 files changed, 37 insertions(+)
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 0f13b4d191..6a88360b49 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -1016,6 +1016,11 @@ Status OrcReader::_orc_column_to_doris_column(const
std::string& col_name,
reinterpret_cast<const
DataTypeArray*>(remove_nullable(data_type).get())
->get_nested_type());
const orc::Type* nested_orc_type = orc_column_type->getSubtype(0);
+ if (nested_orc_type->getKind() == orc::TypeKind::MAP ||
+ nested_orc_type->getKind() == orc::TypeKind::STRUCT) {
+ return Status::InternalError(
+ "Array does not support nested map/struct type in column
{}", col_name);
+ }
return _orc_column_to_doris_column<is_filter>(
col_name,
static_cast<ColumnArray&>(*data_column).get_data_ptr(), nested_type,
nested_orc_type, orc_list->elements.get(), element_size);
@@ -1037,6 +1042,15 @@ Status OrcReader::_orc_column_to_doris_column(const
std::string& col_name,
->get_value_type());
const orc::Type* orc_key_type = orc_column_type->getSubtype(0);
const orc::Type* orc_value_type = orc_column_type->getSubtype(1);
+ if (orc_key_type->getKind() == orc::TypeKind::LIST ||
+ orc_key_type->getKind() == orc::TypeKind::MAP ||
+ orc_key_type->getKind() == orc::TypeKind::STRUCT ||
+ orc_value_type->getKind() == orc::TypeKind::LIST ||
+ orc_value_type->getKind() == orc::TypeKind::MAP ||
+ orc_value_type->getKind() == orc::TypeKind::STRUCT) {
+ return Status::InternalError("Map does not support nested complex
type in column {}",
+ col_name);
+ }
const ColumnPtr& doris_key_column = doris_map.get_keys_ptr();
const ColumnPtr& doris_value_column = doris_map.get_values_ptr();
RETURN_IF_ERROR(_orc_column_to_doris_column<is_filter>(col_name,
doris_key_column,
@@ -1060,6 +1074,12 @@ Status OrcReader::_orc_column_to_doris_column(const
std::string& col_name,
for (int i = 0; i < doris_struct.tuple_size(); ++i) {
orc::ColumnVectorBatch* orc_field = orc_struct->fields[i];
const orc::Type* orc_type = orc_column_type->getSubtype(i);
+ if (orc_type->getKind() == orc::TypeKind::LIST ||
+ orc_type->getKind() == orc::TypeKind::MAP ||
+ orc_type->getKind() == orc::TypeKind::STRUCT) {
+ return Status::InternalError(
+ "Struct does not support nested complex type in column
{}", col_name);
+ }
const ColumnPtr& doris_field = doris_struct.get_column_ptr(i);
const DataTypePtr& doris_type = doris_struct_type->get_element(i);
RETURN_IF_ERROR(_orc_column_to_doris_column<is_filter>(
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index 152012da4b..af1266cbbd 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -123,6 +123,11 @@ Status ParquetColumnReader::create(io::FileReaderSPtr
file, FieldSchema* field,
size_t max_buf_size) {
if (field->type.type == TYPE_ARRAY) {
std::unique_ptr<ParquetColumnReader> element_reader;
+ if (field->children[0].type.type == TYPE_MAP ||
+ field->children[0].type.type == TYPE_STRUCT) {
+ return Status::InternalError(
+ "Array does not support nested map/struct type in column
{}", field->name);
+ }
RETURN_IF_ERROR(create(file, &field->children[0], row_group,
row_ranges, ctz, io_ctx,
element_reader, max_buf_size));
element_reader->set_nested_column();
@@ -130,6 +135,13 @@ Status ParquetColumnReader::create(io::FileReaderSPtr
file, FieldSchema* field,
RETURN_IF_ERROR(array_reader->init(std::move(element_reader), field));
reader.reset(array_reader);
} else if (field->type.type == TYPE_MAP) {
+ auto key_type = field->children[0].children[0].type.type;
+ auto value_type = field->children[0].children[1].type.type;
+ if (key_type == TYPE_ARRAY || key_type == TYPE_MAP || key_type ==
TYPE_STRUCT ||
+ value_type == TYPE_ARRAY || value_type == TYPE_MAP || value_type
== TYPE_STRUCT) {
+ return Status::InternalError("Map does not support nested complex
type in column {}",
+ field->name);
+ }
std::unique_ptr<ParquetColumnReader> key_reader;
std::unique_ptr<ParquetColumnReader> value_reader;
RETURN_IF_ERROR(create(file, &field->children[0].children[0],
row_group, row_ranges, ctz,
@@ -144,6 +156,11 @@ Status ParquetColumnReader::create(io::FileReaderSPtr
file, FieldSchema* field,
} else if (field->type.type == TYPE_STRUCT) {
std::vector<std::unique_ptr<ParquetColumnReader>> child_readers;
for (int i = 0; i < field->children.size(); ++i) {
+ auto child_type = field->children[i].type.type;
+ if (child_type == TYPE_ARRAY || child_type == TYPE_MAP ||
child_type == TYPE_STRUCT) {
+ return Status::InternalError(
+ "Struct does not support nested complex type in column
{}", field->name);
+ }
std::unique_ptr<ParquetColumnReader> child_reader;
RETURN_IF_ERROR(create(file, &field->children[i], row_group,
row_ranges, ctz, io_ctx,
child_reader, max_buf_size));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]