This is an automated email from the ASF dual-hosted git repository.
cambyzju pushed a commit to branch struct-type
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/struct-type by this push:
new 2578bad186 [WIP](struct-type) support struct-type in vectorize engine
(#15665)
2578bad186 is described below
commit 2578bad186bc234be6922a6ef9f2175d8819b14a
Author: xy720 <[email protected]>
AuthorDate: Thu Jan 12 14:40:08 2023 +0800
[WIP](struct-type) support struct-type in vectorize engine (#15665)
---
be/src/olap/aggregate_func.cpp | 2 +
be/src/olap/aggregate_func.h | 15 ++
be/src/olap/field.h | 57 ++++-
be/src/olap/olap_define.h | 3 +
be/src/olap/rowset/segment_v2/column_reader.cpp | 112 ++++++++++
be/src/olap/rowset/segment_v2/column_reader.h | 38 ++++
be/src/olap/rowset/segment_v2/column_writer.cpp | 174 +++++++++++++++
be/src/olap/rowset/segment_v2/column_writer.h | 51 +++++
be/src/olap/rowset/segment_v2/segment_writer.cpp | 9 +
be/src/olap/tablet_meta.cpp | 8 +-
be/src/olap/tablet_schema.cpp | 20 +-
be/src/olap/types.cpp | 52 ++++-
be/src/olap/types.h | 245 ++++++++++++++++++++-
be/src/runtime/CMakeLists.txt | 1 +
be/src/runtime/primitive_type.cpp | 15 ++
be/src/runtime/primitive_type.h | 1 +
be/src/runtime/struct_value.cpp | 27 +++
be/src/runtime/struct_value.h | 67 ++++++
be/src/runtime/types.cpp | 48 +++-
be/src/runtime/types.h | 2 +-
be/src/udf/udf.h | 1 +
be/src/vec/CMakeLists.txt | 1 +
be/src/vec/columns/column_struct.cpp | 34 +--
be/src/vec/columns/column_struct.h | 15 +-
be/src/vec/data_types/data_type_factory.cpp | 41 +++-
be/src/vec/data_types/data_type_factory.hpp | 4 +
be/src/vec/data_types/data_type_struct.cpp | 33 ++-
be/src/vec/data_types/data_type_struct.h | 12 +-
be/src/vec/exprs/vcast_expr.cpp | 1 +
be/src/vec/exprs/vexpr.cpp | 5 +
be/src/vec/exprs/vstruct_literal.cpp | 37 ++++
be/src/vec/exprs/vstruct_literal.h | 34 +++
be/src/vec/functions/function_cast.h | 1 +
be/src/vec/olap/olap_data_convertor.cpp | 62 ++++++
be/src/vec/olap/olap_data_convertor.h | 21 ++
be/src/vec/sink/vmysql_result_writer.cpp | 100 +++++++--
be/src/vec/sink/vmysql_result_writer.h | 2 +-
be/src/vec/sink/vtablet_sink.cpp | 3 +-
fe/fe-core/src/main/cup/sql_parser.cup | 33 ++-
.../java/org/apache/doris/analysis/ColumnDef.java | 4 +
.../org/apache/doris/analysis/CreateTableStmt.java | 12 +-
.../main/java/org/apache/doris/analysis/Expr.java | 7 +-
.../org/apache/doris/analysis/StructLiteral.java | 150 +++++++++++++
.../java/org/apache/doris/analysis/Subquery.java | 2 +-
.../java/org/apache/doris/analysis/TypeDef.java | 47 ++--
.../java/org/apache/doris/catalog/ArrayType.java | 10 +
.../main/java/org/apache/doris/catalog/Column.java | 66 +++---
.../java/org/apache/doris/catalog/MapType.java | 5 +
.../org/apache/doris/catalog/PrimitiveType.java | 5 +-
.../java/org/apache/doris/catalog/StructField.java | 55 ++++-
.../java/org/apache/doris/catalog/StructType.java | 94 +++++++-
.../main/java/org/apache/doris/catalog/Type.java | 24 +-
fe/fe-core/src/main/jflex/sql_scanner.flex | 4 +
gensrc/proto/types.proto | 1 +
gensrc/thrift/Exprs.thrift | 1 +
gensrc/thrift/Types.thrift | 1 +
56 files changed, 1702 insertions(+), 173 deletions(-)
diff --git a/be/src/olap/aggregate_func.cpp b/be/src/olap/aggregate_func.cpp
index 027105952c..7236488f78 100644
--- a/be/src/olap/aggregate_func.cpp
+++ b/be/src/olap/aggregate_func.cpp
@@ -147,6 +147,8 @@ AggregateFuncResolver::AggregateFuncResolver() {
OLAP_FIELD_TYPE_DECIMAL128I>();
add_aggregate_mapping<OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY,
OLAP_FIELD_TYPE_ARRAY>();
+ // struct types
+ add_aggregate_mapping<OLAP_FIELD_AGGREGATION_NONE,
OLAP_FIELD_TYPE_STRUCT>();
// Min Aggregate Function
add_aggregate_mapping<OLAP_FIELD_AGGREGATION_MIN,
OLAP_FIELD_TYPE_TINYINT>();
diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h
index 5a397b658f..b72be9a7cd 100644
--- a/be/src/olap/aggregate_func.h
+++ b/be/src/olap/aggregate_func.h
@@ -107,6 +107,21 @@ struct BaseAggregateFuncs {
static void finalize(RowCursorCell* src, MemPool* mem_pool) {}
};
+// Define an empty trait for the struct type because
+// struct is currently only supported in duplicate-key tables.
+template <FieldType sub_type>
+struct BaseAggregateFuncs<OLAP_FIELD_TYPE_STRUCT, sub_type> {
+ // Default init does nothing; direct_copy in the struct field is used instead.
+ static void init(RowCursorCell* dst, const char* src, bool src_null,
MemPool* mem_pool,
+ ObjectPool* agg_pool) {}
+
+ // Default update does nothing.
+ static void update(RowCursorCell* dst, const RowCursorCell& src, MemPool*
mem_pool) {}
+
+ // Default finalize does nothing.
+ static void finalize(RowCursorCell* src, MemPool* mem_pool) {}
+};
+
template <FieldType sub_type>
struct BaseAggregateFuncs<OLAP_FIELD_TYPE_ARRAY, sub_type> {
static void init(RowCursorCell* dst, const char* src, bool src_null,
MemPool* mem_pool,
diff --git a/be/src/olap/field.h b/be/src/olap/field.h
index 391f7f45be..97a516f32c 100644
--- a/be/src/olap/field.h
+++ b/be/src/olap/field.h
@@ -49,7 +49,9 @@ public:
_index_size(column.index_length()),
_is_nullable(column.is_nullable()),
_unique_id(column.unique_id()) {
- if (column.type() == OLAP_FIELD_TYPE_ARRAY) {
+ if (column.type() == OLAP_FIELD_TYPE_STRUCT) {
+ _agg_info = get_aggregate_info(column.aggregation(),
column.type());
+ } else if (column.type() == OLAP_FIELD_TYPE_ARRAY) {
_agg_info = get_aggregate_info(column.aggregation(), column.type(),
column.get_sub_column(0).type());
} else {
@@ -311,6 +313,9 @@ protected:
const AggregateInfo* _agg_info;
// unit : byte
// except for strings, other types have fixed lengths
+ // Note that the struct type itself has a fixed length, but because
+ // its number of subfields varies, the actual length of
+ // a struct field is not fixed.
uint32_t _length;
// Since the length of the STRING type cannot be determined,
// only dynamic memory can be used. Mempool cannot realize realloc.
@@ -450,6 +455,38 @@ uint32_t Field::hash_code(const CellType& cell, uint32_t
seed) const {
return _type_info->hash_code(cell.cell_ptr(), seed);
}
+class StructField : public Field {
+public:
+ explicit StructField(const TabletColumn& column) : Field(column) {}
+
+ void consume(RowCursorCell* dst, const char* src, bool src_null, MemPool*
mem_pool,
+ ObjectPool* agg_pool) const override {
+ dst->set_is_null(src_null);
+ if (src_null) {
+ return;
+ }
+ _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool);
+ }
+
+ char* allocate_memory(char* cell_ptr, char* variable_ptr) const override {
+ auto struct_v = (StructValue*)cell_ptr;
+ struct_v->set_values(reinterpret_cast<void**>(variable_ptr));
+ variable_ptr += _length;
+ for (size_t i = 0; i < get_sub_field_count(); i++) {
+ variable_ptr += get_sub_field(i)->get_variable_len();
+ }
+ return variable_ptr;
+ }
+
+ size_t get_variable_len() const override {
+ size_t variable_len = _length;
+ for (size_t i = 0; i < get_sub_field_count(); i++) {
+ variable_len += get_sub_field(i)->get_variable_len();
+ }
+ return variable_len;
+ }
+};
+
class ArrayField : public Field {
public:
explicit ArrayField(const TabletColumn& column) : Field(column) {}
@@ -746,6 +783,15 @@ public:
return new VarcharField(column);
case OLAP_FIELD_TYPE_STRING:
return new StringField(column);
+ case OLAP_FIELD_TYPE_STRUCT: {
+ auto* local = new StructField(column);
+ for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
+ std::unique_ptr<Field> sub_field(
+ FieldFactory::create(column.get_sub_column(i)));
+ local->add_sub_field(std::move(sub_field));
+ }
+ return local;
+ }
case OLAP_FIELD_TYPE_ARRAY: {
std::unique_ptr<Field>
item_field(FieldFactory::create(column.get_sub_column(0)));
auto* local = new ArrayField(column);
@@ -786,6 +832,15 @@ public:
return new VarcharField(column);
case OLAP_FIELD_TYPE_STRING:
return new StringField(column);
+ case OLAP_FIELD_TYPE_STRUCT: {
+ auto* local = new StructField(column);
+ for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
+ std::unique_ptr<Field> sub_field(
+ FieldFactory::create(column.get_sub_column(i)));
+ local->add_sub_field(std::move(sub_field));
+ }
+ return local;
+ }
case OLAP_FIELD_TYPE_ARRAY: {
std::unique_ptr<Field>
item_field(FieldFactory::create(column.get_sub_column(0)));
auto* local = new ArrayField(column);
diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h
index 40a6dd9dee..93889d2d3b 100644
--- a/be/src/olap/olap_define.h
+++ b/be/src/olap/olap_define.h
@@ -60,6 +60,9 @@ static const uint32_t OLAP_STRING_MAX_LENGTH = 2147483647;
// the max length supported for jsonb type 2G
static const uint32_t OLAP_JSONB_MAX_LENGTH = 2147483647;
+// the max length supported for struct, but excluding the length of its
subtypes.
+static const uint16_t OLAP_STRUCT_MAX_LENGTH = 65535;
+
// the max length supported for array
static const uint16_t OLAP_ARRAY_MAX_LENGTH = 65535;
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 4ce70b1b0d..cab7aae2c5 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -31,6 +31,7 @@
#include "util/rle_encoding.h" // for RleDecoder
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
+#include "vec/columns/column_struct.h"
#include "vec/core/types.h"
#include "vec/runtime/vdatetime_value.h" //for VecDateTime
@@ -49,6 +50,23 @@ Status ColumnReader::create(const ColumnReaderOptions& opts,
const ColumnMetaPB&
} else {
auto type = (FieldType)meta.type();
switch (type) {
+ case FieldType::OLAP_FIELD_TYPE_STRUCT: {
+ // empty structs are not supported
+ DCHECK(meta.children_columns_size() >= 1);
+ // create struct column reader
+ std::unique_ptr<ColumnReader> struct_reader(
+ new ColumnReader(opts, meta, num_rows, file_reader));
+ struct_reader->_sub_readers.reserve(meta.children_columns_size());
+ for (size_t i = 0; i < meta.children_columns_size(); i++) {
+ std::unique_ptr<ColumnReader> sub_reader;
+ RETURN_IF_ERROR(ColumnReader::create(opts,
meta.children_columns(i),
+
meta.children_columns(i).num_rows(),
+ file_reader,
&sub_reader));
+ struct_reader->_sub_readers.push_back(std::move(sub_reader));
+ }
+ *reader = std::move(struct_reader);
+ return Status::OK();
+ }
case FieldType::OLAP_FIELD_TYPE_ARRAY: {
DCHECK(meta.children_columns_size() == 2 ||
meta.children_columns_size() == 3);
@@ -398,6 +416,24 @@ Status ColumnReader::new_iterator(ColumnIterator**
iterator) {
} else {
auto type = (FieldType)_meta.type();
switch (type) {
+ case FieldType::OLAP_FIELD_TYPE_STRUCT: {
+ std::vector<ColumnIterator*> sub_column_iterators;
+ size_t child_size = is_nullable() ? _sub_readers.size() - 1 :
_sub_readers.size();
+ sub_column_iterators.reserve(child_size);
+
+ ColumnIterator* sub_column_iterator;
+ for (size_t i = 0; i < child_size; i++) {
+
RETURN_IF_ERROR(_sub_readers[i]->new_iterator(&sub_column_iterator));
+ sub_column_iterators.push_back(sub_column_iterator);
+ }
+
+ ColumnIterator* null_iterator = nullptr;
+ if (is_nullable()) {
+
RETURN_IF_ERROR(_sub_readers[child_size]->new_iterator(&null_iterator));
+ }
+ *iterator = new StructFileColumnIterator(this, null_iterator,
sub_column_iterators);
+ return Status::OK();
+ }
case FieldType::OLAP_FIELD_TYPE_ARRAY: {
ColumnIterator* item_iterator = nullptr;
RETURN_IF_ERROR(_sub_readers[0]->new_iterator(&item_iterator));
@@ -423,6 +459,82 @@ Status ColumnReader::new_iterator(ColumnIterator**
iterator) {
////////////////////////////////////////////////////////////////////////////////
+StructFileColumnIterator::StructFileColumnIterator(
+ ColumnReader* reader, ColumnIterator* null_iterator,
+ std::vector<ColumnIterator*>& sub_column_iterators)
+ : _struct_reader(reader) {
+ _sub_column_iterators.resize(sub_column_iterators.size());
+ for (size_t i = 0; i < sub_column_iterators.size(); i++) {
+ _sub_column_iterators[i].reset(sub_column_iterators[i]);
+ }
+ if (_struct_reader->is_nullable()) {
+ _null_iterator.reset(null_iterator);
+ }
+}
+
+Status StructFileColumnIterator::init(const ColumnIteratorOptions& opts) {
+ for (auto& column_iterator : _sub_column_iterators) {
+ RETURN_IF_ERROR(column_iterator->init(opts));
+ }
+ if (_struct_reader->is_nullable()) {
+ RETURN_IF_ERROR(_null_iterator->init(opts));
+ }
+ return Status::OK();
+}
+
+Status StructFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst,
bool* has_null) {
+ return Status::NotSupported("not supported");
+}
+
+Status StructFileColumnIterator::next_batch(size_t* n,
vectorized::MutableColumnPtr& dst,
+ bool* has_null) {
+ const auto* column_struct =
vectorized::check_and_get_column<vectorized::ColumnStruct>(
+ dst->is_nullable() ?
static_cast<vectorized::ColumnNullable&>(*dst).get_nested_column()
+ : *dst);
+ for (size_t i = 0; i < column_struct->tuple_size(); i++) {
+ size_t num_read = *n;
+ auto sub_column_ptr = column_struct->get_column(i).assume_mutable();
+ bool column_has_null = false;
+ RETURN_IF_ERROR(
+ _sub_column_iterators[i]->next_batch(&num_read,
sub_column_ptr, &column_has_null));
+ DCHECK(num_read == *n);
+ }
+
+ if (dst->is_nullable()) {
+ size_t num_read = *n;
+ auto null_map_ptr =
+
static_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column_ptr();
+ bool null_signs_has_null = false;
+ RETURN_IF_ERROR(_null_iterator->next_batch(&num_read, null_map_ptr,
&null_signs_has_null));
+ DCHECK(num_read == *n);
+ }
+
+ return Status::OK();
+}
+
+Status StructFileColumnIterator::seek_to_ordinal(ordinal_t ord) {
+ for (auto& column_iterator : _sub_column_iterators) {
+ RETURN_IF_ERROR(column_iterator->seek_to_ordinal(ord));
+ }
+ if (_struct_reader->is_nullable()) {
+ RETURN_IF_ERROR(_null_iterator->seek_to_ordinal(ord));
+ }
+ return Status::OK();
+}
+
+Status StructFileColumnIterator::read_by_rowids(const rowid_t* rowids, const
size_t count,
+ vectorized::MutableColumnPtr&
dst) {
+ for (size_t i = 0; i < count; ++i) {
+ RETURN_IF_ERROR(seek_to_ordinal(rowids[i]));
+ size_t num_read = 1;
+ RETURN_IF_ERROR(next_batch(&num_read, dst, nullptr));
+ DCHECK(num_read == 1);
+ }
+ return Status::OK();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
ArrayFileColumnIterator::ArrayFileColumnIterator(ColumnReader* reader,
FileColumnIterator*
offset_reader,
ColumnIterator* item_iterator,
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h
b/be/src/olap/rowset/segment_v2/column_reader.h
index 5a86bdec10..2d04c5fd71 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -378,6 +378,44 @@ public:
ordinal_t get_current_ordinal() const override { return 0; }
};
+class StructFileColumnIterator final : public ColumnIterator {
+public:
+ explicit StructFileColumnIterator(ColumnReader* reader, ColumnIterator*
null_iterator,
+ std::vector<ColumnIterator*>&
sub_column_iterators);
+
+ ~StructFileColumnIterator() override = default;
+
+ Status init(const ColumnIteratorOptions& opts) override;
+
+ Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null)
override;
+
+ Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool*
has_null) override;
+
+ Status read_by_rowids(const rowid_t* rowids, const size_t count,
+ vectorized::MutableColumnPtr& dst) override;
+
+ Status seek_to_first() override {
+ for (auto& column_iterator : _sub_column_iterators) {
+ RETURN_IF_ERROR(column_iterator->seek_to_first());
+ }
+ if (_struct_reader->is_nullable()) {
+ RETURN_IF_ERROR(_null_iterator->seek_to_first());
+ }
+ return Status::OK();
+ }
+
+ Status seek_to_ordinal(ordinal_t ord) override;
+
+ ordinal_t get_current_ordinal() const override {
+ return _sub_column_iterators[0]->get_current_ordinal();
+ }
+
+private:
+ ColumnReader* _struct_reader;
+ std::unique_ptr<ColumnIterator> _null_iterator;
+ std::vector<std::unique_ptr<ColumnIterator>> _sub_column_iterators;
+};
+
class ArrayFileColumnIterator final : public ColumnIterator {
public:
explicit ArrayFileColumnIterator(ColumnReader* reader, FileColumnIterator*
offset_reader,
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp
b/be/src/olap/rowset/segment_v2/column_writer.cpp
index df820b141e..fdd42f9444 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -88,6 +88,78 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts,
const TabletColumn*
return Status::OK();
} else {
switch (column->type()) {
+ case FieldType::OLAP_FIELD_TYPE_STRUCT: {
+ // empty structs are not supported
+ DCHECK(column->get_subtype_count() >= 1);
+ std::vector<std::unique_ptr<ColumnWriter>> sub_column_writers;
+ sub_column_writers.reserve(column->get_subtype_count());
+ for (uint32_t i = 0; i < column->get_subtype_count(); i++) {
+ const TabletColumn& sub_column = column->get_sub_column(i);
+
+ // create sub writer
+ ColumnWriterOptions column_options;
+ column_options.meta = opts.meta->mutable_children_columns(i);
+ column_options.need_zone_map = false;
+ column_options.need_bloom_filter = sub_column.is_bf_column();
+ column_options.need_bitmap_index =
sub_column.has_bitmap_index();
+ column_options.inverted_index = nullptr;
+ if (sub_column.type() == FieldType::OLAP_FIELD_TYPE_STRUCT) {
+ if (column_options.need_bloom_filter) {
+ return Status::NotSupported("Do not support bloom
filter for struct type");
+ }
+ if (column_options.need_bitmap_index) {
+ return Status::NotSupported("Do not support bitmap
index for struct type");
+ }
+ }
+ if (sub_column.type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
+ if (column_options.need_bloom_filter) {
+ return Status::NotSupported("Do not support bloom
filter for array type");
+ }
+ if (column_options.need_bitmap_index) {
+ return Status::NotSupported("Do not support bitmap
index for array type");
+ }
+ }
+ std::unique_ptr<ColumnWriter> sub_column_writer;
+ RETURN_IF_ERROR(ColumnWriter::create(column_options,
&sub_column, file_writer,
+ &sub_column_writer));
+ sub_column_writers.push_back(std::move(sub_column_writer));
+ }
+
+ // if nullable, create null writer
+ ScalarColumnWriter* null_writer = nullptr;
+ if (opts.meta->is_nullable()) {
+ FieldType null_type = FieldType::OLAP_FIELD_TYPE_TINYINT;
+ ColumnWriterOptions null_options;
+ null_options.meta = opts.meta->add_children_columns();
+ null_options.meta->set_column_id(column->get_subtype_count() +
1);
+ null_options.meta->set_unique_id(column->get_subtype_count() +
1);
+ null_options.meta->set_type(null_type);
+ null_options.meta->set_is_nullable(false);
+ null_options.meta->set_length(
+
get_scalar_type_info<OLAP_FIELD_TYPE_TINYINT>()->size());
+ null_options.meta->set_encoding(DEFAULT_ENCODING);
+ null_options.meta->set_compression(opts.meta->compression());
+
+ null_options.need_zone_map = false;
+ null_options.need_bloom_filter = false;
+ null_options.need_bitmap_index = false;
+
+ TabletColumn null_column = TabletColumn(
+ OLAP_FIELD_AGGREGATION_NONE, null_type,
null_options.meta->is_nullable(),
+ null_options.meta->unique_id(),
null_options.meta->length());
+ null_column.set_name("nullable");
+ null_column.set_index_length(-1); // no short key index
+ std::unique_ptr<Field>
null_field(FieldFactory::create(null_column));
+ null_writer =
+ new ScalarColumnWriter(null_options,
std::move(null_field), file_writer);
+ }
+
+ std::unique_ptr<ColumnWriter> writer_local =
+ std::unique_ptr<ColumnWriter>(new StructColumnWriter(
+ opts, std::move(field), null_writer,
sub_column_writers));
+ *writer = std::move(writer_local);
+ return Status::OK();
+ }
case FieldType::OLAP_FIELD_TYPE_ARRAY: {
DCHECK(column->get_subtype_count() == 1);
const TabletColumn& item_column = column->get_sub_column(0);
@@ -542,6 +614,108 @@ Status ScalarColumnWriter::finish_current_page() {
////////////////////////////////////////////////////////////////////////////////
+StructColumnWriter::StructColumnWriter(
+ const ColumnWriterOptions& opts, std::unique_ptr<Field> field,
+ ScalarColumnWriter* null_writer,
+ std::vector<std::unique_ptr<ColumnWriter>>& sub_column_writers)
+ : ColumnWriter(std::move(field), opts.meta->is_nullable()),
_opts(opts) {
+ for (auto& sub_column_writer : sub_column_writers) {
+ _sub_column_writers.push_back(std::move(sub_column_writer));
+ }
+ _num_sub_column_writers = _sub_column_writers.size();
+ DCHECK(_num_sub_column_writers >= 1);
+ if (is_nullable()) {
+ _null_writer.reset(null_writer);
+ }
+}
+
+Status StructColumnWriter::init() {
+ for (auto& column_writer : _sub_column_writers) {
+ RETURN_IF_ERROR(column_writer->init());
+ }
+ if (is_nullable()) {
+ RETURN_IF_ERROR(_null_writer->init());
+ }
+ return Status::OK();
+}
+
+Status StructColumnWriter::write_inverted_index() {
+ if (_opts.inverted_index) {
+ for (auto& column_writer : _sub_column_writers) {
+ RETURN_IF_ERROR(column_writer->write_inverted_index());
+ }
+ }
+ return Status::OK();
+}
+
+Status StructColumnWriter::append_nullable(const uint8_t* null_map, const
uint8_t** ptr,
+ size_t num_rows) {
+ RETURN_IF_ERROR(append_data(ptr, num_rows));
+ RETURN_IF_ERROR(_null_writer->append_data(&null_map, num_rows));
+ return Status::OK();
+}
+
+Status StructColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) {
+ auto data_cursor = reinterpret_cast<const void**>(ptr);
+ auto null_map_cursor = data_cursor + _num_sub_column_writers;
+ for (auto& column_writer : _sub_column_writers) {
+ RETURN_IF_ERROR(column_writer->append(reinterpret_cast<const
uint8_t*>(*null_map_cursor),
+ *data_cursor, num_rows));
+ data_cursor++;
+ null_map_cursor++;
+ }
+ return Status::OK();
+}
+
+uint64_t StructColumnWriter::estimate_buffer_size() {
+ uint64_t size = 0;
+ for (auto& column_writer : _sub_column_writers) {
+ size += column_writer->estimate_buffer_size();
+ }
+ size += is_nullable() ? _null_writer->estimate_buffer_size() : 0;
+ return size;
+}
+
+Status StructColumnWriter::finish() {
+ for (auto& column_writer : _sub_column_writers) {
+ RETURN_IF_ERROR(column_writer->finish());
+ }
+ if (is_nullable()) {
+ RETURN_IF_ERROR(_null_writer->finish());
+ }
+ return Status::OK();
+}
+
+Status StructColumnWriter::write_data() {
+ for (auto& column_writer : _sub_column_writers) {
+ RETURN_IF_ERROR(column_writer->write_data());
+ }
+ if (is_nullable()) {
+ RETURN_IF_ERROR(_null_writer->write_data());
+ }
+ return Status::OK();
+}
+
+Status StructColumnWriter::write_ordinal_index() {
+ for (auto& column_writer : _sub_column_writers) {
+ RETURN_IF_ERROR(column_writer->write_ordinal_index());
+ }
+ if (is_nullable()) {
+ RETURN_IF_ERROR(_null_writer->write_ordinal_index());
+ }
+ return Status::OK();
+}
+
+Status StructColumnWriter::append_nulls(size_t num_rows) {
+ return Status::NotSupported("struct writer not support append nulls");
+}
+
+Status StructColumnWriter::finish_current_page() {
+ return Status::NotSupported("struct writer has no data, can not
finish_current_page");
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
ArrayColumnWriter::ArrayColumnWriter(const ColumnWriterOptions& opts,
std::unique_ptr<Field> field,
ScalarColumnWriter* offset_writer,
ScalarColumnWriter* null_writer,
diff --git a/be/src/olap/rowset/segment_v2/column_writer.h
b/be/src/olap/rowset/segment_v2/column_writer.h
index 5ea7ae654c..007a69a6d9 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -265,6 +265,57 @@ private:
FlushPageCallback* _new_page_callback = nullptr;
};
+class StructColumnWriter final : public ColumnWriter {
+public:
+ explicit StructColumnWriter(const ColumnWriterOptions& opts,
std::unique_ptr<Field> field,
+ ScalarColumnWriter* null_writer,
+ std::vector<std::unique_ptr<ColumnWriter>>&
sub_column_writers);
+ ~StructColumnWriter() override = default;
+
+ Status init() override;
+
+ Status append_nullable(const uint8_t* null_map, const uint8_t** data,
size_t num_rows);
+ Status append_data(const uint8_t** ptr, size_t num_rows) override;
+
+ uint64_t estimate_buffer_size() override;
+
+ Status finish() override;
+ Status write_data() override;
+ Status write_ordinal_index() override;
+ Status append_nulls(size_t num_rows) override;
+
+ Status finish_current_page() override;
+
+ Status write_zone_map() override {
+ if (_opts.need_zone_map) {
+ return Status::NotSupported("struct not support zone map");
+ }
+ return Status::OK();
+ }
+
+ Status write_bitmap_index() override {
+ if (_opts.need_bitmap_index) {
+ return Status::NotSupported("struct not support bitmap index");
+ }
+ return Status::OK();
+ }
+ Status write_inverted_index() override;
+ Status write_bloom_filter_index() override {
+ if (_opts.need_bloom_filter) {
+ return Status::NotSupported("struct not support bloom filter
index");
+ }
+ return Status::OK();
+ }
+
+ ordinal_t get_next_rowid() const override { return
_sub_column_writers[0]->get_next_rowid(); }
+
+private:
+ size_t _num_sub_column_writers;
+ std::unique_ptr<ScalarColumnWriter> _null_writer;
+ std::vector<std::unique_ptr<ColumnWriter>> _sub_column_writers;
+ ColumnWriterOptions _opts;
+};
+
class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback {
public:
explicit ArrayColumnWriter(const ColumnWriterOptions& opts,
std::unique_ptr<Field> field,
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index df7f26c34f..28ef07d40d 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -135,6 +135,15 @@ Status SegmentWriter::init(const std::vector<uint32_t>&
col_ids, bool has_key) {
break;
}
}
+ if (column.type() == FieldType::OLAP_FIELD_TYPE_STRUCT) {
+ opts.need_zone_map = false;
+ if (opts.need_bloom_filter) {
+ return Status::NotSupported("Do not support bloom filter for
struct type");
+ }
+ if (opts.need_bitmap_index) {
+ return Status::NotSupported("Do not support bitmap index for
struct type");
+ }
+ }
if (column.type() == FieldType::OLAP_FIELD_TYPE_ARRAY) {
opts.need_zone_map = false;
if (opts.need_bloom_filter) {
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index e3d4c6f3ff..24b5460549 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -295,8 +295,12 @@ void TabletMeta::init_column_from_tcolumn(uint32_t
unique_id, const TColumn& tco
if (tcolumn.__isset.is_bloom_filter_column) {
column->set_is_bf_column(tcolumn.is_bloom_filter_column);
}
-
- if (tcolumn.column_type.type == TPrimitiveType::ARRAY) {
+ if (tcolumn.column_type.type == TPrimitiveType::STRUCT) {
+ for (size_t i = 0; i < tcolumn.children_column.size(); i++) {
+ ColumnPB* children_column = column->add_children_columns();
+ init_column_from_tcolumn(i, tcolumn.children_column[i],
children_column);
+ }
+ } else if (tcolumn.column_type.type == TPrimitiveType::ARRAY) {
ColumnPB* children_column = column->add_children_columns();
init_column_from_tcolumn(0, tcolumn.children_column[0],
children_column);
}
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 55f030cbb9..9fe92eb6b4 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -310,6 +310,10 @@ uint32_t
TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint3
return string_length + sizeof(OLAP_STRING_MAX_LENGTH);
case TPrimitiveType::JSONB:
return string_length + sizeof(OLAP_JSONB_MAX_LENGTH);
+ case TPrimitiveType::STRUCT:
+ // Note(xy): this is the length of the struct type itself;
+ // the lengths of its subtypes are not included.
+ return OLAP_STRUCT_MAX_LENGTH;
case TPrimitiveType::ARRAY:
return OLAP_ARRAY_MAX_LENGTH;
case TPrimitiveType::DECIMAL32:
@@ -402,8 +406,13 @@ void TabletColumn::init_from_pb(const ColumnPB& column) {
if (column.has_visible()) {
_visible = column.visible();
}
-
- if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
+ if (_type == FieldType::OLAP_FIELD_TYPE_STRUCT) {
+ for (size_t i = 0; i < column.children_columns_size(); i++) {
+ TabletColumn child_column;
+ child_column.init_from_pb(column.children_columns(i));
+ add_sub_column(child_column);
+ }
+ } else if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) {
DCHECK(column.children_columns_size() == 1) << "ARRAY type has more
than 1 children types.";
TabletColumn child_column;
child_column.init_from_pb(column.children_columns(0));
@@ -435,7 +444,12 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const {
}
column->set_visible(_visible);
- if (_type == OLAP_FIELD_TYPE_ARRAY) {
+ if (_type == OLAP_FIELD_TYPE_STRUCT) {
+ for (size_t i = 0; i < _sub_columns.size(); i++) {
+ ColumnPB* child = column->add_children_columns();
+ _sub_columns[i].to_schema_pb(child);
+ }
+ } else if (_type == OLAP_FIELD_TYPE_ARRAY) {
DCHECK(_sub_columns.size() == 1) << "ARRAY type has more than 1
children types.";
ColumnPB* child = column->add_children_columns();
_sub_columns[0].to_schema_pb(child);
diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp
index aa2226a84a..916f7768af 100644
--- a/be/src/olap/types.cpp
+++ b/be/src/olap/types.cpp
@@ -166,10 +166,33 @@ const TypeInfo* get_array_type_info(FieldType leaf_type,
int32_t iterations) {
return array_type_Info_arr[leaf_type][iterations];
}
+// Produce a struct type info
+// TODO(xy): this factory method needs refactoring
+const TypeInfo* get_struct_type_info(std::vector<FieldType> field_types) {
+ std::vector<TypeInfoPtr> type_infos;
+ type_infos.reserve(field_types.size());
+ for (FieldType& type : field_types) {
+ if (is_scalar_type(type)) {
+
type_infos.push_back(create_static_type_info_ptr(get_scalar_type_info(type)));
+ } else {
+ // TODO(xy): nested complex types are not supported yet
+ }
+ }
+ return new StructTypeInfo(type_infos);
+}
+
// TODO: Support the type info of the nested array with more than 9 depths.
+// TODO(xy): Support the type info of the nested struct
TypeInfoPtr get_type_info(segment_v2::ColumnMetaPB* column_meta_pb) {
FieldType type = (FieldType)column_meta_pb->type();
- if (UNLIKELY(type == OLAP_FIELD_TYPE_ARRAY)) {
+ if (UNLIKELY(type == OLAP_FIELD_TYPE_STRUCT)) {
+ std::vector<FieldType> field_types;
+ for (uint32_t i = 0; i < column_meta_pb->children_columns_size(); i++)
{
+ const auto* child_column = &column_meta_pb->children_columns(i);
+ field_types.push_back((FieldType)child_column->type());
+ }
+ return create_dynamic_type_info_ptr(get_struct_type_info(field_types));
+ } else if (UNLIKELY(type == OLAP_FIELD_TYPE_ARRAY)) {
int32_t iterations = 0;
const auto* child_column = &column_meta_pb->children_columns(0);
while (child_column->type() == OLAP_FIELD_TYPE_ARRAY) {
@@ -202,7 +225,14 @@ TypeInfoPtr create_type_info_ptr(const TypeInfo*
type_info, bool should_reclaim_
// TODO: Support the type info of the nested array with more than 9 depths.
TypeInfoPtr get_type_info(const TabletColumn* col) {
auto type = col->type();
- if (UNLIKELY(type == OLAP_FIELD_TYPE_ARRAY)) {
+ if (UNLIKELY(type == OLAP_FIELD_TYPE_STRUCT)) {
+ std::vector<FieldType> field_types;
+ for (uint32_t i = 0; i < col->get_subtype_count(); i++) {
+ const auto* child_column = &col->get_sub_column(i);
+ field_types.push_back(child_column->type());
+ }
+ return create_dynamic_type_info_ptr(get_struct_type_info(field_types));
+ } else if (UNLIKELY(type == OLAP_FIELD_TYPE_ARRAY)) {
int32_t iterations = 0;
const auto* child_column = &col->get_sub_column(0);
while (child_column->type() == OLAP_FIELD_TYPE_ARRAY) {
@@ -219,9 +249,21 @@ TypeInfoPtr clone_type_info(const TypeInfo* type_info) {
if (is_scalar_type(type_info->type())) {
return create_static_type_info_ptr(type_info);
} else {
- const auto array_type_info = dynamic_cast<const
ArrayTypeInfo*>(type_info);
- return create_dynamic_type_info_ptr(
- new
ArrayTypeInfo(clone_type_info(array_type_info->item_type_info())));
+ auto type = type_info->type();
+ if (type == OLAP_FIELD_TYPE_STRUCT) {
+ const auto struct_type_info = dynamic_cast<const
StructTypeInfo*>(type_info);
+ std::vector<TypeInfoPtr> clone_type_infos;
+ const std::vector<TypeInfoPtr>* sub_type_infos =
struct_type_info->type_infos();
+ clone_type_infos.reserve(sub_type_infos->size());
+ for (size_t i = 0; i < sub_type_infos->size(); i++) {
+
clone_type_infos.push_back(clone_type_info((*sub_type_infos)[i].get()));
+ }
+ return create_dynamic_type_info_ptr(new
StructTypeInfo(clone_type_infos));
+ } else {
+ const auto array_type_info = dynamic_cast<const
ArrayTypeInfo*>(type_info);
+ return create_dynamic_type_info_ptr(
+ new
ArrayTypeInfo(clone_type_info(array_type_info->item_type_info())));
+ }
}
}
diff --git a/be/src/olap/types.h b/be/src/olap/types.h
index c580f2ea53..a8f21b66ae 100644
--- a/be/src/olap/types.h
+++ b/be/src/olap/types.h
@@ -32,6 +32,7 @@
#include "runtime/collection_value.h"
#include "runtime/jsonb_value.h"
#include "runtime/mem_pool.h"
+#include "runtime/struct_value.h"
#include "util/jsonb_document.h"
#include "util/jsonb_utils.h"
#include "util/mem_util.hpp"
@@ -431,6 +432,245 @@ private:
const size_t _item_size;
};
+// TypeInfo for OLAP_FIELD_TYPE_STRUCT. Values are StructValue objects: an array of child
+// pointers in which the i-th child is interpreted through _type_infos[i] and a NULL field is
+// represented by a nullptr child pointer.
+class StructTypeInfo : public TypeInfo {
+public:
+    // Moves every element out of `type_infos` into this object; the caller's vector keeps its
+    // size but its elements are left in a moved-from state.
+    explicit StructTypeInfo(std::vector<TypeInfoPtr>& type_infos) {
+        _type_infos.reserve(type_infos.size());
+        for (TypeInfoPtr& type_info : type_infos) {
+            _type_infos.push_back(std::move(type_info));
+        }
+    }
+    ~StructTypeInfo() override = default;
+
+    // Two struct values are equal when they have the same number of fields and every field
+    // pair is either both NULL or both non-NULL and equal according to the field's TypeInfo.
+    bool equal(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const StructValue*>(left);
+        auto r_value = reinterpret_cast<const StructValue*>(right);
+        if (l_value->size() != r_value->size()) {
+            return false;
+        }
+        uint32_t size = l_value->size();
+
+        if (!l_value->has_null() && !r_value->has_null()) {
+            // Fast path: neither side can hold a NULL field, so no per-field null checks.
+            for (size_t i = 0; i < size; ++i) {
+                if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) {
+                    return false;
+                }
+            }
+        } else {
+            for (size_t i = 0; i < size; ++i) {
+                if (l_value->is_null_at(i)) {
+                    if (r_value->is_null_at(i)) { // both are null
+                        continue;
+                    } else { // left is null & right is not null
+                        return false;
+                    }
+                } else if (r_value->is_null_at(i)) { // left is not null & right is null
+                    return false;
+                }
+                if (!_type_infos[i]->equal(l_value->child_value(i), r_value->child_value(i))) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+
+    // Field-by-field comparison over the common prefix of both values. A NULL field orders
+    // before a non-NULL one and two NULL fields compare equal. If the common prefix is equal,
+    // the value with fewer fields orders first.
+    int cmp(const void* left, const void* right) const override {
+        auto l_value = reinterpret_cast<const StructValue*>(left);
+        auto r_value = reinterpret_cast<const StructValue*>(right);
+        uint32_t l_size = l_value->size();
+        uint32_t r_size = r_value->size();
+        size_t cur = 0;
+
+        if (!l_value->has_null() && !r_value->has_null()) {
+            while (cur < l_size && cur < r_size) {
+                int result =
+                        _type_infos[cur]->cmp(l_value->child_value(cur), r_value->child_value(cur));
+                if (result != 0) {
+                    return result;
+                }
+                ++cur;
+            }
+        } else {
+            while (cur < l_size && cur < r_size) {
+                if (l_value->is_null_at(cur)) {
+                    if (!r_value->is_null_at(cur)) { // left is null & right is not null
+                        return -1;
+                    }
+                } else if (r_value->is_null_at(cur)) { // left is not null & right is null
+                    return 1;
+                } else { // both are not null
+                    int result = _type_infos[cur]->cmp(l_value->child_value(cur),
+                                                       r_value->child_value(cur));
+                    if (result != 0) {
+                        return result;
+                    }
+                }
+                ++cur;
+            }
+        }
+
+        if (l_size < r_size) {
+            return -1;
+        } else if (l_size > r_size) {
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    // Copies only the StructValue header; dest ends up pointing at src's child array.
+    void shallow_copy(void* dest, const void* src) const override {
+        auto dest_value = reinterpret_cast<StructValue*>(dest);
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+        dest_value->shallow_copy(src_value);
+    }
+
+    // Copies src into dest using memory taken from `mem_pool`. A single allocation holds the
+    // child pointer array followed by one fixed-size slot (TypeInfo::size()) per non-NULL
+    // child; each child is then deep-copied into its slot by its own TypeInfo.
+    void deep_copy(void* dest, const void* src, MemPool* mem_pool) const override {
+        auto dest_value = reinterpret_cast<StructValue*>(dest);
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+
+        if (src_value->size() == 0) {
+            // Nothing to allocate: construct an empty value in place.
+            new (dest_value) StructValue(src_value->size());
+            return;
+        }
+
+        dest_value->set_size(src_value->size());
+        dest_value->set_has_null(src_value->has_null());
+
+        size_t allocate_size = src_value->size() * sizeof(*src_value->values());
+        // allocate memory for children value
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (src_value->is_null_at(i)) continue;
+            allocate_size += _type_infos[i]->size();
+        }
+
+        dest_value->set_values((void**)mem_pool->allocate(allocate_size));
+        auto ptr = reinterpret_cast<uint8_t*>(dest_value->mutable_values());
+        // Child slots start right after the pointer array.
+        ptr += dest_value->size() * sizeof(*dest_value->values());
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            dest_value->set_child_value(nullptr, i);
+            if (src_value->is_null_at(i)) continue;
+            dest_value->set_child_value(ptr, i);
+            ptr += _type_infos[i]->size();
+        }
+
+        // copy children value
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (src_value->is_null_at(i)) continue;
+            _type_infos[i]->deep_copy(dest_value->mutable_child_value(i), src_value->child_value(i),
+                                      mem_pool);
+        }
+    }
+
+    void copy_object(void* dest, const void* src, MemPool* mem_pool) const override {
+        deep_copy(dest, src, mem_pool);
+    }
+
+    // Copies src into dest, writing the children into the buffer that dest's values pointer
+    // already refers to. No memory is allocated here.
+    void direct_copy(void* dest, const void* src) const override {
+        auto dest_value = static_cast<StructValue*>(dest);
+        auto base = reinterpret_cast<uint8_t*>(dest_value->mutable_values());
+        direct_copy(&base, dest, src);
+    }
+
+    // Copies src into dest, placing child data at `*base` and advancing `*base` past every
+    // byte it consumes: first the pointer array, then one fixed-size slot per non-NULL child,
+    // then the payload of string children and of nested struct/array children.
+    void direct_copy(uint8_t** base, void* dest, const void* src) const {
+        auto dest_value = static_cast<StructValue*>(dest);
+        auto src_value = static_cast<const StructValue*>(src);
+
+        dest_value->set_size(src_value->size());
+        dest_value->set_has_null(src_value->has_null());
+        *base += src_value->size() * sizeof(*src_value->values());
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            dest_value->set_child_value(nullptr, i);
+            if (src_value->is_null_at(i)) continue;
+            dest_value->set_child_value(*base, i);
+            *base += _type_infos[i]->size();
+        }
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (dest_value->is_null_at(i)) {
+                continue;
+            }
+            auto dest_address = dest_value->mutable_child_value(i);
+            auto src_address = src_value->child_value(i);
+            if (_type_infos[i]->type() == OLAP_FIELD_TYPE_STRUCT) {
+                dynamic_cast<const StructTypeInfo*>(_type_infos[i].get())
+                        ->direct_copy(base, dest_address, src_address);
+            } else if (_type_infos[i]->type() == OLAP_FIELD_TYPE_ARRAY) {
+                dynamic_cast<const ArrayTypeInfo*>(_type_infos[i].get())
+                        ->direct_copy(base, dest_address, src_address);
+            } else {
+                if (is_olap_string_type(_type_infos[i]->type())) {
+                    // Point the destination slice at the buffer before the child copy runs.
+                    auto dest_slice = reinterpret_cast<Slice*>(dest_address);
+                    auto src_slice = reinterpret_cast<const Slice*>(src_address);
+                    dest_slice->data = reinterpret_cast<char*>(*base);
+                    dest_slice->size = src_slice->size;
+                    *base += src_slice->size;
+                }
+                _type_infos[i]->direct_copy(dest_address, src_address);
+            }
+        }
+    }
+
+    void direct_copy_may_cut(void* dest, const void* src) const override { direct_copy(dest, src); }
+
+    // Conversion from other types is not implemented for struct.
+    Status convert_from(void* dest, const void* src, const TypeInfo* src_type, MemPool* mem_pool,
+                        size_t variable_len = 0) const override {
+        return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
+    }
+
+    // Parsing a struct from a string is not implemented.
+    Status from_string(void* buf, const std::string& scan_key, const int precision = 0,
+                       const int scale = 0) const override {
+        return Status::Error<ErrorCode::NOT_IMPLEMENTED_ERROR>();
+    }
+
+    // Renders the value as "{f0, f1, ...}" using each field's own to_string; a NULL field is
+    // rendered as "NULL".
+    std::string to_string(const void* src) const override {
+        auto src_value = reinterpret_cast<const StructValue*>(src);
+        std::string result = "{";
+
+        for (size_t i = 0; i < src_value->size(); ++i) {
+            if (i != 0) {
+                result += ", ";
+            }
+            // A NULL field has a nullptr child pointer, which must not be handed to the
+            // child's to_string.
+            if (src_value->is_null_at(i)) {
+                result += "NULL";
+            } else {
+                result += _type_infos[i]->to_string(src_value->child_value(i));
+            }
+        }
+        result += "}";
+        return result;
+    }
+
+    void set_to_max(void* buf) const override {
+        DCHECK(false) << "set_to_max of struct is not implemented.";
+    }
+
+    void set_to_min(void* buf) const override {
+        DCHECK(false) << "set_to_min of struct is not implemented.";
+    }
+
+    // Hashes the field count, then folds in every field: a NULL field only multiplies the
+    // running hash by `seed`, a non-NULL field additionally adds its own hash.
+    uint32_t hash_code(const void* data, uint32_t seed) const override {
+        auto struct_value = reinterpret_cast<const StructValue*>(data);
+        auto size = struct_value->size();
+        uint32_t result = HashUtil::hash(&size, sizeof(size), seed);
+        for (size_t i = 0; i < size; ++i) {
+            if (struct_value->is_null_at(i)) {
+                result = seed * result;
+            } else {
+                result = seed * result + _type_infos[i]->hash_code(struct_value->values()[i], seed);
+            }
+        }
+        return result;
+    }
+
+    // Size of the fixed part of a value, i.e. the StructValue header itself.
+    const size_t size() const override { return sizeof(StructValue); }
+
+    FieldType type() const override { return OLAP_FIELD_TYPE_STRUCT; }
+
+    // Per-field type infos, in field order.
+    inline const std::vector<TypeInfoPtr>* type_infos() const { return &_type_infos; }
+
+private:
+    std::vector<TypeInfoPtr> _type_infos;
+};
+
bool is_scalar_type(FieldType field_type);
const TypeInfo* get_scalar_type_info(FieldType field_type);
@@ -566,12 +806,15 @@ template <>
struct CppTypeTraits<OLAP_FIELD_TYPE_OBJECT> {
using CppType = Slice;
};
-
template <>
struct CppTypeTraits<OLAP_FIELD_TYPE_QUANTILE_STATE> {
using CppType = Slice;
};
template <>
+struct CppTypeTraits<OLAP_FIELD_TYPE_STRUCT> {
+ using CppType = StructValue;
+};
+template <>
struct CppTypeTraits<OLAP_FIELD_TYPE_ARRAY> {
using CppType = CollectionValue;
};
diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt
index 36c75ffbef..a1f2bc3d45 100644
--- a/be/src/runtime/CMakeLists.txt
+++ b/be/src/runtime/CMakeLists.txt
@@ -48,6 +48,7 @@ set(RUNTIME_FILES
threadlocal.cc
decimalv2_value.cpp
large_int_value.cpp
+ struct_value.cpp
collection_value.cpp
tuple.cpp
tuple_row.cpp
diff --git a/be/src/runtime/primitive_type.cpp
b/be/src/runtime/primitive_type.cpp
index 779676a45d..27bc9f5b6e 100644
--- a/be/src/runtime/primitive_type.cpp
+++ b/be/src/runtime/primitive_type.cpp
@@ -22,6 +22,7 @@
#include "runtime/define_primitive_type.h"
#include "runtime/jsonb_value.h"
#include "runtime/string_value.h"
+#include "runtime/struct_value.h"
namespace doris {
@@ -53,6 +54,8 @@ PrimitiveType convert_type_to_primitive(FunctionContext::Type
type) {
return PrimitiveType::TYPE_BOOLEAN;
case FunctionContext::Type::TYPE_ARRAY:
return PrimitiveType::TYPE_ARRAY;
+ case FunctionContext::Type::TYPE_STRUCT:
+ return PrimitiveType::TYPE_STRUCT;
case FunctionContext::Type::TYPE_OBJECT:
return PrimitiveType::TYPE_OBJECT;
case FunctionContext::Type::TYPE_HLL:
@@ -95,6 +98,7 @@ int get_byte_size(PrimitiveType type) {
case TYPE_HLL:
case TYPE_QUANTILE_STATE:
case TYPE_ARRAY:
+ case TYPE_STRUCT:
case TYPE_MAP:
return 0;
@@ -262,6 +266,9 @@ PrimitiveType thrift_to_type(TPrimitiveType::type ttype) {
case TPrimitiveType::ARRAY:
return TYPE_ARRAY;
+ case TPrimitiveType::STRUCT:
+ return TYPE_STRUCT;
+
default:
return INVALID_TYPE;
}
@@ -356,6 +363,9 @@ TPrimitiveType::type to_thrift(PrimitiveType ptype) {
case TYPE_ARRAY:
return TPrimitiveType::ARRAY;
+ case TYPE_STRUCT:
+ return TPrimitiveType::STRUCT;
+
default:
return TPrimitiveType::INVALID_TYPE;
}
@@ -450,6 +460,9 @@ std::string type_to_string(PrimitiveType t) {
case TYPE_ARRAY:
return "ARRAY";
+ case TYPE_STRUCT:
+ return "STRUCT";
+
default:
return "";
};
@@ -589,6 +602,8 @@ int get_slot_size(PrimitiveType type) {
return sizeof(JsonBinaryValue);
case TYPE_ARRAY:
return sizeof(CollectionValue);
+ case TYPE_STRUCT:
+ return sizeof(StructValue);
case TYPE_NULL:
case TYPE_BOOLEAN:
diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h
index 5af7639290..f84f9a3348 100644
--- a/be/src/runtime/primitive_type.h
+++ b/be/src/runtime/primitive_type.h
@@ -54,6 +54,7 @@ constexpr bool is_enumeration_type(PrimitiveType type) {
case TYPE_DECIMAL128I:
case TYPE_BOOLEAN:
case TYPE_ARRAY:
+ case TYPE_STRUCT:
case TYPE_HLL:
return false;
case TYPE_TINYINT:
diff --git a/be/src/runtime/struct_value.cpp b/be/src/runtime/struct_value.cpp
new file mode 100644
index 0000000000..ff0b9cc62a
--- /dev/null
+++ b/be/src/runtime/struct_value.cpp
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "runtime/struct_value.h"
+
+namespace doris {
+
+// Copies the three header fields of `other` into this value. The child pointer array is
+// not duplicated, so both values refer to the same children afterwards.
+void StructValue::shallow_copy(const StructValue* other) {
+    _values = other->_values;
+    _size = other->_size;
+    _has_null = other->_has_null;
+}
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/runtime/struct_value.h b/be/src/runtime/struct_value.h
new file mode 100644
index 0000000000..ec243d729c
--- /dev/null
+++ b/be/src/runtime/struct_value.h
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <type_traits>
+
+#include "runtime/primitive_type.h"
+
+namespace doris {
+
+// In-memory header of one struct value: a pointer to an array of child pointers, the number
+// of children, and a flag telling whether any child may be NULL.
+class StructValue {
+public:
+    StructValue() = default;
+
+    explicit StructValue(uint32_t size) : StructValue(nullptr, size, false) {}
+    StructValue(void** values, uint32_t size) : StructValue(values, size, false) {}
+    StructValue(void** values, uint32_t size, bool has_null)
+            : _values(values), _size(size), _has_null(has_null) {}
+
+    //void to_struct_val(StructVal* val) const;
+    //static StructValue from_struct_val(const StructVal& val);
+
+    // Number of children.
+    uint32_t size() const { return _size; }
+    void set_size(uint32_t size) { _size = size; }
+
+    bool has_null() const { return _has_null; }
+    void set_has_null(bool has_null) { _has_null = has_null; }
+
+    // A child counts as NULL only when the has-null flag is set and its pointer is nullptr.
+    bool is_null_at(uint32_t index) const { return _has_null && _values[index] == nullptr; }
+
+    void shallow_copy(const StructValue* other);
+
+    // size_t get_byte_size(const TypeDescriptor& type) const;
+
+    // Accessors for the whole child pointer array.
+    const void** values() const { return const_cast<const void**>(_values); }
+    void** mutable_values() { return _values; }
+    void set_values(void** values) { _values = values; }
+
+    // Accessors for a single child pointer; `index` is not range-checked.
+    const void* child_value(uint32_t index) const { return _values[index]; }
+    void* mutable_child_value(uint32_t index) { return _values[index]; }
+    void set_child_value(void* value, uint32_t index) { _values[index] = value; }
+
+private:
+    // Pointer to the start of the array of child pointers. Each entry points to one child
+    // value; a null entry means that child is NULL.
+    void** _values;
+    // The number of values in this struct value.
+    uint32_t _size;
+    // No child is NULL if _has_null is false.
+    // A child may be NULL if _has_null is true.
+    bool _has_null;
+};
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp
index 37f4933569..f26b1dcbce 100644
--- a/be/src/runtime/types.cpp
+++ b/be/src/runtime/types.cpp
@@ -55,14 +55,31 @@ TypeDescriptor::TypeDescriptor(const
std::vector<TTypeNode>& types, int* idx)
case TTypeNodeType::ARRAY: {
DCHECK(!node.__isset.scalar_type);
DCHECK_LT(*idx, types.size() - 1);
+ // contains_null should be always set in ArrayType
+ DCHECK(node.__isset.contains_null);
type = TYPE_ARRAY;
- if (node.__isset.contains_null) {
- contains_null = node.contains_null;
- }
+ contains_nulls.reserve(1);
+ contains_nulls.push_back(node.contains_null);
++(*idx);
children.push_back(TypeDescriptor(types, idx));
break;
}
+ case TTypeNodeType::STRUCT: {
+ DCHECK(!node.__isset.scalar_type);
+ DCHECK_LT(*idx, types.size() - 1);
+ DCHECK(!node.__isset.contains_null);
+ DCHECK(node.__isset.struct_fields);
+ DCHECK_GE(node.struct_fields.size(), 1);
+ type = TYPE_STRUCT;
+ contains_nulls.reserve(node.struct_fields.size());
+ for (size_t i = 0; i < node.struct_fields.size(); i++) {
+ ++(*idx);
+ children.push_back(TypeDescriptor(types, idx));
+ field_names.push_back(node.struct_fields[i].name);
+ contains_nulls.push_back(node.struct_fields[i].contains_null);
+ }
+ break;
+ }
// case TTypeNodeType::STRUCT:
// type = TYPE_STRUCT;
// for (int i = 0; i < node.struct_fields.size(); ++i) {
@@ -98,15 +115,17 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type)
const {
if (is_complex_type()) {
if (type == TYPE_ARRAY) {
node.type = TTypeNodeType::ARRAY;
+ node.contains_null = contains_nulls[0];
} else if (type == TYPE_MAP) {
node.type = TTypeNodeType::MAP;
} else {
DCHECK_EQ(type, TYPE_STRUCT);
node.type = TTypeNodeType::STRUCT;
node.__set_struct_fields(std::vector<TStructField>());
- for (auto& field_name : field_names) {
+ for (size_t i = 0; i < field_names.size(); i++) {
node.struct_fields.push_back(TStructField());
- node.struct_fields.back().name = field_name;
+ node.struct_fields.back().name = field_names[i];
+ node.struct_fields.back().contains_null = contains_nulls[i];
}
}
for (const TypeDescriptor& child : children) {
@@ -147,10 +166,12 @@ void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const {
scalar_type->set_scale(scale);
} else if (type == TYPE_ARRAY) {
node->set_type(TTypeNodeType::ARRAY);
+ node->set_contains_null(contains_nulls[0]);
for (const TypeDescriptor& child : children) {
child.to_protobuf(ptype);
}
}
+ // TODO(xy): support struct
}
TypeDescriptor::TypeDescriptor(const
google::protobuf::RepeatedPtrField<PTypeNode>& types, int* idx)
@@ -184,13 +205,15 @@ TypeDescriptor::TypeDescriptor(const
google::protobuf::RepeatedPtrField<PTypeNod
}
case TTypeNodeType::ARRAY: {
type = TYPE_ARRAY;
+ contains_nulls.push_back(true);
if (node.has_contains_null()) {
- contains_null = node.contains_null();
+ contains_nulls[0] = node.contains_null();
}
++(*idx);
children.push_back(TypeDescriptor(types, idx));
break;
}
+ // TODO(xy): support struct
default:
DCHECK(false) << node.type();
}
@@ -218,6 +241,19 @@ std::string TypeDescriptor::debug_string() const {
ss << "ARRAY<" << children[0].debug_string() << ">";
return ss.str();
}
+ case TYPE_STRUCT: {
+ ss << "STRUCT<";
+ for (size_t i = 0; i < children.size(); i++) {
+ ss << field_names[i];
+ ss << ":";
+ ss << children[i].debug_string();
+ if (i != children.size() - 1) {
+ ss << ",";
+ }
+ }
+ ss << ">";
+ return ss.str();
+ }
default:
return type_to_string(type);
}
diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h
index 4f4c2ef0fe..8c76605636 100644
--- a/be/src/runtime/types.h
+++ b/be/src/runtime/types.h
@@ -64,7 +64,7 @@ struct TypeDescriptor {
std::vector<std::string> field_names;
// Used for complex types only.
- bool contains_null = true;
+ std::vector<bool> contains_nulls;
TypeDescriptor() : type(INVALID_TYPE), len(-1), precision(-1), scale(-1) {}
diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h
index 49758f40d1..ec60fae072 100644
--- a/be/src/udf/udf.h
+++ b/be/src/udf/udf.h
@@ -89,6 +89,7 @@ public:
TYPE_DECIMALV2,
TYPE_OBJECT,
TYPE_ARRAY,
+ TYPE_STRUCT,
TYPE_QUANTILE_STATE,
TYPE_DATEV2,
TYPE_DATETIMEV2,
diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
index 27d163e8ca..205e320bcb 100644
--- a/be/src/vec/CMakeLists.txt
+++ b/be/src/vec/CMakeLists.txt
@@ -132,6 +132,7 @@ set(VEC_FILES
exprs/vexpr_context.cpp
exprs/vliteral.cpp
exprs/varray_literal.cpp
+ exprs/vstruct_literal.cpp
exprs/vin_predicate.cpp
exprs/vbloom_predicate.cpp
exprs/vbitmap_predicate.cpp
diff --git a/be/src/vec/columns/column_struct.cpp
b/be/src/vec/columns/column_struct.cpp
index 05d5df0fd4..c7e5c23d2a 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -48,8 +48,7 @@ ColumnStruct::ColumnStruct(MutableColumns&& mutable_columns) {
columns.reserve(mutable_columns.size());
for (auto& column : mutable_columns) {
if (is_column_const(*column)) {
- throw Exception {"ColumnStruct cannot have ColumnConst as its
element",
- ErrorCodes::ILLEGAL_COLUMN};
+ LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its
element";
}
columns.push_back(std::move(column));
}
@@ -59,8 +58,7 @@ ColumnStruct::ColumnStruct(Columns&& columns) {
columns.reserve(columns.size());
for (auto& column : columns) {
if (is_column_const(*column)) {
- throw Exception {"ColumnStruct cannot have ColumnConst as its
element",
- ErrorCodes::ILLEGAL_COLUMN};
+ LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its
element";
}
columns.push_back(std::move(column));
}
@@ -70,8 +68,7 @@ ColumnStruct::ColumnStruct(TupleColumns&& tuple_columns) {
columns.reserve(tuple_columns.size());
for (auto& column : tuple_columns) {
if (is_column_const(*column)) {
- throw Exception {"ColumnStruct cannot have ColumnConst as its
element",
- ErrorCodes::ILLEGAL_COLUMN};
+ LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its
element";
}
columns.push_back(std::move(column));
}
@@ -79,9 +76,9 @@ ColumnStruct::ColumnStruct(TupleColumns&& tuple_columns) {
ColumnStruct::Ptr ColumnStruct::create(Columns& columns) {
for (const auto& column : columns) {
- if (is_column_const(*column))
- throw Exception {"ColumnStruct cannot have ColumnConst as its
element",
- ErrorCodes::ILLEGAL_COLUMN};
+ if (is_column_const(*column)) {
+ LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its
element";
+ }
}
auto column_struct = ColumnStruct::create(columns);
return column_struct;
@@ -90,8 +87,7 @@ ColumnStruct::Ptr ColumnStruct::create(Columns& columns) {
ColumnStruct::Ptr ColumnStruct::create(TupleColumns& tuple_columns) {
for (const auto& column : tuple_columns) {
if (is_column_const(*column)) {
- throw Exception {"ColumnStruct cannot have ColumnConst as its
element",
- ErrorCodes::ILLEGAL_COLUMN};
+ LOG(FATAL) << "ColumnStruct cannot have ColumnConst as its
element";
}
}
auto column_struct = ColumnStruct::create(tuple_columns);
@@ -144,22 +140,11 @@ bool ColumnStruct::is_default_at(size_t n) const {
return true;
}
-StringRef ColumnStruct::get_data_at(size_t) const {
- throw Exception("Method get_data_at is not supported for " + get_name(),
- ErrorCodes::NOT_IMPLEMENTED);
-}
-
-void ColumnStruct::insert_data(const char*, size_t) {
- throw Exception("Method insert_data is not supported for " + get_name(),
- ErrorCodes::NOT_IMPLEMENTED);
-}
-
void ColumnStruct::insert(const Field& x) {
const auto& tuple = x.get<const Tuple&>();
const size_t tuple_size = columns.size();
if (tuple.size() != tuple_size) {
- throw Exception("Cannot insert value of different size into tuple",
-
ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE);
+ LOG(FATAL) << "Cannot insert value of different size into tuple.";
}
for (size_t i = 0; i < tuple_size; ++i) {
@@ -172,8 +157,7 @@ void ColumnStruct::insert_from(const IColumn& src_, size_t
n) {
const size_t tuple_size = columns.size();
if (src.columns.size() != tuple_size) {
- throw Exception("Cannot insert value of different size into tuple",
-
ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE);
+ LOG(FATAL) << "Cannot insert value of different size into tuple.";
}
for (size_t i = 0; i < tuple_size; ++i) {
diff --git a/be/src/vec/columns/column_struct.h
b/be/src/vec/columns/column_struct.h
index 895a2796b7..75dade874d 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -102,7 +102,7 @@ public:
std::string get_name() const override;
const char* get_family_name() const override { return "Struct"; }
TypeIndex get_data_type() const { return TypeIndex::Struct; }
-
+ bool can_be_inside_nullable() const override { return true; }
MutableColumnPtr clone_empty() const override;
MutableColumnPtr clone_resized(size_t size) const override;
@@ -112,8 +112,12 @@ public:
void get(size_t n, Field& res) const override;
bool is_default_at(size_t n) const override;
- StringRef get_data_at(size_t n) const override;
- void insert_data(const char* pos, size_t length) override;
+    // Not supported for struct columns: logs a fatal error that names this column.
+    [[noreturn]] StringRef get_data_at(size_t n) const override {
+        LOG(FATAL) << "Method get_data_at is not supported for " << get_name();
+    }
+    // Not supported for struct columns: logs a fatal error that names this column.
+    [[noreturn]] void insert_data(const char* pos, size_t length) override {
+        LOG(FATAL) << "Method insert_data is not supported for " << get_name();
+    }
void insert(const Field& x) override;
void insert_from(const IColumn& src_, size_t n) override;
void insert_default() override;
@@ -162,7 +166,10 @@ public:
// int compare_at_with_collation(size_t n, size_t m, const IColumn& rhs,
int nan_direction_hint,
// const Collator& collator) const override;
- int compare_at(size_t n, size_t m, const IColumn& rhs, int
nan_direction_hint) const override;
+ // Row comparison is not implemented for struct columns; every call logs a fatal error
+ // and none of the arguments are inspected.
+ [[noreturn]] int compare_at(size_t n, size_t m, const IColumn& rhs_,
+ int nan_direction_hint) const override {
+ LOG(FATAL) << "compare_at not implemented";
+ }
void get_extremes(Field& min, Field& max) const override;
// void get_permutation(IColumn::PermutationSortDirection direction,
diff --git a/be/src/vec/data_types/data_type_factory.cpp
b/be/src/vec/data_types/data_type_factory.cpp
index 85dfc445ba..36aae47d69 100644
--- a/be/src/vec/data_types/data_type_factory.cpp
+++ b/be/src/vec/data_types/data_type_factory.cpp
@@ -27,6 +27,18 @@ DataTypePtr DataTypeFactory::create_data_type(const
doris::Field& col_desc) {
if (col_desc.type() == OLAP_FIELD_TYPE_ARRAY) {
DCHECK(col_desc.get_sub_field_count() == 1);
nested =
std::make_shared<DataTypeArray>(create_data_type(*col_desc.get_sub_field(0)));
+    } else if (col_desc.type() == OLAP_FIELD_TYPE_STRUCT) {
+        // Build one element type and one element name per sub-field of the struct.
+        DCHECK(col_desc.get_sub_field_count() >= 1);
+        size_t field_size = col_desc.get_sub_field_count();
+        DataTypes dataTypes;
+        Strings names;
+        dataTypes.reserve(field_size);
+        names.reserve(field_size);
+        for (size_t i = 0; i < field_size; i++) {
+            const auto* sub_field = col_desc.get_sub_field(i);
+            dataTypes.push_back(create_data_type(*sub_field));
+            // Use the sub-field's own name; the parent's name would give every element the
+            // same name.
+            names.push_back(sub_field->name());
+        }
+        nested = std::make_shared<DataTypeStruct>(dataTypes, names);
} else {
nested = _create_primitive_data_type(col_desc.type(),
col_desc.get_precision(),
col_desc.get_scale());
@@ -43,6 +55,18 @@ DataTypePtr DataTypeFactory::create_data_type(const
TabletColumn& col_desc, bool
if (col_desc.type() == OLAP_FIELD_TYPE_ARRAY) {
DCHECK(col_desc.get_subtype_count() == 1);
nested =
std::make_shared<DataTypeArray>(create_data_type(col_desc.get_sub_column(0)));
+    } else if (col_desc.type() == OLAP_FIELD_TYPE_STRUCT) {
+        // Build one element type and one element name per sub-column of the struct.
+        DCHECK(col_desc.get_subtype_count() >= 1);
+        size_t col_size = col_desc.get_subtype_count();
+        DataTypes dataTypes;
+        Strings names;
+        dataTypes.reserve(col_size);
+        names.reserve(col_size);
+        for (size_t i = 0; i < col_size; i++) {
+            const auto& sub_column = col_desc.get_sub_column(i);
+            dataTypes.push_back(create_data_type(sub_column));
+            // Use the sub-column's own name; the parent's name would give every element the
+            // same name.
+            names.push_back(sub_column.name());
+        }
+        nested = std::make_shared<DataTypeStruct>(dataTypes, names);
} else {
nested =
_create_primitive_data_type(col_desc.type(),
col_desc.precision(), col_desc.frac());
@@ -125,8 +149,23 @@ DataTypePtr DataTypeFactory::create_data_type(const
TypeDescriptor& col_desc, bo
case TYPE_ARRAY:
DCHECK(col_desc.children.size() == 1);
nested = std::make_shared<vectorized::DataTypeArray>(
- create_data_type(col_desc.children[0],
col_desc.contains_null));
+ create_data_type(col_desc.children[0],
col_desc.contains_nulls[0]));
+ break;
+ case TYPE_STRUCT: {
+ DCHECK(col_desc.children.size() >= 1);
+ size_t child_size = col_desc.children.size();
+ DCHECK_EQ(col_desc.field_names.size(), child_size);
+ DataTypes dataTypes;
+ Strings names;
+ dataTypes.reserve(child_size);
+ names.reserve(child_size);
+ for (size_t i = 0; i < child_size; i++) {
+ dataTypes.push_back(create_data_type(col_desc.children[i],
col_desc.contains_nulls[i]));
+ names.push_back(col_desc.field_names[i]);
+ }
+ nested = std::make_shared<DataTypeStruct>(dataTypes, names);
break;
+ }
case INVALID_TYPE:
default:
DCHECK(false) << "invalid PrimitiveType:" << (int)col_desc.type;
diff --git a/be/src/vec/data_types/data_type_factory.hpp
b/be/src/vec/data_types/data_type_factory.hpp
index 696ab9ee54..c5c6269288 100644
--- a/be/src/vec/data_types/data_type_factory.hpp
+++ b/be/src/vec/data_types/data_type_factory.hpp
@@ -41,6 +41,7 @@
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
+#include "vec/data_types/data_type_struct.h"
namespace doris::vectorized {
@@ -95,7 +96,10 @@ public:
});
return instance;
}
+
+ // TODO(xy): support creator to create dynamic struct type
DataTypePtr get(const std::string& name) { return _data_type_map[name]; }
+ // TODO(xy): support creator to create dynamic struct type
const std::string& get(const DataTypePtr& data_type) const {
auto type_ptr = data_type->is_nullable()
?
((DataTypeNullable*)(data_type.get()))->get_nested_type()
diff --git a/be/src/vec/data_types/data_type_struct.cpp
b/be/src/vec/data_types/data_type_struct.cpp
index 91aff67a40..8fd2179bfa 100644
--- a/be/src/vec/data_types/data_type_struct.cpp
+++ b/be/src/vec/data_types/data_type_struct.cpp
@@ -43,16 +43,15 @@ DataTypeStruct::DataTypeStruct(const DataTypes& elems_)
}
}
-static std::optional<Exception> check_tuple_names(const Strings& names) {
+static Status check_tuple_names(const Strings& names) {
std::unordered_set<String> names_set;
for (const auto& name : names) {
if (name.empty()) {
- return Exception("Names of tuple elements cannot be empty",
ErrorCodes::BAD_ARGUMENTS);
+ return Status::InvalidArgument("Names of tuple elements cannot be
empty");
}
if (!names_set.insert(name).second) {
- return Exception("Names of tuple elements must be unique",
- ErrorCodes::DUPLICATE_COLUMN);
+ return Status::InvalidArgument("Names of tuple elements must be
unique");
}
}
@@ -63,13 +62,12 @@ DataTypeStruct::DataTypeStruct(const DataTypes& elems_,
const Strings& names_)
: elems(elems_), names(names_), have_explicit_names(true) {
size_t size = elems.size();
if (names.size() != size) {
- throw Exception("Wrong number of names passed to constructor of
DataTypeStruct",
- ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+ LOG(FATAL) << "Wrong number of names passed to constructor of
DataTypeStruct";
}
- if (auto exception = check_tuple_names(names)) {
- throw std::move(*exception);
- }
+ Status st = check_tuple_names(names);
+ //if (!st.ok()) {
+ //}
}
std::string DataTypeStruct::do_get_name() const {
@@ -114,8 +112,7 @@ static void add_element_safe(const DataTypes& elems,
IColumn& column, F&& impl)
if (element_column.size() != new_size) {
// This is not a logical error because it may work with
// user-supplied data.
- throw Exception("Cannot read a tuple because not all elements
are present",
-
ErrorCodes::SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH);
+ LOG(FATAL) << "Cannot read a tuple because not all elements
are present";
}
}
} catch (...) {
@@ -168,10 +165,11 @@ MutableColumnPtr DataTypeStruct::create_column() const {
// return ColumnStruct::create(std::move(tuple_columns));
// }
-// Field DataTypeStruct::get_default() const {
-// return Tuple(collections::map<Tuple>(
-// elems, [](const DataTypePtr& elem) { return
elem->get_default(); }));
-// }
+// Returns the default Field for a struct type: currently an empty Tuple.
+// NOTE(review): the per-element construction below is commented out, so the
+// returned Tuple carries no element defaults regardless of `elems` --
+// presumably a work-in-progress placeholder; confirm before relying on it.
+Field DataTypeStruct::get_default() const {
+ return Tuple();
+ //return Tuple(collections::map<Tuple>(
+ // elems, [](const DataTypePtr& elem) { return
elem->get_default(); }));
+}
void DataTypeStruct::insert_default_into(IColumn& column) const {
add_element_safe(elems, column, [&] {
@@ -210,8 +208,7 @@ size_t DataTypeStruct::get_position_by_name(const String&
name) const {
return i;
}
}
- throw Exception("Struct doesn't have element with name '" + name + "'",
- ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
+ LOG(FATAL) << "Struct doesn't have element with name '" + name + "'";
}
std::optional<size_t> DataTypeStruct::try_get_position_by_name(const String&
name) const {
@@ -229,7 +226,7 @@ String DataTypeStruct::get_name_by_position(size_t i) const
{
fmt::memory_buffer error_msg;
fmt::format_to(error_msg, "Index of tuple element ({}) if out range
([1, {}])", i,
names.size());
- throw Exception(fmt::to_string(error_msg), ErrorCodes::ILLEGAL_INDEX);
+ LOG(FATAL) << fmt::to_string(error_msg);
}
return names[i - 1];
diff --git a/be/src/vec/data_types/data_type_struct.h
b/be/src/vec/data_types/data_type_struct.h
index 4201583ef4..9405544b76 100644
--- a/be/src/vec/data_types/data_type_struct.h
+++ b/be/src/vec/data_types/data_type_struct.h
@@ -62,7 +62,7 @@ public:
std::string do_get_name() const override;
const char* get_family_name() const override { return "Struct"; }
- bool can_be_inside_nullable() const override { return false; }
+ bool can_be_inside_nullable() const override { return true; }
bool supports_sparse_serialization() const { return true; }
MutableColumnPtr create_column() const override;
@@ -90,16 +90,18 @@ public:
std::optional<size_t> try_get_position_by_name(const String& name) const;
String get_name_by_position(size_t i) const;
- int64_t get_uncompressed_serialized_bytes(const IColumn& column,
- int be_exec_version) const
override {
+ [[noreturn]] int64_t get_uncompressed_serialized_bytes(const IColumn&
column,
+ int
be_exec_version) const override {
LOG(FATAL) << "get_uncompressed_serialized_bytes not implemented";
}
- char* serialize(const IColumn& column, char* buf, int be_exec_version)
const override {
+ [[noreturn]] char* serialize(const IColumn& column, char* buf,
+ int be_exec_version) const override {
LOG(FATAL) << "serialize not implemented";
}
- const char* deserialize(const char* buf, IColumn* column, int
be_exec_version) const override {
+ [[noreturn]] const char* deserialize(const char* buf, IColumn* column,
+ int be_exec_version) const override {
LOG(FATAL) << "serialize not implemented";
}
diff --git a/be/src/vec/exprs/vcast_expr.cpp b/be/src/vec/exprs/vcast_expr.cpp
index 68f3b05cc1..6f90ede85b 100644
--- a/be/src/vec/exprs/vcast_expr.cpp
+++ b/be/src/vec/exprs/vcast_expr.cpp
@@ -38,6 +38,7 @@ doris::Status VCastExpr::prepare(doris::RuntimeState* state,
const doris::RowDes
// create a const string column
_target_data_type = _data_type;
+ // TODO(xy): support return struct type name
_target_data_type_name =
DataTypeFactory::instance().get(_target_data_type);
_cast_param_data_type = std::make_shared<DataTypeString>();
_cast_param = _cast_param_data_type->create_column_const(1,
_target_data_type_name);
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index 12af55f060..b78a63dbeb 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -35,6 +35,7 @@
#include "vec/exprs/vliteral.h"
#include "vec/exprs/vruntimefilter_wrapper.h"
#include "vec/exprs/vslot_ref.h"
+#include "vec/exprs/vstruct_literal.h"
#include "vec/exprs/vtuple_is_null_predicate.h"
namespace doris::vectorized {
@@ -119,6 +120,10 @@ Status VExpr::create_expr(doris::ObjectPool* pool, const
doris::TExprNode& texpr
*expr = pool->add(new VArrayLiteral(texpr_node));
return Status::OK();
}
+ case TExprNodeType::STRUCT_LITERAL: {
+ *expr = pool->add(new VStructLiteral(texpr_node));
+ return Status::OK();
+ }
case doris::TExprNodeType::SLOT_REF: {
*expr = pool->add(new VSlotRef(texpr_node));
break;
diff --git a/be/src/vec/exprs/vstruct_literal.cpp
b/be/src/vec/exprs/vstruct_literal.cpp
new file mode 100644
index 0000000000..9ae647b481
--- /dev/null
+++ b/be/src/vec/exprs/vstruct_literal.cpp
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vstruct_literal.h"
+
+namespace doris::vectorized {
+
+// Builds the constant column that represents this struct literal.
+//
+// Every child expression is asked for its constant column; the value at row 0
+// of each one is appended, in child order, to a Tuple field. The resulting
+// field is then turned into a one-row const column of _data_type and stored in
+// _column_ptr.
+//
+// Errors from VExpr::prepare() and from a child's get_const_col() are
+// propagated through the RETURN_IF_ERROR* macros. An InternalError is returned
+// when a child reports success but hands back no constant column.
+Status VStructLiteral::prepare(RuntimeState* state, const RowDescriptor& row_desc,
+                               VExprContext* context) {
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, row_desc, context));
+    Field struct_field = Tuple();
+    for (const auto child : _children) {
+        Field item;
+        ColumnPtrWrapper* const_col_wrapper = nullptr;
+        RETURN_IF_ERROR(child->get_const_col(context, &const_col_wrapper));
+        // The wrapper is an out-parameter that starts as nullptr: report a status
+        // instead of dereferencing it if the child did not fill it in.
+        if (const_col_wrapper == nullptr) {
+            return Status::InternalError("struct literal element has no constant column");
+        }
+        const_col_wrapper->column_ptr->get(0, item);
+        struct_field.get<Tuple>().push_back(item);
+    }
+    _column_ptr = _data_type->create_column_const(1, struct_field);
+    return Status::OK();
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/exprs/vstruct_literal.h
b/be/src/vec/exprs/vstruct_literal.h
new file mode 100644
index 0000000000..4cc4fe78d6
--- /dev/null
+++ b/be/src/vec/exprs/vstruct_literal.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/exprs/vliteral.h"
+
+namespace doris {
+
+namespace vectorized {
+// Expression node for a STRUCT literal. It reuses VLiteral and overrides
+// prepare(), whose definition lives in the matching .cpp file.
+class VStructLiteral : public VLiteral {
+public:
+    // `explicit` keeps a TExprNode from being converted into a literal node implicitly.
+    // NOTE(review): the meaning of the `false` passed to VLiteral is not visible
+    // here -- confirm against VLiteral's constructor.
+    explicit VStructLiteral(const TExprNode& node) : VLiteral(node, false) {}
+    ~VStructLiteral() override = default;
+    Status prepare(RuntimeState* state, const RowDescriptor& row_desc,
+                   VExprContext* context) override;
+};
+} // namespace vectorized
+
+} // namespace doris
diff --git a/be/src/vec/functions/function_cast.h
b/be/src/vec/functions/function_cast.h
index 90edd3906b..123b1ea788 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -1722,6 +1722,7 @@ protected:
LOG(FATAL) << fmt::format(
"Second argument to {} must be a constant string
describing type", get_name());
}
+ // TODO(xy): support return struct type for factory
auto type =
DataTypeFactory::instance().get(type_col->get_value<String>());
DCHECK(type != nullptr);
diff --git a/be/src/vec/olap/olap_data_convertor.cpp
b/be/src/vec/olap/olap_data_convertor.cpp
index e9b4c8fd24..0b3ad4ae03 100644
--- a/be/src/vec/olap/olap_data_convertor.cpp
+++ b/be/src/vec/olap/olap_data_convertor.cpp
@@ -20,8 +20,10 @@
#include "olap/tablet_schema.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_complex.h"
+#include "vec/columns/column_struct.h"
#include "vec/columns/column_vector.h"
#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_struct.h"
namespace doris::vectorized {
@@ -114,6 +116,14 @@
OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co
case FieldType::OLAP_FIELD_TYPE_DOUBLE: {
return
std::make_unique<OlapColumnDataConvertorSimple<vectorized::Float64>>();
}
+ case FieldType::OLAP_FIELD_TYPE_STRUCT: {
+ std::vector<OlapColumnDataConvertorBaseUPtr> sub_convertors;
+ for (uint32_t i = 0; i < column.get_subtype_count(); i++) {
+ const TabletColumn& sub_column = column.get_sub_column(i);
+
sub_convertors.emplace_back(create_olap_column_data_convertor(sub_column));
+ }
+ return std::make_unique<OlapColumnDataConvertorStruct>(sub_convertors);
+ }
case FieldType::OLAP_FIELD_TYPE_ARRAY: {
const auto& sub_column = column.get_sub_column(0);
return std::make_unique<OlapColumnDataConvertorArray>(
@@ -639,6 +649,58 @@ Status
OlapBlockDataConvertor::OlapColumnDataConvertorDecimal::convert_to_olap()
return Status::OK();
}
+// Forwards the source column and the row window (row_pos, num_rows) unchanged
+// to the base-class implementation; the struct convertor adds nothing here.
+void OlapBlockDataConvertor::OlapColumnDataConvertorStruct::set_source_column(
+        const ColumnWithTypeAndName& typed_column, size_t row_pos, size_t num_rows) {
+    using Base = OlapBlockDataConvertor::OlapColumnDataConvertorBase;
+    Base::set_source_column(typed_column, row_pos, num_rows);
+}
+
+// Returns the first entry of _results.
+// NOTE(review): _results is sized to twice the number of sub-convertors by the
+// constructor, so this presumes the struct has at least one field -- confirm.
+const void* OlapBlockDataConvertor::OlapColumnDataConvertorStruct::get_data() const {
+    const void* first_result = _results.front();
+    return first_result;
+}
+
+// Per-offset access is not implemented for struct columns: `offset` is ignored
+// and nullptr is always returned, so callers must not dereference the result.
+const void* OlapBlockDataConvertor::OlapColumnDataConvertorStruct::get_data_at(
+ size_t offset) const {
+ // Todo(xy): struct not supported
+ return nullptr;
+}
+
+// Converts the source struct column by delegating each field to its own
+// sub-convertor.
+//
+// After a successful call _results is laid out as
+//   [data_0 .. data_{n-1}, nullmap_0 .. nullmap_{n-1}]
+// where n is the number of struct fields.
+//
+// Returns InternalError when the column's field count does not match the
+// sub-convertors / result slots prepared by the constructor, the first error
+// reported by a sub-convertor, and OK otherwise.
+Status OlapBlockDataConvertor::OlapColumnDataConvertorStruct::convert_to_olap() {
+    assert(_typed_column.column);
+    const vectorized::ColumnStruct* column_struct = nullptr;
+    const vectorized::DataTypeStruct* data_type_struct = nullptr;
+    if (_nullmap) {
+        // Nullable source: unwrap both the column and its type to reach the struct.
+        auto nullable_column =
+                assert_cast<const vectorized::ColumnNullable*>(_typed_column.column.get());
+        column_struct = assert_cast<const vectorized::ColumnStruct*>(
+                nullable_column->get_nested_column_ptr().get());
+        data_type_struct = assert_cast<const DataTypeStruct*>(
+                (assert_cast<const DataTypeNullable*>(_typed_column.type.get())->get_nested_type())
+                        .get());
+    } else {
+        column_struct = assert_cast<const vectorized::ColumnStruct*>(_typed_column.column.get());
+        data_type_struct = assert_cast<const DataTypeStruct*>(_typed_column.type.get());
+    }
+    assert(column_struct);
+    assert(data_type_struct);
+
+    const size_t field_count = column_struct->tuple_size();
+    // Both _sub_convertors and _results are indexed by field below; refuse to run
+    // past either of them if the column shape does not match the schema.
+    if (field_count != _sub_convertors.size() || _results.size() < field_count * 2) {
+        return Status::InternalError(
+                "struct column field count does not match the number of sub convertors");
+    }
+    for (size_t i = 0; i < field_count; ++i) {
+        ColumnPtr sub_column = column_struct->get_column_ptr(i);
+        DataTypePtr sub_type = data_type_struct->get_element(i);
+        ColumnWithTypeAndName sub_typed_column = {sub_column, sub_type, ""};
+        _sub_convertors[i]->set_source_column(sub_typed_column, _row_pos, _num_rows);
+        // A failed field conversion must fail the whole struct conversion.
+        RETURN_IF_ERROR(_sub_convertors[i]->convert_to_olap());
+        _results[i] = _sub_convertors[i]->get_data();
+        _results[field_count + i] = _sub_convertors[i]->get_nullmap();
+    }
+    return Status::OK();
+}
+
Status OlapBlockDataConvertor::OlapColumnDataConvertorArray::convert_to_olap()
{
const ColumnArray* column_array = nullptr;
const DataTypeArray* data_type_array = nullptr;
diff --git a/be/src/vec/olap/olap_data_convertor.h
b/be/src/vec/olap/olap_data_convertor.h
index 6898b44a9d..aafbe8d3e3 100644
--- a/be/src/vec/olap/olap_data_convertor.h
+++ b/be/src/vec/olap/olap_data_convertor.h
@@ -359,6 +359,27 @@ private:
}
};
+    // Convertor for STRUCT columns. It owns one sub-convertor per struct field and
+    // a table of result pointers with two slots per field.
+    class OlapColumnDataConvertorStruct : public OlapColumnDataConvertorBase {
+    public:
+        // Takes ownership of every element of `sub_convertors`: each element is
+        // moved out, so the caller's vector only holds moved-from entries afterwards.
+        explicit OlapColumnDataConvertorStruct(
+                std::vector<OlapColumnDataConvertorBaseUPtr>& sub_convertors) {
+            _sub_convertors.reserve(sub_convertors.size());
+            for (auto& sub_convertor : sub_convertors) {
+                _sub_convertors.push_back(std::move(sub_convertor));
+            }
+            // Two result slots are reserved for every field.
+            size_t allocate_size = _sub_convertors.size() * 2;
+            _results.resize(allocate_size);
+        }
+        void set_source_column(const ColumnWithTypeAndName& typed_column, size_t row_pos,
+                               size_t num_rows) override;
+        const void* get_data() const override;
+        const void* get_data_at(size_t offset) const override;
+        Status convert_to_olap() override;
+
+    private:
+        // One convertor per struct field, in field order.
+        std::vector<OlapColumnDataConvertorBaseUPtr> _sub_convertors;
+        // Result pointers filled by convert_to_olap(); sized 2 * field count.
+        std::vector<const void*> _results;
+    };
+
+
class OlapColumnDataConvertorArray
: public OlapColumnDataConvertorPaddedPODArray<CollectionValue> {
public:
diff --git a/be/src/vec/sink/vmysql_result_writer.cpp
b/be/src/vec/sink/vmysql_result_writer.cpp
index 4c7f5028d7..2accb4dd03 100644
--- a/be/src/vec/sink/vmysql_result_writer.cpp
+++ b/be/src/vec/sink/vmysql_result_writer.cpp
@@ -29,6 +29,7 @@
#include "vec/common/assert_cast.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_struct.h"
#include "vec/exprs/vexpr.h"
#include "vec/exprs/vexpr_context.h"
#include "vec/runtime/vdatetime_value.h"
@@ -61,8 +62,8 @@ void VMysqlResultWriter::_init_profile() {
template <PrimitiveType type, bool is_nullable>
Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr,
- std::unique_ptr<TFetchDataResult>&
result,
- const DataTypePtr& nested_type_ptr,
int scale) {
+ std::unique_ptr<TFetchDataResult>&
result, int scale,
+ const DataTypes& sub_types) {
SCOPED_TIMER(_convert_tuple_timer);
const auto row_size = column_ptr->size();
@@ -145,6 +146,7 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr&
column_ptr,
result->result_batch.rows[i].append(_buffer.buf(),
_buffer.length());
}
} else if constexpr (type == TYPE_ARRAY) {
+ DCHECK_EQ(sub_types.size(), 1);
auto& column_array = assert_cast<const ColumnArray&>(*column);
auto& offsets = column_array.get_offsets();
for (ssize_t i = 0; i < row_size; ++i) {
@@ -172,12 +174,12 @@ Status VMysqlResultWriter::_add_one_column(const
ColumnPtr& column_ptr,
if (data->is_null_at(j)) {
buf_ret = _buffer.push_string("NULL", strlen("NULL"));
} else {
- if
(WhichDataType(remove_nullable(nested_type_ptr)).is_string()) {
+ if
(WhichDataType(remove_nullable(sub_types[0])).is_string()) {
buf_ret = _buffer.push_string("'", 1);
- buf_ret = _add_one_cell(data, j, nested_type_ptr,
_buffer);
+ buf_ret = _add_one_cell(data, j, sub_types[0],
_buffer);
buf_ret = _buffer.push_string("'", 1);
} else {
- buf_ret = _add_one_cell(data, j, nested_type_ptr,
_buffer);
+ buf_ret = _add_one_cell(data, j, sub_types[0],
_buffer);
}
}
begin = false;
@@ -186,8 +188,51 @@ Status VMysqlResultWriter::_add_one_column(const
ColumnPtr& column_ptr,
_buffer.close_dynamic_mode();
result->result_batch.rows[i].append(_buffer.buf(),
_buffer.length());
}
+    } else if constexpr (type == TYPE_STRUCT) {
+        // Renders every row as "{f0, f1, ...}": string fields are wrapped in single
+        // quotes and null fields are printed as NULL.
+        DCHECK_GE(sub_types.size(), 1);
+        auto& column_struct = assert_cast<const ColumnStruct&>(*column);
+        for (ssize_t i = 0; i < row_size; ++i) {
+            if (0 != buf_ret) {
+                return Status::InternalError("pack mysql buffer failed.");
+            }
+            _buffer.reset();
+
+            if constexpr (is_nullable) {
+                if (column_ptr->is_null_at(i)) {
+                    buf_ret = _buffer.push_null();
+                    result->result_batch.rows[i].append(_buffer.buf(), _buffer.length());
+                    continue;
+                }
+            }
+
+            _buffer.open_dynamic_mode();
+            buf_ret = _buffer.push_string("{", 1);
+            bool begin = true;
+            for (size_t j = 0; j < sub_types.size(); ++j) {
+                if (!begin) {
+                    buf_ret = _buffer.push_string(", ", 2);
+                }
+                const auto& data = column_struct.get_column_ptr(j);
+                // j selects the field column; the value inside that column belongs
+                // to the current row i, so the row index (not j) is used below.
+                if (data->is_null_at(i)) {
+                    buf_ret = _buffer.push_string("NULL", strlen("NULL"));
+                } else {
+                    if (WhichDataType(remove_nullable(sub_types[j])).is_string()) {
+                        buf_ret = _buffer.push_string("'", 1);
+                        buf_ret = _add_one_cell(data, i, sub_types[j], _buffer);
+                        buf_ret = _buffer.push_string("'", 1);
+                    } else {
+                        buf_ret = _add_one_cell(data, i, sub_types[j], _buffer);
+                    }
+                }
+                begin = false;
+            }
+            buf_ret = _buffer.push_string("}", 1);
+            _buffer.close_dynamic_mode();
+            result->result_batch.rows[i].append(_buffer.buf(), _buffer.length());
+        }
} else if constexpr (type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 ||
type == TYPE_DECIMAL128I) {
+ DCHECK_EQ(sub_types.size(), 1);
for (int i = 0; i < row_size; ++i) {
if (0 != buf_ret) {
return Status::InternalError("pack mysql buffer failed.");
@@ -201,7 +246,7 @@ Status VMysqlResultWriter::_add_one_column(const ColumnPtr&
column_ptr,
continue;
}
}
- std::string decimal_str = nested_type_ptr->to_string(*column, i);
+ std::string decimal_str = sub_types[0]->to_string(*column, i);
buf_ret = _buffer.push_string(decimal_str.c_str(),
decimal_str.length());
result->result_batch.rows[i].append(_buffer.buf(),
_buffer.length());
}
@@ -397,6 +442,7 @@ int VMysqlResultWriter::_add_one_cell(const ColumnPtr&
column_ptr, size_t row_id
auto decimal_str = assert_cast<const
DataTypeDecimal<Decimal128I>*>(nested_type.get())
->to_string(*column, row_idx);
return buffer.push_string(decimal_str.c_str(), decimal_str.length());
+ // TODO(xy): support nested struct
} else if (which.is_array()) {
auto& column_array = assert_cast<const ColumnArray&>(*column);
auto& offsets = column_array.get_offsets();
@@ -554,10 +600,10 @@ Status VMysqlResultWriter::append_block(Block&
input_block) {
auto& nested_type =
assert_cast<const
DataTypeNullable&>(*type_ptr).get_nested_type();
status = _add_one_column<PrimitiveType::TYPE_DECIMALV2,
true>(column_ptr, result,
-
nested_type, scale);
+
scale, {nested_type});
} else {
status = _add_one_column<PrimitiveType::TYPE_DECIMALV2,
false>(column_ptr, result,
-
type_ptr, scale);
+
scale, {type_ptr});
}
break;
}
@@ -566,10 +612,10 @@ Status VMysqlResultWriter::append_block(Block&
input_block) {
auto& nested_type =
assert_cast<const
DataTypeNullable&>(*type_ptr).get_nested_type();
status = _add_one_column<PrimitiveType::TYPE_DECIMAL32,
true>(column_ptr, result,
-
nested_type, scale);
+
scale, {nested_type});
} else {
status = _add_one_column<PrimitiveType::TYPE_DECIMAL32,
false>(column_ptr, result,
-
type_ptr, scale);
+
scale, {type_ptr});
}
break;
}
@@ -578,10 +624,10 @@ Status VMysqlResultWriter::append_block(Block&
input_block) {
auto& nested_type =
assert_cast<const
DataTypeNullable&>(*type_ptr).get_nested_type();
status = _add_one_column<PrimitiveType::TYPE_DECIMAL64,
true>(column_ptr, result,
-
nested_type, scale);
+
scale, {nested_type});
} else {
status = _add_one_column<PrimitiveType::TYPE_DECIMAL64,
false>(column_ptr, result,
-
type_ptr, scale);
+
scale, {type_ptr});
}
break;
}
@@ -589,11 +635,11 @@ Status VMysqlResultWriter::append_block(Block&
input_block) {
if (type_ptr->is_nullable()) {
auto& nested_type =
assert_cast<const
DataTypeNullable&>(*type_ptr).get_nested_type();
- status = _add_one_column<PrimitiveType::TYPE_DECIMAL128I,
true>(column_ptr, result,
-
nested_type, scale);
+ status = _add_one_column<PrimitiveType::TYPE_DECIMAL128I,
true>(
+ column_ptr, result, scale, {nested_type});
} else {
status = _add_one_column<PrimitiveType::TYPE_DECIMAL128I,
false>(column_ptr, result,
-
type_ptr, scale);
+
scale, {type_ptr});
}
break;
}
@@ -625,10 +671,10 @@ Status VMysqlResultWriter::append_block(Block&
input_block) {
case TYPE_DATETIMEV2: {
if (type_ptr->is_nullable()) {
status = _add_one_column<PrimitiveType::TYPE_DATETIMEV2,
true>(column_ptr, result,
-
nullptr, scale);
+
scale);
} else {
status = _add_one_column<PrimitiveType::TYPE_DATETIMEV2,
false>(column_ptr, result,
-
nullptr, scale);
+
scale);
}
break;
}
@@ -646,12 +692,26 @@ Status VMysqlResultWriter::append_block(Block&
input_block) {
auto& nested_type =
assert_cast<const
DataTypeNullable&>(*type_ptr).get_nested_type();
auto& sub_type = assert_cast<const
DataTypeArray&>(*nested_type).get_nested_type();
- status = _add_one_column<PrimitiveType::TYPE_ARRAY,
true>(column_ptr, result,
-
sub_type);
+ status = _add_one_column<PrimitiveType::TYPE_ARRAY,
true>(column_ptr, result, scale,
+
{sub_type});
} else {
auto& sub_type = assert_cast<const
DataTypeArray&>(*type_ptr).get_nested_type();
status = _add_one_column<PrimitiveType::TYPE_ARRAY,
false>(column_ptr, result,
-
sub_type);
+
scale, {sub_type});
+ }
+ break;
+ }
+ case TYPE_STRUCT: {
+ if (type_ptr->is_nullable()) {
+ auto& nested_type =
+ assert_cast<const
DataTypeNullable&>(*type_ptr).get_nested_type();
+ auto& sub_types = assert_cast<const
DataTypeStruct&>(*nested_type).get_elements();
+ status = _add_one_column<PrimitiveType::TYPE_STRUCT,
true>(column_ptr, result,
+
scale, sub_types);
+ } else {
+ auto& sub_types = assert_cast<const
DataTypeStruct&>(*type_ptr).get_elements();
+ status = _add_one_column<PrimitiveType::TYPE_STRUCT,
false>(column_ptr, result,
+
scale, sub_types);
}
break;
}
diff --git a/be/src/vec/sink/vmysql_result_writer.h
b/be/src/vec/sink/vmysql_result_writer.h
index 3f79f0e2d6..db61dcbf41 100644
--- a/be/src/vec/sink/vmysql_result_writer.h
+++ b/be/src/vec/sink/vmysql_result_writer.h
@@ -49,7 +49,7 @@ private:
template <PrimitiveType type, bool is_nullable>
Status _add_one_column(const ColumnPtr& column_ptr,
std::unique_ptr<TFetchDataResult>& result,
- const DataTypePtr& nested_type_ptr = nullptr, int
scale = -1);
+ int scale = -1, const DataTypes& sub_types =
DataTypes());
int _add_one_cell(const ColumnPtr& column_ptr, size_t row_idx, const
DataTypePtr& type,
MysqlRowBuffer& buffer);
diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp
index f042ce50b4..5549c33641 100644
--- a/be/src/vec/sink/vtablet_sink.cpp
+++ b/be/src/vec/sink/vtablet_sink.cpp
@@ -1401,11 +1401,12 @@ Status VOlapTableSink::_validate_column(RuntimeState*
state, const TypeDescripto
}
fmt::format_to(error_prefix, "ARRAY type failed: ");
RETURN_IF_ERROR(_validate_column(
- state, nested_type, nested_type.contains_null,
column_array->get_data_ptr(),
+ state, nested_type, nested_type.contains_nulls[0],
column_array->get_data_ptr(),
slot_index, filter_bitmap, stop_processing, error_prefix,
&permutation));
}
break;
}
+ // TODO(xy): add struct type validate
default:
break;
}
diff --git a/fe/fe-core/src/main/cup/sql_parser.cup
b/fe/fe-core/src/main/cup/sql_parser.cup
index b6bba6a5d7..bfc1aa3706 100644
--- a/fe/fe-core/src/main/cup/sql_parser.cup
+++ b/fe/fe-core/src/main/cup/sql_parser.cup
@@ -606,7 +606,7 @@ terminal String
KW_MTMV,
KW_TYPECAST;
-terminal COMMA, COLON, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON,
LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT;
+terminal COMMA, COLON, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON,
LBRACKET, RBRACKET, LBRACE, RBRACE, DIVIDE, MOD, ADD, SUBTRACT;
terminal BITAND, BITOR, BITXOR, BITNOT;
terminal EQUAL, NOT, LESSTHAN, GREATERTHAN, SET_VAR;
terminal COMMENTED_PLAN_HINT_START, COMMENTED_PLAN_HINT_END;
@@ -716,6 +716,7 @@ nonterminal Expr function_call_expr, array_expr;
nonterminal ArrayLiteral array_literal;
nonterminal StructField struct_field;
nonterminal ArrayList<StructField> struct_field_list;
+nonterminal StructLiteral struct_literal;
nonterminal AnalyticWindow opt_window_clause;
nonterminal AnalyticWindow.Type window_type;
nonterminal AnalyticWindow.Boundary window_boundary;
@@ -914,6 +915,7 @@ precedence left KW_PARTITION;
precedence left KW_PARTITIONS;
precedence right KW_TEMPORARY;
precedence right LBRACKET;
+precedence right LBRACE;
precedence left KW_ENGINE;
// unused
@@ -5791,16 +5793,23 @@ struct_field ::=
;
struct_field_list ::=
- struct_field:field
- {:
- RESULT = Lists.newArrayList(field);
- :}
- | struct_field_list:fields COMMA struct_field:field
- {:
- fields.add(field);
- RESULT = fields;
- :}
- ;
+ struct_field:field
+ {:
+ RESULT = Lists.newArrayList(field);
+ :}
+ | struct_field_list:fields COMMA struct_field:field
+ {:
+ fields.add(field);
+ RESULT = fields;
+ :}
+ ;
+
+// Struct literal: an expression list enclosed in braces, turned into a
+// StructLiteral node.
+// NOTE(review): the list is copied into a LiteralExpr[]; presumably this fails
+// at runtime when expr_list holds a non-literal expression -- confirm the
+// element type produced by expr_list.
+struct_literal ::=
+ LBRACE expr_list:list RBRACE
+ {:
+ RESULT = new StructLiteral(list.toArray(new LiteralExpr[0]));
+ :}
+ ;
exists_predicate ::=
KW_EXISTS subquery:s
@@ -5824,6 +5833,8 @@ non_pred_expr ::=
{: RESULT = a; :}
| array_literal:a
{: RESULT = a; :}
+ | struct_literal:s
+ {: RESULT = s; :}
| function_call_expr:e
{: RESULT = e; :}
| KW_DATE STRING_LITERAL:l
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
index 36b556c623..2610aa6bd9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java
@@ -292,6 +292,10 @@ public class ColumnDef {
}
if (type.getPrimitiveType() == PrimitiveType.STRUCT) {
+ if (isKey()) {
+ throw new AnalysisException("Struct can only be used in the
non-key column of"
+ + " the duplicate table at present.");
+ }
if (defaultValue.isSet && defaultValue !=
DefaultValue.NULL_DEFAULT_VALUE) {
throw new AnalysisException("Struct type column default value
just support null");
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
index 1f43193035..b59ffdf031 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
@@ -336,7 +336,7 @@ public class CreateTableStmt extends DdlStmt {
// So the float and double could not be the first column
in OLAP table.
if (keysColumnNames.isEmpty()) {
throw new AnalysisException("The olap table first
column could not be float, double, string"
- + " or array, please use decimal or varchar
instead.");
+ + " or array, struct, map, please use decimal
or varchar instead.");
}
keysDesc = new KeysDesc(KeysType.DUP_KEYS,
keysColumnNames);
}
@@ -390,14 +390,14 @@ public class CreateTableStmt extends DdlStmt {
for (ColumnDef columnDef : columnDefs) {
columnDef.analyze(engineName.equals("olap"));
- if (columnDef.getType().isArrayType()) {
+ if (columnDef.getType().isComplexType()) {
if (columnDef.getAggregateType() != null &&
columnDef.getAggregateType() != AggregateType.NONE) {
- throw new AnalysisException("Array column can't support
aggregation "
- + columnDef.getAggregateType());
+ throw new
AnalysisException(columnDef.getType().getPrimitiveType()
+ + " column can't support aggregation " +
columnDef.getAggregateType());
}
if (columnDef.isKey()) {
- throw new AnalysisException("Array can only be used in the
non-key column of"
- + " the duplicate table at present.");
+ throw new
AnalysisException(columnDef.getType().getPrimitiveType()
+ + " can only be used in the non-key column of the
duplicate table at present.");
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java
index 79df8f3395..41183119b0 100755
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java
@@ -1782,7 +1782,8 @@ public abstract class Expr extends TreeNode<Expr>
implements ParseNode, Cloneabl
ARRAY_LITERAL(13),
CAST_EXPR(14),
JSON_LITERAL(15),
- ARITHMETIC_EXPR(16);
+ ARITHMETIC_EXPR(16),
+ STRUCT_LITERAL(17);
private static Map<Integer, ExprSerCode> codeMap = Maps.newHashMap();
@@ -1834,6 +1835,8 @@ public abstract class Expr extends TreeNode<Expr>
implements ParseNode, Cloneabl
output.writeInt(ExprSerCode.FUNCTION_CALL.getCode());
} else if (expr instanceof ArrayLiteral) {
output.writeInt(ExprSerCode.ARRAY_LITERAL.getCode());
+ } else if (expr instanceof StructLiteral) {
+ output.writeInt(ExprSerCode.STRUCT_LITERAL.getCode());
} else if (expr instanceof CastExpr) {
output.writeInt(ExprSerCode.CAST_EXPR.getCode());
} else if (expr instanceof ArithmeticExpr) {
@@ -1883,6 +1886,8 @@ public abstract class Expr extends TreeNode<Expr>
implements ParseNode, Cloneabl
return FunctionCallExpr.read(in);
case ARRAY_LITERAL:
return ArrayLiteral.read(in);
+ case STRUCT_LITERAL:
+ return StructLiteral.read(in);
case CAST_EXPR:
return CastExpr.read(in);
case ARITHMETIC_EXPR:
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java
new file mode 100644
index 0000000000..a9209f1c5a
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.analysis;
+
+import org.apache.doris.catalog.StructField;
+import org.apache.doris.catalog.StructType;
+import org.apache.doris.catalog.Type;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.thrift.TExprNode;
+import org.apache.doris.thrift.TExprNodeType;
+
+import org.apache.commons.lang.StringUtils;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Literal expression for a STRUCT value. The element literals are kept as the
+ * children of this node, and the node type is a StructType with one field per
+ * child.
+ */
+public class StructLiteral extends LiteralExpr {
+    // only for persist
+    public StructLiteral() {
+        type = new StructType();
+        children = new ArrayList<>();
+    }
+
+    /**
+     * Builds a struct literal from its element literals, in order.
+     *
+     * @param exprs element literals; each one becomes a field of the struct type
+     * @throws AnalysisException if an element type is not supported inside a struct
+     */
+    public StructLiteral(LiteralExpr... exprs) throws AnalysisException {
+        type = new StructType();
+        children = new ArrayList<>();
+        for (LiteralExpr expr : exprs) {
+            if (!type.supportSubType(expr.getType())) {
+                throw new AnalysisException("Invalid element type in STRUCT.");
+            }
+            ((StructType) type).addField(new StructField(expr.getType()));
+            children.add(expr);
+        }
+    }
+
+    /** Copy constructor used by {@link #clone()} and {@link #uncheckedCastTo(Type)}. */
+    protected StructLiteral(StructLiteral other) {
+        super(other);
+    }
+
+    /**
+     * SQL text of the literal. The children are rendered with their SQL form, not
+     * their digest form, so this differs from {@link #toDigestImpl()}.
+     */
+    @Override
+    protected String toSqlImpl() {
+        List<String> list = new ArrayList<>(children.size());
+        children.forEach(v -> list.add(v.toSql()));
+        return "STRUCT(" + StringUtils.join(list, ", ") + ")";
+    }
+
+    /** Digest text of the literal, built from the digest form of every child. */
+    @Override
+    public String toDigestImpl() {
+        List<String> list = new ArrayList<>(children.size());
+        children.forEach(v -> list.add(v.toDigestImpl()));
+        return "STRUCT(" + StringUtils.join(list, ", ") + ")";
+    }
+
+    /** Returns the element string values joined with ", " and wrapped in braces. */
+    @Override
+    public String getStringValue() {
+        List<String> list = new ArrayList<>(children.size());
+        children.forEach(v -> list.add(v.getStringValue()));
+        return "{" + StringUtils.join(list, ", ") + "}";
+    }
+
+    /** Not provided for struct literals: always returns null. */
+    @Override
+    public String getStringValueForArray() {
+        return null;
+    }
+
+    @Override
+    protected void toThrift(TExprNode msg) {
+        msg.node_type = TExprNodeType.STRUCT_LITERAL;
+    }
+
+    /** Writes the base fields, then the child count followed by every child. */
+    @Override
+    public void write(DataOutput out) throws IOException {
+        super.write(out);
+        out.writeInt(children.size());
+        for (Expr e : children) {
+            Expr.writeTo(e, out);
+        }
+    }
+
+    /** Reads the layout produced by {@link #write(DataOutput)}. */
+    @Override
+    public void readFields(DataInput in) throws IOException {
+        super.readFields(in);
+        int size = in.readInt();
+        children = new ArrayList<>(size);
+        for (int i = 0; i < size; i++) {
+            children.add(Expr.readIn(in));
+        }
+    }
+
+    /** Deserializes a struct literal previously written with {@link #write(DataOutput)}. */
+    public static StructLiteral read(DataInput in) throws IOException {
+        StructLiteral literal = new StructLiteral();
+        literal.readFields(in);
+        return literal;
+    }
+
+    @Override
+    public Expr clone() {
+        return new StructLiteral(this);
+    }
+
+    @Override
+    public boolean isMinValue() {
+        return false;
+    }
+
+    /** Struct literals are not ordered: every comparison reports equality (0). */
+    @Override
+    public int compareLiteral(LiteralExpr expr) {
+        return 0;
+    }
+
+    /**
+     * Casts this literal to {@code targetType}. For a struct target every child is
+     * cast to the type of the field at the same position; any other target is
+     * delegated to the superclass.
+     *
+     * @throws AnalysisException if the target struct does not have exactly one
+     *         field per element, or if a child cannot be cast
+     */
+    @Override
+    public Expr uncheckedCastTo(Type targetType) throws AnalysisException {
+        if (!targetType.isStructType()) {
+            return super.uncheckedCastTo(targetType);
+        }
+        ArrayList<StructField> fields = ((StructType) targetType).getFields();
+        // Fields are matched by position, so a count mismatch would otherwise
+        // surface as an IndexOutOfBoundsException or a half-cast literal.
+        if (fields.size() != children.size()) {
+            throw new AnalysisException("Cannot cast struct literal with " + children.size()
+                    + " elements to a struct type with " + fields.size() + " fields.");
+        }
+        StructLiteral literal = new StructLiteral(this);
+        for (int i = 0; i < children.size(); ++i) {
+            Expr child = children.get(i);
+            literal.children.set(i, child.uncheckedCastTo(fields.get(i).getType()));
+        }
+        literal.setType(targetType);
+        return literal;
+    }
+
+    /** Validates every element of the struct. */
+    @Override
+    public void checkValueValid() throws AnalysisException {
+        for (Expr e : children) {
+            e.checkValueValid();
+        }
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java
index 4246edb5e4..c318e067a0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java
@@ -172,7 +172,7 @@ public class Subquery extends Expr {
fieldName = "_" + Integer.toString(i);
}
Preconditions.checkNotNull(fieldName);
- structFields.add(new StructField(fieldName, expr.getType(), null));
+ structFields.add(new StructField(fieldName, expr.getType()));
}
Preconditions.checkState(structFields.size() != 0);
return new StructType(structFields);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
index df00ef62b5..5690c115c1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java
@@ -30,6 +30,8 @@ import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
/**
* Represents an anonymous type definition, e.g., used in DDL and CASTs.
@@ -94,45 +96,46 @@ public class TypeDef implements ParseNode {
if (type.isArrayType()) {
Type itemType = ((ArrayType) type).getItemType();
if (itemType instanceof ScalarType) {
- analyzeNestedType((ScalarType) itemType);
+ analyzeNestedType(type, (ScalarType) itemType);
}
}
if (type.isMapType()) {
ScalarType keyType = (ScalarType) ((MapType)
type).getKeyType();
ScalarType valueType = (ScalarType) ((MapType)
type).getKeyType();
- analyzeNestedType(keyType);
- analyzeNestedType(valueType);
+ analyzeNestedType(type, keyType);
+ analyzeNestedType(type, valueType);
}
if (type.isStructType()) {
ArrayList<StructField> fields = ((StructType)
type).getFields();
- for (int i = 0; i < fields.size(); i++) {
- ScalarType filedType = (ScalarType)
fields.get(i).getType();
- analyzeNestedType(filedType);
+ Set<String> fieldNames = new HashSet<>();
+ for (StructField field : fields) {
+ Type fieldType = field.getType();
+ if (fieldType instanceof ScalarType) {
+ analyzeNestedType(type, (ScalarType) fieldType);
+ if (!fieldNames.add(field.getName())) {
+ throw new AnalysisException("Duplicate field name "
+ + field.getName() + " in struct " +
type.toSql());
+ }
+ }
}
}
}
}
-    private void analyzeNestedType(ScalarType type) throws AnalysisException {
-        if (type.isNull()) {
-            throw new AnalysisException("Unsupported data type: " + type.toSql());
-        }
-        // check whether the array sub-type is supported
-        Boolean isSupportType = false;
-        for (Type subType : Type.getArraySubTypes()) {
-            if (type.getPrimitiveType() == subType.getPrimitiveType()) {
-                isSupportType = true;
-                break;
-            }
+    /**
+     * Validates a scalar type nested inside a complex {@code parent} type.
+     *
+     * <p>Rejects NULL children and children the parent does not support, gives string
+     * children without an explicit length a length of 1, then runs the regular
+     * analysis on the child.
+     *
+     * @throws AnalysisException if the child is NULL-typed or unsupported by the parent
+     */
+    private void analyzeNestedType(Type parent, ScalarType child) throws AnalysisException {
+        if (child.isNull()) {
+            throw new AnalysisException("Unsupported data type: " + child.toSql());
         }
-        if (!isSupportType) {
-            throw new AnalysisException("Array unsupported sub-type: " + type.toSql());
+        // check whether the sub-type is supported
+        if (!parent.supportSubType(child)) {
+            // Keep a space after the parent type name so the message reads
+            // "ARRAY unsupported sub-type: ..." rather than "ARRAYunsupported ...".
+            throw new AnalysisException(
+                    parent.getPrimitiveType() + " unsupported sub-type: " + child.toSql());
         }
-        if (type.getPrimitiveType().isStringType() && !type.isLengthSet()) {
-            type.setLength(1);
+        if (child.getPrimitiveType().isStringType() && !child.isLengthSet()) {
+            child.setLength(1);
         }
-        analyze(type);
+        analyze(child);
     }
private void analyzeScalarType(ScalarType scalarType)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java
index 45d01019d1..8aa6acb31a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java
@@ -159,6 +159,16 @@ public class ArrayType extends Type {
return !itemType.isNull();
}
+ @Override
+ public boolean supportSubType(Type subType) {
+ for (Type supportedType : getArraySubTypes()) {
+ if (subType.getPrimitiveType() ==
supportedType.getPrimitiveType()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
@Override
public String toString() {
return toSql(0).toUpperCase();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
index cd7cde3cbc..0075da1d13 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java
@@ -110,7 +110,7 @@ public class Column implements Writable,
GsonPostProcessable {
this.stats = new ColumnStats();
this.visible = true;
this.defineExpr = null;
- this.children = new ArrayList<>(Type.MAX_NESTING_DEPTH);
+ this.children = new ArrayList<>();
this.uniqueId = -1;
}
@@ -159,7 +159,7 @@ public class Column implements Writable,
GsonPostProcessable {
this.comment = comment;
this.stats = new ColumnStats();
this.visible = visible;
- this.children = new ArrayList<>(Type.MAX_NESTING_DEPTH);
+ this.children = new ArrayList<>();
createChildrenColumn(this.type, this);
this.uniqueId = colUniqueId;
}
@@ -186,6 +186,13 @@ public class Column implements Writable,
GsonPostProcessable {
Column c = new Column(COLUMN_ARRAY_CHILDREN, ((ArrayType)
type).getItemType());
c.setIsAllowNull(((ArrayType) type).getContainsNull());
column.addChildrenColumn(c);
+ } else if (type.isStructType()) {
+ ArrayList<StructField> fields = ((StructType) type).getFields();
+ for (StructField field : fields) {
+ Column c = new Column(field.getName(), field.getType());
+ c.setIsAllowNull(field.getContainsNull());
+ column.addChildrenColumn(c);
+ }
}
}
@@ -396,34 +403,41 @@ public class Column implements Writable,
GsonPostProcessable {
return tColumn;
}
+    /**
+     * Converts one child column to its thrift form, appends it to
+     * {@code tColumn.children_column}, and then recurses into the child's own children.
+     * {@code tColumn.children_column} is dereferenced without a null check, so the caller
+     * is expected to have set it beforehand.
+     */
+    private void setChildrenTColumn(Column children, TColumn tColumn) {
+        // Describe the child's type.
+        TColumnType childType = new TColumnType();
+        childType.setType(children.getDataType().toThrift());
+        childType.setLen(children.getStrLen());
+        childType.setPrecision(children.getPrecision());
+        childType.setScale(children.getScale());
+        childType.setIndexLen(children.getOlapColumnIndexSize());
+
+        // Describe the child column itself.
+        TColumn childThrift = new TColumn();
+        childThrift.setColumnName(children.name);
+        childThrift.setColumnType(childType);
+        childThrift.setIsAllowNull(children.isAllowNull());
+        // TODO: If we don't set the aggregate type for children, the type will be
+        // considered as TAggregationType::SUM after deserializing in BE.
+        // For now, we make children inherit the aggregate type from their parent.
+        if (tColumn.getAggregationType() != null) {
+            childThrift.setAggregationType(tColumn.getAggregationType());
+        }
+
+        tColumn.children_column.add(childThrift);
+        toChildrenThrift(children, childThrift);
+    }
+
private void toChildrenThrift(Column column, TColumn tColumn) {
if (column.type.isArrayType()) {
Column children = column.getChildren().get(0);
-
- TColumn childrenTColumn = new TColumn();
- childrenTColumn.setColumnName(children.name);
-
- TColumnType childrenTColumnType = new TColumnType();
- childrenTColumnType.setType(children.getDataType().toThrift());
- childrenTColumnType.setType(children.getDataType().toThrift());
- childrenTColumnType.setLen(children.getStrLen());
- childrenTColumnType.setPrecision(children.getPrecision());
- childrenTColumnType.setScale(children.getScale());
-
- childrenTColumnType.setIndexLen(children.getOlapColumnIndexSize());
- childrenTColumn.setColumnType(childrenTColumnType);
- childrenTColumn.setIsAllowNull(children.isAllowNull());
- // TODO: If we don't set the aggregate type for children, the type
will be
- // considered as TAggregationType::SUM after deserializing in BE.
- // For now, we make children inherit the aggregate type from
their parent.
- if (tColumn.getAggregationType() != null) {
-
childrenTColumn.setAggregationType(tColumn.getAggregationType());
- }
-
tColumn.setChildrenColumn(new ArrayList<>());
- tColumn.children_column.add(childrenTColumn);
-
- toChildrenThrift(children, childrenTColumn);
+ setChildrenTColumn(children, tColumn);
+ } else if (column.type.isStructType()) {
+ List<Column> childrenColumns = column.getChildren();
+ tColumn.setChildrenColumn(new ArrayList<>());
+ for (Column children : childrenColumns) {
+ setChildrenTColumn(children, tColumn);
+ }
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java
index 1a746a2374..adf74f5c6a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java
@@ -88,6 +88,11 @@ public class MapType extends Type {
return String.format("%sMAP<%s,%s>", leftPadding, keyType.toSql(),
structStr);
}
+ // Accepts every sub-type unconditionally: no restriction on nested types is applied here.
+ @Override
+ public boolean supportSubType(Type subType) {
+ return true;
+ }
+
@Override
public void toThrift(TTypeDesc container) {
TTypeNode node = new TTypeNode();
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java
index 8ae8c76ed5..b4ad2a27f3 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java
@@ -67,7 +67,10 @@ public enum PrimitiveType {
// sizeof(CollectionValue)
ARRAY("ARRAY", 32, TPrimitiveType.ARRAY),
MAP("MAP", 24, TPrimitiveType.MAP),
- STRUCT("STRUCT", 24, TPrimitiveType.STRUCT),
+ // sizeof(StructValue)
+ // 8-byte pointer and 4-byte size and 1 bytes has_null (13 bytes total)
+ // Aligning to 16 bytes total.
+ STRUCT("STRUCT", 16, TPrimitiveType.STRUCT),
STRING("STRING", 16, TPrimitiveType.STRING),
// Unsupported scalar types.
BINARY("BINARY", -1, TPrimitiveType.BINARY),
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructField.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructField.java
index 6e87b1be40..a084bfe0e4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructField.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructField.java
@@ -22,25 +22,43 @@ import org.apache.doris.thrift.TTypeDesc;
import org.apache.doris.thrift.TTypeNode;
import com.google.common.base.Strings;
+import com.google.gson.annotations.SerializedName;
/**
* TODO: Support comments for struct fields. The Metastore does not properly
store
* comments of struct fields. We set comment to null to avoid compatibility
issues.
*/
public class StructField {
+ @SerializedName(value = "name")
protected final String name;
+
+ @SerializedName(value = "type")
protected final Type type;
+
+ @SerializedName(value = "comment")
protected final String comment;
+
+ @SerializedName(value = "position")
protected int position; // in struct
- public StructField(String name, Type type, String comment) {
- this.name = name;
+ @SerializedName(value = "containsNull")
+ private final boolean containsNull; // Now always true (nullable field)
+
+ private static final String DEFAULT_FIELD_NAME = "col";
+
+ public StructField(String name, Type type, String comment, boolean
containsNull) {
+ this.name = name.toLowerCase();
this.type = type;
this.comment = comment;
+ this.containsNull = containsNull;
}
public StructField(String name, Type type) {
- this(name, type, null);
+ this(name, type, null, true);
+ }
+
+ public StructField(Type type) {
+ this(DEFAULT_FIELD_NAME, type, null, true);
}
public String getComment() {
@@ -63,11 +81,20 @@ public class StructField {
this.position = position;
}
+ public boolean getContainsNull() {
+ return containsNull;
+ }
+
public String toSql(int depth) {
- String typeSql = (depth < Type.MAX_NESTING_DEPTH) ? type.toSql(depth)
: "...";
+ String typeSql;
+ if (depth < Type.MAX_NESTING_DEPTH) {
+ typeSql = !containsNull ? "not_null(" + type.toSql(depth) + ")" :
type.toSql(depth);
+ } else {
+ typeSql = "...";
+ }
StringBuilder sb = new StringBuilder(name);
if (type != null) {
- sb.append(":" + typeSql);
+ sb.append(":").append(typeSql);
}
if (comment != null) {
sb.append(String.format(" COMMENT '%s'", comment));
@@ -87,7 +114,7 @@ public class StructField {
// even if we then strip the top-level padding.
String typeStr = type.prettyPrint(lpad);
typeStr = typeStr.substring(lpad);
- sb.append(":" + typeStr);
+ sb.append(":").append(typeStr);
}
if (comment != null) {
sb.append(String.format(" COMMENT '%s'", comment));
@@ -95,12 +122,25 @@ public class StructField {
return sb.toString();
}
+ // Field-to-field casts are not implemented yet: always returns false, ignoring both arguments.
+ public static boolean canCastTo(StructField field, StructField
targetField) {
+ // TODO(xy): support cast field
+ return false;
+ }
+
+    /**
+     * Returns true if {@code f} equals this field, or if the two field types match
+     * (per {@code Type.matchesType}) and both fields have the same nullability.
+     * The field name only takes part through the {@code equals} shortcut.
+     */
+    public boolean matchesField(StructField f) {
+        if (equals(f)) {
+            return true;
+        }
+        if (!type.matchesType(f.getType())) {
+            return false;
+        }
+        return containsNull == f.getContainsNull();
+    }
+
public void toThrift(TTypeDesc container, TTypeNode node) {
TStructField field = new TStructField();
field.setName(name);
if (comment != null) {
field.setComment(comment);
}
+ field.setContainsNull(containsNull);
node.struct_fields.add(field);
type.toThrift(container);
}
@@ -111,6 +151,7 @@ public class StructField {
return false;
}
StructField otherStructField = (StructField) other;
- return otherStructField.name.equals(name) &&
otherStructField.type.equals(type);
+ return otherStructField.name.equals(name) &&
otherStructField.type.equals(type)
+ && otherStructField.containsNull == containsNull;
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java
index e3ffa3a521..881d451743 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java
@@ -17,6 +17,7 @@
package org.apache.doris.catalog;
+import org.apache.doris.thrift.TColumnType;
import org.apache.doris.thrift.TStructField;
import org.apache.doris.thrift.TTypeDesc;
import org.apache.doris.thrift.TTypeNode;
@@ -27,6 +28,7 @@ import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import com.google.gson.annotations.SerializedName;
import java.util.ArrayList;
import java.util.HashMap;
@@ -35,7 +37,14 @@ import java.util.HashMap;
* Describes a STRUCT type. STRUCT types have a list of named struct fields.
*/
public class StructType extends Type {
+
+ @SerializedName(value = "fieldMap")
private final HashMap<String, StructField> fieldMap = Maps.newHashMap();
+
+ //@SerializedName(value = "positionToField")
+ //private final HashMap<Integer, StructField> positionToField =
Maps.newHashMap();
+
+ @SerializedName(value = "fields")
private final ArrayList<StructField> fields;
public StructType(ArrayList<StructField> fields) {
@@ -44,11 +53,12 @@ public class StructType extends Type {
for (int i = 0; i < this.fields.size(); ++i) {
this.fields.get(i).setPosition(i);
fieldMap.put(this.fields.get(i).getName().toLowerCase(),
this.fields.get(i));
+ //positionToField.put(this.fields.get(i).getPosition(),
this.fields.get(i));
}
}
public StructType() {
- fields = Lists.newArrayList();
+ this.fields = Lists.newArrayList();
}
@Override
@@ -74,10 +84,36 @@ public class StructType extends Type {
leftPadding, Joiner.on(",\n").join(fieldsSql), leftPadding);
}
+ // Struct-to-struct casts are not implemented yet: always returns false, ignoring both arguments.
+ public static boolean canCastTo(StructType type, StructType targetType) {
+ // TODO(xy) : support cast struct type
+ return false;
+ }
+
+ @Override
+ public boolean isSupported() {
+ for (StructField f : fields) {
+ if (!f.getType().isSupported()) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+    /**
+     * Returns true when {@code subType} has the same primitive type as one of the
+     * entries returned by {@code Type.getStructSubTypes()}.
+     */
+    @Override
+    public boolean supportSubType(Type subType) {
+        ArrayList<Type> candidates = Type.getStructSubTypes();
+        for (int idx = 0; idx < candidates.size(); idx++) {
+            if (subType.getPrimitiveType() == candidates.get(idx).getPrimitiveType()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
public void addField(StructField field) {
field.setPosition(fields.size());
fields.add(field);
fieldMap.put(field.getName().toLowerCase(), field);
+ //positionToField.put(field.getPosition(), field);
}
public ArrayList<StructField> getFields() {
@@ -88,9 +124,41 @@ public class StructType extends Type {
return fieldMap.get(fieldName.toLowerCase());
}
+ //public StructField getField(int position) {
+ // return positionToField.get(position);
+ //}
+
public void clearFields() {
fields.clear();
fieldMap.clear();
+ //positionToField.clear();
+ }
+
+ // Every struct type reports PrimitiveType.STRUCT, whatever its fields are.
+ @Override
+ public PrimitiveType getPrimitiveType() {
+ return PrimitiveType.STRUCT;
+ }
+
+    /**
+     * Reports whether {@code t} is compatible with this struct type.
+     *
+     * <p>Equal types always match. Otherwise {@code t} must itself be a struct with the
+     * same number of fields, and every field must match the field at the same position
+     * (see {@link StructField#matchesField}).
+     *
+     * @param t the type to compare against
+     * @return true if {@code t} matches this struct type
+     */
+    @Override
+    public boolean matchesType(Type t) {
+        if (equals(t)) {
+            return true;
+        }
+
+        // Only another struct can match. The check was previously inverted, which rejected
+        // every non-equal struct and let non-struct types reach the casts below.
+        if (!t.isStructType()) {
+            return false;
+        }
+
+        ArrayList<StructField> otherFields = ((StructType) t).getFields();
+        if (fields.size() != otherFields.size()) {
+            return false;
+        }
+
+        for (int i = 0; i < fields.size(); i++) {
+            if (!fields.get(i).matchesField(otherFields.get(i))) {
+                return false;
+            }
+        }
+        return true;
     }
@Override
@@ -107,7 +175,7 @@ public class StructType extends Type {
TTypeNode node = new TTypeNode();
container.types.add(node);
Preconditions.checkNotNull(fields);
- Preconditions.checkNotNull(!fields.isEmpty());
+ Preconditions.checkState(!fields.isEmpty());
node.setType(TTypeNodeType.STRUCT);
node.setStructFields(new ArrayList<TStructField>());
for (StructField field : fields) {
@@ -119,4 +187,26 @@ public class StructType extends Type {
public String toString() {
return toSql(0);
}
+
+ // Builds a TColumnType whose type is the STRUCT primitive; no other TColumnType field
+ // is set here.
+ @Override
+ public TColumnType toColumnTypeThrift() {
+ TColumnType thrift = new TColumnType();
+ thrift.type = PrimitiveType.STRUCT.toThrift();
+ return thrift;
+ }
+
+ // Structs are never reported as fixed-length: always returns false.
+ @Override
+ public boolean isFixedLengthType() {
+ return false;
+ }
+
+ // Structs are never reported as usable for table partitioning: always returns false.
+ @Override
+ public boolean supportsTablePartitioning() {
+ return false;
+ }
+
+ // Delegates to the slot size declared for PrimitiveType.STRUCT.
+ @Override
+ public int getSlotSize() {
+ return PrimitiveType.STRUCT.getSlotSize();
+ }
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
index b8ee68aacd..01bb062825 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
@@ -110,6 +110,7 @@ public abstract class Type {
private static final ArrayList<ScalarType> numericTypes;
private static final ArrayList<ScalarType> supportedTypes;
private static final ArrayList<Type> arraySubTypes;
+ private static final ArrayList<Type> structSubTypes;
private static final ArrayList<ScalarType> trivialTypes;
static {
@@ -166,6 +167,10 @@ public abstract class Type {
arraySubTypes.add(CHAR);
arraySubTypes.add(VARCHAR);
arraySubTypes.add(STRING);
+
+ structSubTypes = Lists.newArrayList();
+ structSubTypes.add(INT);
+ structSubTypes.add(STRING);
}
public static ArrayList<ScalarType> getIntegerTypes() {
@@ -188,6 +193,17 @@ public abstract class Type {
return arraySubTypes;
}
+ // Returns the shared structSubTypes list itself (not a copy), so callers must not modify it.
+ public static ArrayList<Type> getStructSubTypes() {
+ return structSubTypes;
+ }
+
+ /**
+ * Returns true if this is a complex type that supports {@code subType} as a nested type.
+ * This base implementation always returns false.
+ */
+ public boolean supportSubType(Type subType) {
+ return false;
+ }
+
/**
* The output of this is stored directly in the hive metastore as the
column type.
* The string must match exactly.
@@ -370,7 +386,7 @@ public abstract class Type {
}
public boolean isCollectionType() {
- return isMapType() || isArrayType() || isMultiRowType();
+ return isMapType() || isArrayType() || isMultiRowType() ||
isStructType();
}
public boolean isMapType() {
@@ -495,6 +511,10 @@ public abstract class Type {
&& !sourceType.isNull()) {
// TODO: current not support cast any non-array type(except for
null) to nested array type.
return false;
+ } else if (targetType.isStructType() && sourceType.isStringType()) {
+ return true;
+ } else if (sourceType.isStructType() && targetType.isStructType()) {
+ return StructType.canCastTo((StructType) sourceType, (StructType)
targetType);
}
return sourceType.isNull() ||
sourceType.getPrimitiveType().isCharFamily();
}
@@ -813,7 +833,7 @@ public abstract class Type {
}
Pair<Type, Integer> res = fromThrift(col, tmpNodeIdx);
tmpNodeIdx = res.second.intValue();
- structFields.add(new StructField(name, res.first,
comment));
+ structFields.add(new StructField(name, res.first, comment,
true));
}
type = new StructType(structFields);
break;
diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex
b/fe/fe-core/src/main/jflex/sql_scanner.flex
index 730e6affb3..192e699ec7 100644
--- a/fe/fe-core/src/main/jflex/sql_scanner.flex
+++ b/fe/fe-core/src/main/jflex/sql_scanner.flex
@@ -487,6 +487,8 @@ import org.apache.doris.qe.SqlModeHelper;
tokenIdMap.put(new Integer(SqlParserSymbols.RPAREN), ")");
tokenIdMap.put(new Integer(SqlParserSymbols.LBRACKET), "[");
tokenIdMap.put(new Integer(SqlParserSymbols.RBRACKET), "]");
+ tokenIdMap.put(new Integer(SqlParserSymbols.LBRACE), "{");
+ tokenIdMap.put(new Integer(SqlParserSymbols.RBRACE), "}");
tokenIdMap.put(new Integer(SqlParserSymbols.COLON), ":");
tokenIdMap.put(new Integer(SqlParserSymbols.SEMICOLON), ";");
tokenIdMap.put(new Integer(SqlParserSymbols.FLOATINGPOINT_LITERAL),
@@ -635,6 +637,8 @@ EndOfLineComment = "--"
!({HintContent}|{ContainsLineTerminator}) {LineTerminato
";" { return newToken(SqlParserSymbols.SEMICOLON, null); }
"[" { return newToken(SqlParserSymbols.LBRACKET, null); }
"]" { return newToken(SqlParserSymbols.RBRACKET, null); }
+"{" { return newToken(SqlParserSymbols.LBRACE, null); }
+"}" { return newToken(SqlParserSymbols.RBRACE, null); }
"/" { return newToken(SqlParserSymbols.DIVIDE, null); }
"%" { return newToken(SqlParserSymbols.MOD, null); }
"+" { return newToken(SqlParserSymbols.ADD, null); }
diff --git a/gensrc/proto/types.proto b/gensrc/proto/types.proto
index 5686a6ac9d..dea020d0b4 100644
--- a/gensrc/proto/types.proto
+++ b/gensrc/proto/types.proto
@@ -39,6 +39,7 @@ message PScalarType {
message PStructField {
required string name = 1;
optional string comment = 2;
+ optional bool contains_null = 3;
};
message PTypeNode {
diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift
index ddef7fa94e..93241d488c 100644
--- a/gensrc/thrift/Exprs.thrift
+++ b/gensrc/thrift/Exprs.thrift
@@ -44,6 +44,7 @@ enum TExprNodeType {
INFO_FUNC,
FUNCTION_CALL,
ARRAY_LITERAL,
+ STRUCT_LITERAL,
// TODO: old style compute functions. this will be deprecated
COMPUTE_FUNCTION_CALL,
diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift
index 08e8e69bf1..0ecf9b5592 100644
--- a/gensrc/thrift/Types.thrift
+++ b/gensrc/thrift/Types.thrift
@@ -124,6 +124,7 @@ struct TScalarType {
struct TStructField {
1: required string name
2: optional string comment
+ 3: optional bool contains_null
}
struct TTypeNode {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]