This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 687c676160 [FIX](map)fix column map for offset next_array_item_rowid
order (#23250)
687c676160 is described below
commit 687c67616060c57f706cb21e92824a6fd0f94205
Author: amory <[email protected]>
AuthorDate: Thu Aug 24 10:57:40 2023 +0800
[FIX](map)fix column map for offset next_array_item_rowid order (#23250)
* fix column map for offset next_array_item_rowid order
* add regress test
---
be/src/olap/rowset/segment_v2/column_reader.cpp | 26 +++++++---
be/src/olap/rowset/segment_v2/column_writer.cpp | 20 ++++----
.../complex_types/map_uniq_with_local_tvf.out | 10 ++++
regression-test/data/types/complex_types/mm.orc | Bin 0 -> 19666267 bytes
.../complex_types/map_uniq_with_local_tvf.groovy | 57 +++++++++++++++++++++
5 files changed, 98 insertions(+), 15 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp
b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 7a7eec9578..1f98b9032c 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -674,6 +674,7 @@ Status MapFileColumnIterator::next_batch(size_t* n,
vectorized::MutableColumnPtr
auto& column_offsets =
static_cast<vectorized::ColumnArray::ColumnOffsets&>(*column_offsets_ptr);
RETURN_IF_ERROR(_offsets_iterator->_calculate_offsets(start,
column_offsets));
+ DCHECK(column_offsets.get_data().back() >= column_offsets.get_data()[start
- 1]);
size_t num_items =
column_offsets.get_data().back() - column_offsets.get_data()[start
- 1]; // -1 is valid
auto key_ptr = column_map->get_keys().assume_mutable();
@@ -808,20 +809,33 @@ Status
OffsetFileColumnIterator::_peek_one_offset(ordinal_t* offset) {
return Status::OK();
}
+/**
+ * first_storage_offset read from the page should be smaller than
next_storage_offset, which is obtained here by calling _peek_one_offset on the page;
+ and first_column_offset is kept in the in-memory data, which is in a different
dimension from (first_storage_offset and next_storage_offset).
+ eg. step1. read page: first_storage_offset = 16382
+ step2. read the following page with _peek_one_offset(&last_offset):
last_offset = 16387
+ step3. first_offset = 126, which is calculated from the column offsets
+ by looping over the column offsets elements;
+ we can walk from first_storage_offset to next_storage_offset
one by one to fill offsets_data in the in-memory column offsets.
+ * @param start
+ * @param column_offsets
+ * @return
+ */
Status OffsetFileColumnIterator::_calculate_offsets(
ssize_t start, vectorized::ColumnArray::ColumnOffsets& column_offsets)
{
- ordinal_t last_offset = 0;
- RETURN_IF_ERROR(_peek_one_offset(&last_offset));
+ ordinal_t next_storage_offset = 0;
+ RETURN_IF_ERROR(_peek_one_offset(&next_storage_offset));
// calculate real offsets
auto& offsets_data = column_offsets.get_data();
- ordinal_t first_offset = offsets_data[start - 1]; // -1 is valid
- ordinal_t first_ord = offsets_data[start];
+ ordinal_t first_column_offset = offsets_data[start - 1]; // -1 is valid
+ ordinal_t first_storage_offset = offsets_data[start];
for (ssize_t i = start; i < offsets_data.size() - 1; ++i) {
- offsets_data[i] = first_offset + (offsets_data[i + 1] - first_ord);
+ offsets_data[i] = first_column_offset + (offsets_data[i + 1] -
first_storage_offset);
}
// last offset
- offsets_data[offsets_data.size() - 1] = first_offset + (last_offset -
first_ord);
+ offsets_data[offsets_data.size() - 1] =
+ first_column_offset + (next_storage_offset - first_storage_offset);
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp
b/be/src/olap/rowset/segment_v2/column_writer.cpp
index 4ddd7e2c6b..dcb0f89858 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -1083,17 +1083,19 @@ Status MapColumnWriter::append_data(const uint8_t**
ptr, size_t num_rows) {
size_t element_cnt = size_t((unsigned long)(*data_ptr));
auto offset_data = *(data_ptr + 1);
const uint8_t* offsets_ptr = (const uint8_t*)offset_data;
- RETURN_IF_ERROR(_offsets_writer->append_data(&offsets_ptr, num_rows));
- if (element_cnt == 0) {
- return Status::OK();
- }
- for (size_t i = 0; i < 2; ++i) {
- auto data = *(data_ptr + 2 + i);
- auto nested_null_map = *(data_ptr + 2 + 2 + i);
- RETURN_IF_ERROR(_kv_writers[i]->append(reinterpret_cast<const
uint8_t*>(nested_null_map),
- reinterpret_cast<const
void*>(data), element_cnt));
+ if (element_cnt > 0) {
+ for (size_t i = 0; i < 2; ++i) {
+ auto data = *(data_ptr + 2 + i);
+ auto nested_null_map = *(data_ptr + 2 + 2 + i);
+ RETURN_IF_ERROR(
+ _kv_writers[i]->append(reinterpret_cast<const
uint8_t*>(nested_null_map),
+ reinterpret_cast<const
void*>(data), element_cnt));
+ }
}
+ // make sure of the order: the offsets writer flushes next_array_item_ordinal
after the kv_writers append data,
+ // because we use _kv_writers[0]->get_next_rowid() to set
next_array_item_ordinal in the offset page footer
+ RETURN_IF_ERROR(_offsets_writer->append_data(&offsets_ptr, num_rows));
return Status::OK();
}
diff --git
a/regression-test/data/types/complex_types/map_uniq_with_local_tvf.out
b/regression-test/data/types/complex_types/map_uniq_with_local_tvf.out
new file mode 100644
index 0000000000..dee62c1518
--- /dev/null
+++ b/regression-test/data/types/complex_types/map_uniq_with_local_tvf.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+25000
+
+-- !sql --
+25000
+
+-- !sql --
+25000
+
diff --git a/regression-test/data/types/complex_types/mm.orc
b/regression-test/data/types/complex_types/mm.orc
new file mode 100644
index 0000000000..8042f56dae
Binary files /dev/null and b/regression-test/data/types/complex_types/mm.orc
differ
diff --git
a/regression-test/suites/types/complex_types/map_uniq_with_local_tvf.groovy
b/regression-test/suites/types/complex_types/map_uniq_with_local_tvf.groovy
new file mode 100644
index 0000000000..c77e74e0b6
--- /dev/null
+++ b/regression-test/suites/types/complex_types/map_uniq_with_local_tvf.groovy
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("map_uniq_with_local_tvf", "p0") {
+ def table_name = "map_uniq"
+ List<List<Object>> backends = sql """ show backends """
+ def dataFilePath = context.config.dataPath + "/types/complex_types/"
+ assertTrue(backends.size() > 0)
+ def be_id = backends[0][0]
+ // cluster mode need to make sure all be has this data
+ def outFilePath="/"
+ def transFile01="${dataFilePath}/mm.orc"
+ for (List<Object> backend : backends) {
+ def be_host = backend[1]
+ scpFiles ("root", be_host, transFile01, outFilePath, false);
+ }
+ sql "DROP TABLE IF EXISTS ${table_name};"
+ sql """
+ CREATE TABLE ${table_name} (
+ `id` int(11) NULL,
+ `m` MAP<text,text> NULL
+ ) ENGINE=OLAP
+ UNIQUE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "is_being_synced" = "false",
+ "storage_format" = "V2",
+ "light_schema_change" = "true",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false"
+ ); """
+
+ qt_sql """
+ insert into ${table_name} select * from local(
+ "file_path" = "${outFilePath}/mm.orc",
+ "backend_id" = "${be_id}",
+ "format" = "orc");"""
+ qt_sql """ select count(m) from ${table_name}; """
+ qt_sql """ select count(m) from ${table_name} where map_size(m) > 0;"""
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]