This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e30c3f3a65b [fix](csv_reader) Fix bug where reading garbled files caused a
BE crash. (#24164)
e30c3f3a65b is described below
commit e30c3f3a65bf6eb7887b3128d233376648de3acd
Author: daidai <[email protected]>
AuthorDate: Wed Sep 13 14:12:55 2023 +0800
[fix](csv_reader) Fix bug where reading garbled files caused a BE crash. (#24164)
Fix a bug where reading garbled (corrupted) text files caused the backend (BE) to crash.
---
be/src/exec/text_converter.cpp | 3 +-
.../vec/data_types/serde/data_type_array_serde.cpp | 9 +++-
.../vec/data_types/serde/data_type_map_serde.cpp | 11 +++--
.../data_types/serde/data_type_struct_serde.cpp | 5 ++-
.../hive/test_text_garbled_file.out | Bin 0 -> 296830 bytes
.../hive/test_text_garbled_file.groovy | 46 +++++++++++++++++++++
6 files changed, 67 insertions(+), 7 deletions(-)
diff --git a/be/src/exec/text_converter.cpp b/be/src/exec/text_converter.cpp
index 95232678ea0..59417bc9265 100644
--- a/be/src/exec/text_converter.cpp
+++ b/be/src/exec/text_converter.cpp
@@ -330,9 +330,10 @@ bool TextConverter::_write_data(const TypeDescriptor&
type_desc,
kv = i;
continue;
}
- if (i == len || data[i] == _collection_delimiter) {
+ if ((i == len || data[i] == _collection_delimiter) && i >= kv + 1)
{
ranges.push_back({from, kv, i - 1});
from = i + 1;
+ kv = from;
}
}
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp
b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index 1dca5299894..5aa78a1f886 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -71,7 +71,9 @@ Status
DataTypeArraySerDe::deserialize_column_from_json_vector(IColumn& column,
Status DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column,
Slice& slice,
const FormatOptions&
options) const {
- DCHECK(!slice.empty());
+ if (slice.empty()) {
+ return Status::InvalidArgument("slice is empty!");
+ }
auto& array_column = assert_cast<ColumnArray&>(column);
auto& offsets = array_column.get_offsets();
IColumn& nested_column = array_column.get_data();
@@ -132,6 +134,9 @@ Status
DataTypeArraySerDe::deserialize_one_cell_from_json(IColumn& column, Slice
Status DataTypeArraySerDe::deserialize_one_cell_from_hive_text(IColumn&
column, Slice& slice,
const
FormatOptions& options,
int
nesting_level) const {
+ if (slice.empty()) {
+ return Status::InvalidArgument("slice is empty!");
+ }
auto& array_column = assert_cast<ColumnArray&>(column);
auto& offsets = array_column.get_offsets();
IColumn& nested_column = array_column.get_data();
@@ -303,4 +308,4 @@ Status DataTypeArraySerDe::write_column_to_mysql(const
IColumn& column,
}
} // namespace vectorized
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp
b/be/src/vec/data_types/serde/data_type_map_serde.cpp
index a0e6636c507..cce5986b195 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -65,7 +65,9 @@ void DataTypeMapSerDe::serialize_one_cell_to_json(const
IColumn& column, int row
Status DataTypeMapSerDe::deserialize_one_cell_from_hive_text(IColumn& column,
Slice& slice,
const
FormatOptions& options,
int
nesting_level) const {
- DCHECK(!slice.empty());
+ if (slice.empty()) {
+ return Status::InvalidArgument("slice is empty!");
+ }
auto& array_column = assert_cast<ColumnMap&>(column);
auto& offsets = array_column.get_offsets();
IColumn& nested_key_column = array_column.get_keys();
@@ -92,10 +94,11 @@ Status
DataTypeMapSerDe::deserialize_one_cell_from_hive_text(IColumn& column, Sl
kv = i;
continue;
}
- if (i == slice.size || slice[i] == collection_delimiter) {
+ if ((i == slice.size || slice[i] == collection_delimiter) && i >= kv +
1) {
key_slices.push_back({slice.data + from, kv - from});
value_slices.push_back({slice.data + kv + 1, i - 1 - kv});
from = i + 1;
+ kv = from;
}
}
@@ -169,7 +172,9 @@ Status
DataTypeMapSerDe::deserialize_column_from_json_vector(IColumn& column,
Status DataTypeMapSerDe::deserialize_one_cell_from_json(IColumn& column,
Slice& slice,
const FormatOptions&
options) const {
- DCHECK(!slice.empty());
+ if (slice.empty()) {
+ return Status::InvalidArgument("slice is empty!");
+ }
auto& array_column = assert_cast<ColumnMap&>(column);
auto& offsets = array_column.get_offsets();
IColumn& nested_key_column = array_column.get_keys();
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index b202d0fb237..06ec4d709bb 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -44,6 +44,9 @@ void DataTypeStructSerDe::write_one_cell_to_jsonb(const
IColumn& column, JsonbWr
Status DataTypeStructSerDe::deserialize_one_cell_from_hive_text(IColumn&
column, Slice& slice,
const
FormatOptions& options,
int
nesting_level) const {
+ if (slice.empty()) {
+ return Status::InvalidArgument("slice is empty!");
+ }
char struct_delimiter = options.get_collection_delimiter(nesting_level);
std::vector<Slice> slices;
@@ -190,4 +193,4 @@ Status DataTypeStructSerDe::write_column_to_mysql(const
IColumn& column,
}
} // namespace vectorized
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git
a/regression-test/data/external_table_p2/hive/test_text_garbled_file.out
b/regression-test/data/external_table_p2/hive/test_text_garbled_file.out
new file mode 100644
index 00000000000..b003cd49e97
Binary files /dev/null and
b/regression-test/data/external_table_p2/hive/test_text_garbled_file.out differ
diff --git
a/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy
b/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy
new file mode 100644
index 00000000000..a3ea6a3bcc2
--- /dev/null
+++
b/regression-test/suites/external_table_p2/hive/test_text_garbled_file.groovy
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_text_garbled_file",
"p2,external,hive,external_remote,external_remote_hive") {
+ //test hive garbled files , prevent be hanged
+
+ String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+ if (enabled != null && enabled.equalsIgnoreCase("true")) {
+ String extHiveHmsHost =
context.config.otherConfigs.get("extHiveHmsHost")
+ String extHiveHmsPort =
context.config.otherConfigs.get("extHiveHmsPort")
+ String catalog_name = "test_text_garbled_file"
+ sql """drop catalog if exists ${catalog_name};"""
+ sql """
+ create catalog if not exists ${catalog_name} properties (
+ 'type'='hms',
+ 'hadoop.username' = 'hadoop',
+ 'hive.metastore.uris' =
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+ );
+ """
+ logger.info("catalog " + catalog_name + " created")
+ sql """switch ${catalog_name};"""
+ logger.info("switched to catalog " + catalog_name)
+
+
+ order_qt_garbled_file """
+ select * from ${catalog_name}.multi_catalog.test_csv_format_error;
+ """
+
+
+ }
+}
+
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]