This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new cc4778a271 [Fix](orc-reader) Check hasNulls first when using notNull
data in ColumnVectorBatch. #18674
cc4778a271 is described below
commit cc4778a271111fcb2d4027a3d5831bfc9c6a730c
Author: Qi Chen <[email protected]>
AuthorDate: Sat Apr 15 19:48:31 2023 +0800
[Fix](orc-reader) Check hasNulls first when using notNull data in
ColumnVectorBatch. #18674
---
be/src/vec/exec/format/orc/vorc_reader.cpp | 35 ++++++++++++++++++++----------
1 file changed, 24 insertions(+), 11 deletions(-)
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index c4fd3c2409..ada1284f21 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -660,23 +660,36 @@ Status OrcReader::_decode_string_column(const
std::string& col_name,
string_values.reserve(num_values);
if (type_kind == orc::TypeKind::CHAR) {
// Possibly there are some zero padding characters in CHAR type, we
have to strip them off.
- for (int i = 0; i < num_values; ++i) {
- if (cvb->notNull[i]) {
+ if (cvb->hasNulls) {
+ for (int i = 0; i < num_values; ++i) {
+ if (cvb->notNull[i]) {
+ string_values.emplace_back(data->data[i],
+ trim_right(data->data[i],
data->length[i]));
+ } else {
+ // Orc doesn't fill null values in new batch, but the
former batch has been release.
+ // Other types like int/long/timestamp... are flat types
without pointer in them,
+ // so other types do not need to be handled separately
like string.
+ string_values.emplace_back(empty_string.data(), 0);
+ }
+ }
+ } else {
+ for (int i = 0; i < num_values; ++i) {
string_values.emplace_back(data->data[i],
trim_right(data->data[i],
data->length[i]));
- } else {
- // Orc doesn't fill null values in new batch, but the former
batch has been release.
- // Other types like int/long/timestamp... are flat types
without pointer in them,
- // so other types do not need to be handled separately like
string.
- string_values.emplace_back(empty_string.data(), 0);
}
}
} else {
- for (int i = 0; i < num_values; ++i) {
- if (cvb->notNull[i]) {
+ if (cvb->hasNulls) {
+ for (int i = 0; i < num_values; ++i) {
+ if (cvb->notNull[i]) {
+ string_values.emplace_back(data->data[i], data->length[i]);
+ } else {
+ string_values.emplace_back(empty_string.data(), 0);
+ }
+ }
+ } else {
+ for (int i = 0; i < num_values; ++i) {
string_values.emplace_back(data->data[i], data->length[i]);
- } else {
- string_values.emplace_back(empty_string.data(), 0);
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]