This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 53ba46e404 [Fix][Refactor] Fix 'not member call on null pointer of 
type 'doris::TextConverter' error in ubsan env and refactor text converter. 
(#19849)
53ba46e404 is described below

commit 53ba46e40419cb2c2146b30ccf4a85fe4a5a5c05
Author: Qi Chen <[email protected]>
AuthorDate: Mon May 22 21:00:19 2023 +0800

    [Fix][Refactor] Fix 'not member call on null pointer of type 
'doris::TextConverter' error in ubsan env and refactor text converter. (#19849)
    
    Fix the 'member call on null pointer of type doris::TextConverter' error 
reported by UBSan, and refactor the text converter.
---
 be/src/exec/text_converter.cpp                     |  16 ++
 be/src/exec/text_converter.h                       |  18 +-
 be/src/exec/text_converter.hpp                     | 254 ---------------------
 be/src/vec/exec/format/csv/csv_reader.cpp          |   1 -
 be/src/vec/exec/format/orc/vorc_reader.cpp         |   1 +
 .../exec/format/parquet/vparquet_group_reader.cpp  |   1 +
 be/src/vec/exec/scan/new_odbc_scanner.cpp          |   1 -
 be/src/vec/exec/vmysql_scan_node.cpp               |   1 -
 8 files changed, 31 insertions(+), 262 deletions(-)

diff --git a/be/src/exec/text_converter.cpp b/be/src/exec/text_converter.cpp
index 194dde13e6..02893c1dfc 100644
--- a/be/src/exec/text_converter.cpp
+++ b/be/src/exec/text_converter.cpp
@@ -44,6 +44,22 @@ namespace doris {
 
 TextConverter::TextConverter(char escape_char) : _escape_char(escape_char) {}
 
+void TextConverter::write_string_column(const SlotDescriptor* slot_desc,
+                                        vectorized::MutableColumnPtr* 
column_ptr, const char* data,
+                                        size_t len) {
+    DCHECK(column_ptr->get()->is_nullable());
+    auto* nullable_column = 
reinterpret_cast<vectorized::ColumnNullable*>(column_ptr->get());
+    if ((len == 2 && data[0] == '\\' && data[1] == 'N') || len == 
SQL_NULL_DATA) {
+        nullable_column->get_null_map_data().push_back(1);
+        
reinterpret_cast<vectorized::ColumnString&>(nullable_column->get_nested_column())
+                .insert_default();
+    } else {
+        nullable_column->get_null_map_data().push_back(0);
+        
reinterpret_cast<vectorized::ColumnString&>(nullable_column->get_nested_column())
+                .insert_data(data, len);
+    }
+}
+
 bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
                                      vectorized::IColumn* nullable_col_ptr, 
const char* data,
                                      size_t len, bool copy_string, bool 
need_escape, size_t rows) {
diff --git a/be/src/exec/text_converter.h b/be/src/exec/text_converter.h
index d70cfba982..9615471a8f 100644
--- a/be/src/exec/text_converter.h
+++ b/be/src/exec/text_converter.h
@@ -35,11 +35,19 @@ public:
                              vectorized::MutableColumnPtr* column_ptr, const 
char* data,
                              size_t len);
 
-    bool write_column(const SlotDescriptor* slot_desc, 
vectorized::MutableColumnPtr* column_ptr,
-                      const char* data, size_t len, bool copy_string, bool 
need_escape);
-
-    bool write_vec_column(const SlotDescriptor* slot_desc, 
vectorized::IColumn* nullable_col_ptr,
-                          const char* data, size_t len, bool copy_string, bool 
need_escape);
+    inline bool write_column(const SlotDescriptor* slot_desc,
+                             vectorized::MutableColumnPtr* column_ptr, const 
char* data, size_t len,
+                             bool copy_string, bool need_escape) {
+        vectorized::IColumn* nullable_col_ptr = column_ptr->get();
+        return write_vec_column(slot_desc, nullable_col_ptr, data, len, 
copy_string, need_escape);
+    }
+
+    inline bool write_vec_column(const SlotDescriptor* slot_desc,
+                                 vectorized::IColumn* nullable_col_ptr, const 
char* data,
+                                 size_t len, bool copy_string, bool 
need_escape) {
+        return write_vec_column(slot_desc, nullable_col_ptr, data, len, 
copy_string, need_escape,
+                                1);
+    }
 
     /// Write consecutive rows of the same data.
     bool write_vec_column(const SlotDescriptor* slot_desc, 
vectorized::IColumn* nullable_col_ptr,
diff --git a/be/src/exec/text_converter.hpp b/be/src/exec/text_converter.hpp
deleted file mode 100644
index ddef2dd42e..0000000000
--- a/be/src/exec/text_converter.hpp
+++ /dev/null
@@ -1,254 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <sql.h>
-
-#include <boost/algorithm/string.hpp>
-
-#include "runtime/datetime_value.h"
-#include "runtime/decimalv2_value.h"
-#include "runtime/descriptors.h"
-#include "text_converter.h"
-#include "util/binary_cast.hpp"
-#include "util/string_parser.hpp"
-#include "util/types.h"
-#include "vec/columns/column_complex.h"
-#include "vec/columns/column_nullable.h"
-#include "vec/runtime/vdatetime_value.h"
-
-namespace doris {
-
-inline void TextConverter::write_string_column(const SlotDescriptor* slot_desc,
-                                               vectorized::MutableColumnPtr* 
column_ptr,
-                                               const char* data, size_t len) {
-    DCHECK(column_ptr->get()->is_nullable());
-    auto* nullable_column = 
reinterpret_cast<vectorized::ColumnNullable*>(column_ptr->get());
-    if (len == 2 && data[0] == '\\' && data[1] == 'N') {
-        nullable_column->get_null_map_data().push_back(1);
-        
reinterpret_cast<vectorized::ColumnString&>(nullable_column->get_nested_column())
-                .insert_default();
-    } else {
-        nullable_column->get_null_map_data().push_back(0);
-        
reinterpret_cast<vectorized::ColumnString&>(nullable_column->get_nested_column())
-                .insert_data(data, len);
-    }
-}
-
-inline bool TextConverter::write_column(const SlotDescriptor* slot_desc,
-                                        vectorized::MutableColumnPtr* 
column_ptr, const char* data,
-                                        size_t len, bool copy_string, bool 
need_escape) {
-    vectorized::IColumn* nullable_col_ptr = column_ptr->get();
-    return write_vec_column(slot_desc, nullable_col_ptr, data, len, 
copy_string, need_escape);
-}
-
-inline bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
-                                            vectorized::IColumn* 
nullable_col_ptr, const char* data,
-                                            size_t len, bool copy_string, bool 
need_escape) {
-    vectorized::IColumn* col_ptr = nullable_col_ptr;
-    // \N means it's NULL
-    if (slot_desc->is_nullable()) {
-        auto* nullable_column = 
reinterpret_cast<vectorized::ColumnNullable*>(nullable_col_ptr);
-        if ((len == 2 && data[0] == '\\' && data[1] == 'N') || len == 
SQL_NULL_DATA) {
-            nullable_column->insert_data(nullptr, 0);
-            return true;
-        } else {
-            nullable_column->get_null_map_data().push_back(0);
-            col_ptr = &nullable_column->get_nested_column();
-        }
-    }
-
-    bool insert_after_parse_failure = true;
-    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
-    // Parse the raw-text data. Translate the text string to internal format.
-    switch (slot_desc->type().type) {
-    case TYPE_HLL: {
-        
reinterpret_cast<vectorized::ColumnHLL*>(col_ptr)->get_data().emplace_back(
-                HyperLogLog(Slice(data, len)));
-        break;
-    }
-    case TYPE_STRING:
-    case TYPE_VARCHAR:
-    case TYPE_CHAR: {
-        if (need_escape) {
-            unescape_string_on_spot(data, &len);
-        }
-        
reinterpret_cast<vectorized::ColumnString*>(col_ptr)->insert_data(data, len);
-        break;
-    }
-
-    case TYPE_BOOLEAN: {
-        bool num = StringParser::string_to_bool(data, len, &parse_result);
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::UInt8>*>(col_ptr)->insert_value(
-                (uint8_t)num);
-        break;
-    }
-    case TYPE_TINYINT: {
-        int8_t num = StringParser::string_to_int<int8_t>(data, len, 
&parse_result);
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Int8>*>(col_ptr)->insert_value(num);
-        break;
-    }
-    case TYPE_SMALLINT: {
-        int16_t num = StringParser::string_to_int<int16_t>(data, len, 
&parse_result);
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Int16>*>(col_ptr)->insert_value(num);
-        break;
-    }
-    case TYPE_INT: {
-        int32_t num = StringParser::string_to_int<int32_t>(data, len, 
&parse_result);
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Int32>*>(col_ptr)->insert_value(num);
-        break;
-    }
-    case TYPE_BIGINT: {
-        int64_t num = StringParser::string_to_int<int64_t>(data, len, 
&parse_result);
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(num);
-        break;
-    }
-    case TYPE_LARGEINT: {
-        __int128 num = StringParser::string_to_int<__int128>(data, len, 
&parse_result);
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(num);
-        break;
-    }
-
-    case TYPE_FLOAT: {
-        float num = StringParser::string_to_float<float>(data, len, 
&parse_result);
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Float32>*>(col_ptr)->insert_value(
-                num);
-        break;
-    }
-    case TYPE_DOUBLE: {
-        double num = StringParser::string_to_float<double>(data, len, 
&parse_result);
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Float64>*>(col_ptr)->insert_value(
-                num);
-        break;
-    }
-    case TYPE_DATE: {
-        vectorized::VecDateTimeValue ts_slot;
-        if (!ts_slot.from_date_str(data, len)) {
-            parse_result = StringParser::PARSE_FAILURE;
-            insert_after_parse_failure = false;
-            break;
-        }
-        ts_slot.cast_to_date();
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_data(
-                reinterpret_cast<char*>(&ts_slot), 0);
-        break;
-    }
-    case TYPE_DATEV2: {
-        vectorized::VecDateTimeValue ts_slot;
-        if (!ts_slot.from_date_str(data, len)) {
-            parse_result = StringParser::PARSE_FAILURE;
-            insert_after_parse_failure = false;
-            break;
-        }
-        ts_slot.cast_to_date();
-        uint32_t num = ts_slot.to_date_v2();
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::UInt32>*>(col_ptr)->insert_value(num);
-        break;
-    }
-    case TYPE_DATETIME: {
-        vectorized::VecDateTimeValue ts_slot;
-        if (!ts_slot.from_date_str(data, len)) {
-            parse_result = StringParser::PARSE_FAILURE;
-            insert_after_parse_failure = false;
-            break;
-        }
-        ts_slot.to_datetime();
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_data(
-                reinterpret_cast<char*>(&ts_slot), 0);
-        break;
-    }
-    case TYPE_DATETIMEV2: {
-        vectorized::DateV2Value<vectorized::DateTimeV2ValueType> ts_slot;
-        if (!ts_slot.from_date_str(data, len, slot_desc->type().scale)) {
-            parse_result = StringParser::PARSE_FAILURE;
-            insert_after_parse_failure = false;
-            break;
-        }
-        uint64_t num = ts_slot.to_date_int_val();
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::UInt64>*>(col_ptr)->insert_value(num);
-        break;
-    }
-
-    case TYPE_DECIMALV2: {
-        DecimalV2Value decimal_slot;
-        if (decimal_slot.parse_from_str(data, len)) {
-            parse_result = StringParser::PARSE_FAILURE;
-            insert_after_parse_failure = false;
-            break;
-        }
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(
-                decimal_slot.value());
-        break;
-    }
-    case TYPE_DECIMAL32: {
-        StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
-        int32_t value = StringParser::string_to_decimal<int32_t>(
-                data, len, slot_desc->type().precision, 
slot_desc->type().scale, &result);
-        if (result != StringParser::PARSE_SUCCESS) {
-            parse_result = StringParser::PARSE_FAILURE;
-            break;
-        }
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Int32>*>(col_ptr)->insert_value(
-                value);
-        break;
-    }
-    case TYPE_DECIMAL64: {
-        StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
-        int64_t value = StringParser::string_to_decimal<int64_t>(
-                data, len, slot_desc->type().precision, 
slot_desc->type().scale, &result);
-        if (result != StringParser::PARSE_SUCCESS) {
-            parse_result = StringParser::PARSE_FAILURE;
-            break;
-        }
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(
-                value);
-        break;
-    }
-    case TYPE_DECIMAL128I: {
-        StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
-        vectorized::Int128 value = 
StringParser::string_to_decimal<vectorized::Int128>(
-                data, len, slot_desc->type().precision, 
slot_desc->type().scale, &result);
-        if (result != StringParser::PARSE_SUCCESS) {
-            parse_result = StringParser::PARSE_FAILURE;
-            break;
-        }
-        
reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(
-                value);
-        break;
-    }
-    default:
-        DCHECK(false) << "bad slot type: " << slot_desc->type();
-        break;
-    }
-
-    if (UNLIKELY(parse_result == StringParser::PARSE_FAILURE)) {
-        if (slot_desc->is_nullable()) {
-            auto* nullable_column = 
reinterpret_cast<vectorized::ColumnNullable*>(nullable_col_ptr);
-            size_t size = nullable_column->get_null_map_data().size();
-            doris::vectorized::NullMap& null_map_data = 
nullable_column->get_null_map_data();
-            null_map_data[size - 1] = 1;
-            if (!insert_after_parse_failure) {
-                nullable_column->get_nested_column().insert_default();
-            }
-        }
-        return false;
-    }
-    return true;
-}
-
-} // namespace doris
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp 
b/be/src/vec/exec/format/csv/csv_reader.cpp
index 7cb36e94c7..09de2c6479 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -36,7 +36,6 @@
 #include "exec/decompressor.h"
 #include "exec/line_reader.h"
 #include "exec/text_converter.h"
-#include "exec/text_converter.hpp"
 #include "io/file_factory.h"
 #include "io/fs/broker_file_reader.h"
 #include "io/fs/buffered_reader.h"
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index eb944bb84c..990dfc0477 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -244,6 +244,7 @@ Status OrcReader::init_reader(
         VExprContext* vconjunct_ctx) {
     _colname_to_value_range = colname_to_value_range;
     _lazy_read_ctx.vconjunct_ctx = vconjunct_ctx;
+    _text_converter.reset(new TextConverter('\\'));
     SCOPED_RAW_TIMER(&_statistics.parse_meta_time);
     RETURN_IF_ERROR(_create_file_reader());
     RETURN_IF_ERROR(_init_read_columns());
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 82c9b122a1..abda93afde 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -116,6 +116,7 @@ Status RowGroupReader::init(
     _row_descriptor = row_descriptor;
     _col_name_to_slot_id = colname_to_slot_id;
     _slot_id_to_filter_conjuncts = slot_id_to_filter_conjuncts;
+    _text_converter.reset(new TextConverter('\\'));
     if (not_single_slot_filter_conjuncts) {
         _filter_conjuncts.insert(_filter_conjuncts.end(), 
not_single_slot_filter_conjuncts->begin(),
                                  not_single_slot_filter_conjuncts->end());
diff --git a/be/src/vec/exec/scan/new_odbc_scanner.cpp 
b/be/src/vec/exec/scan/new_odbc_scanner.cpp
index 8494973f08..1022be3c83 100644
--- a/be/src/vec/exec/scan/new_odbc_scanner.cpp
+++ b/be/src/vec/exec/scan/new_odbc_scanner.cpp
@@ -28,7 +28,6 @@
 
 #include "common/logging.h"
 #include "common/status.h"
-#include "exec/text_converter.hpp"
 #include "runtime/descriptors.h"
 #include "runtime/runtime_state.h"
 #include "runtime/types.h"
diff --git a/be/src/vec/exec/vmysql_scan_node.cpp 
b/be/src/vec/exec/vmysql_scan_node.cpp
index 8673861f66..1d620141b9 100644
--- a/be/src/vec/exec/vmysql_scan_node.cpp
+++ b/be/src/vec/exec/vmysql_scan_node.cpp
@@ -20,7 +20,6 @@
 #include <gen_cpp/PlanNodes_types.h>
 
 #include "exec/text_converter.h"
-#include "exec/text_converter.hpp"
 #include "runtime/runtime_state.h"
 #include "util/runtime_profile.h"
 #include "util/types.h"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to