This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 113023fb86 (Enhancement)[load-json] support simdjson in new json 
reader (#16903)
113023fb86 is described below

commit 113023fb8632454e33d73e67c6bcbd5074e0dd81
Author: lihangyu <[email protected]>
AuthorDate: Tue Feb 21 11:31:00 2023 +0800

    (Enhancement)[load-json] support simdjson in new json reader (#16903)
    
    be config:
    enable_simdjson_reader=true
    
    related PR #11665
---
 be/src/common/configbase.cpp                       |   1 +
 be/src/exprs/json_functions.cpp                    |  66 +++
 be/src/exprs/json_functions.h                      |  28 +-
 be/src/vec/exec/format/json/new_json_reader.cpp    | 615 +++++++++++++++++++++
 be/src/vec/exec/format/json/new_json_reader.h      |  46 ++
 docs/en/docs/admin-manual/config/be-config.md      |   5 +
 docs/zh-CN/docs/admin-manual/config/be-config.md   |   5 +
 .../data/load_p0/stream_load/invalid_json.json     |  37 ++
 .../load_p0/stream_load/invalid_json_array.json    |   8 +-
 ...id_json_array.json => invalid_json_array1.json} |  10 +-
 .../load_p0/stream_load/invalid_json_array2.json   | 121 ++++
 .../load_p0/stream_load/invalid_json_array3.json   | 123 +++++
 ...id_json_array.json => invalid_json_array4.json} |  10 +-
 .../load_p0/stream_load/invalid_nest_json1.json    |   5 +
 .../load_p0/stream_load/invalid_nest_json2.json    |  16 +
 .../load_p0/stream_load/invalid_nest_json3.json    |   5 +
 .../stream_load/invalid_nest_json_array.json       |  26 +
 .../stream_load/invalid_nest_json_array1.json      |  26 +
 .../stream_load/invalid_nest_json_array2.json      |  26 +
 .../stream_load/invalid_nest_json_array3.json      |  25 +
 .../data/load_p0/stream_load/nest_json_array.json  |  43 +-
 .../data/load_p0/stream_load/simple_json2.json     |   2 +
 .../data/load_p0/stream_load/test_json_load.out    |  17 +
 .../load_p0/stream_load/test_json_load.groovy      |  96 +++-
 24 files changed, 1306 insertions(+), 56 deletions(-)

diff --git a/be/src/common/configbase.cpp b/be/src/common/configbase.cpp
index 8f9ca0e6ac..af20b37260 100644
--- a/be/src/common/configbase.cpp
+++ b/be/src/common/configbase.cpp
@@ -441,6 +441,7 @@ void set_fuzzy_configs() {
     // random value true or false
     set_fuzzy_config("disable_storage_page_cache", ((rand() % 2) == 0) ? 
"true" : "false");
     set_fuzzy_config("enable_system_metrics", ((rand() % 2) == 0) ? "true" : 
"false");
+    set_fuzzy_config("enable_simdjson_reader", ((rand() % 2) == 0) ? "true" : 
"false");
     // random value from 8 to 48
     // s = set_fuzzy_config("doris_scanner_thread_pool_thread_num", 
std::to_string((rand() % 41) + 8));
     // LOG(INFO) << s.to_string();
diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp
index 3016463482..5a3cbcc9e0 100644
--- a/be/src/exprs/json_functions.cpp
+++ b/be/src/exprs/json_functions.cpp
@@ -248,4 +248,70 @@ void JsonFunctions::get_parsed_paths(const 
std::vector<std::string>& path_exprs,
     }
 }
 
+Status JsonFunctions::extract_from_object(simdjson::ondemand::object& obj,
+                                          const std::vector<JsonPath>& 
jsonpath,
+                                          simdjson::ondemand::value* value) 
noexcept {
+// Return DataQualityError when it's a malformed json.
+// Otherwise, the path was not found, due to an array index out of bounds or a nonexistent field
+#define HANDLE_SIMDJSON_ERROR(err, msg)                                        
                \
+    do {                                                                       
                \
+        const simdjson::error_code& _err = err;                                
                \
+        const std::string& _msg = msg;                                         
                \
+        if (UNLIKELY(_err)) {                                                  
                \
+            if (_err == simdjson::NO_SUCH_FIELD || _err == 
simdjson::INDEX_OUT_OF_BOUNDS) {    \
+                return Status::NotFound(                                       
                \
+                        fmt::format("err: {}, msg: {}", 
simdjson::error_message(_err), _msg)); \
+            }                                                                  
                \
+            return Status::DataQualityError(                                   
                \
+                    fmt::format("err: {}, msg: {}", 
simdjson::error_message(_err), _msg));     \
+        }                                                                      
                \
+    } while (false);
+
+    if (jsonpath.size() <= 1) {
+        // The first elem of json path should be '$'.
+        // A valid json path's size is >= 2.
+        return Status::DataQualityError("empty json path");
+    }
+
+    simdjson::ondemand::value tvalue;
+
+    // Skip the first $.
+    for (int i = 1; i < jsonpath.size(); i++) {
+        if (UNLIKELY(!jsonpath[i].is_valid)) {
+            return Status::DataQualityError(fmt::format("invalid json path: 
{}", jsonpath[i].key));
+        }
+
+        const std::string& col = jsonpath[i].key;
+        int index = jsonpath[i].idx;
+
+        // Since the simdjson::ondemand::object cannot be converted to 
simdjson::ondemand::value,
+        // we have to do some special treatment for the second elem of json 
path.
+        // If the key is not found in json object, simdjson::NO_SUCH_FIELD 
would be returned.
+        if (i == 1) {
+            HANDLE_SIMDJSON_ERROR(obj.find_field_unordered(col).get(tvalue),
+                                  fmt::format("unable to find field: {}", 
col));
+        } else {
+            HANDLE_SIMDJSON_ERROR(tvalue.find_field_unordered(col).get(tvalue),
+                                  fmt::format("unable to find field: {}", 
col));
+        }
+
+        // TODO support [*] which index == -2
+        if (index != -1) {
+            // try to access tvalue as array.
+            // If the index is beyond the length of array, 
simdjson::INDEX_OUT_OF_BOUNDS would be returned.
+            simdjson::ondemand::array arr;
+            HANDLE_SIMDJSON_ERROR(tvalue.get_array().get(arr),
+                                  fmt::format("failed to access field as 
array, field: {}", col));
+
+            HANDLE_SIMDJSON_ERROR(
+                    arr.at(index).get(tvalue),
+                    fmt::format("failed to access array field: {}, index: {}", 
col, index));
+        }
+    }
+
+    std::swap(*value, tvalue);
+
+    return Status::OK();
+}
+
 } // namespace doris
diff --git a/be/src/exprs/json_functions.h b/be/src/exprs/json_functions.h
index 39fbda875b..a070b136b5 100644
--- a/be/src/exprs/json_functions.h
+++ b/be/src/exprs/json_functions.h
@@ -19,9 +19,11 @@
 
 #include <fmt/core.h>
 #include <rapidjson/document.h>
+#include <simdjson.h>
 
 #include <sstream>
 
+#include "common/status.h"
 #include "udf/udf.h"
 
 namespace doris {
@@ -63,26 +65,6 @@ struct JsonPath {
         return ss.str();
     }
 
-    std::string to_simdjson_pointer(bool* valid) const {
-        std::stringstream ss;
-        if (!is_valid) {
-            *valid = false;
-            return "";
-        }
-        ss << "/";
-        if (!key.empty()) {
-            ss << key;
-        }
-        if (idx == -2) {
-            // not support [*]
-            *valid = false;
-            return "";
-        } else if (idx > -1) {
-            ss << "/" << idx;
-        }
-        return ss.str();
-    }
-
     std::string debug_string() const {
         return fmt::format("key:{}, idx:{}, valid:{}", key, idx, is_valid);
     }
@@ -113,6 +95,12 @@ public:
 
     static void parse_json_paths(const std::string& path_strings,
                                  std::vector<JsonPath>* parsed_paths);
+    // extract_from_object extracts value from object according to the json 
path.
+    // Now, we do not support complete functions of json path.
+    // Eg. city[*].id is not supported in this function
+    static Status extract_from_object(simdjson::ondemand::object& obj,
+                                      const std::vector<JsonPath>& jsonpath,
+                                      simdjson::ondemand::value* value) 
noexcept;
 
 private:
     static rapidjson::Value* match_value(const std::vector<JsonPath>& 
parsed_paths,
diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp 
b/be/src/vec/exec/format/json/new_json_reader.cpp
index f321a43d9c..a2d1d89d4b 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -88,6 +88,10 @@ NewJsonReader::NewJsonReader(RuntimeProfile* profile, const 
TFileScanRangeParams
           _io_ctx(io_ctx) {}
 
 Status NewJsonReader::init_reader() {
+    if (config::enable_simdjson_reader) {
+        RETURN_IF_ERROR(_simdjson_init_reader());
+        return Status::OK();
+    }
     RETURN_IF_ERROR(_get_range_params());
 
     RETURN_IF_ERROR(_open_file_reader());
@@ -1009,4 +1013,615 @@ Status 
NewJsonReader::_read_one_message(std::unique_ptr<uint8_t[]>* file_buf, si
     }
     return Status::OK();
 }
+// ---------SIMDJSON----------
+// simdjson: replace the non-simdjson functions once the simdjson path is ready
+Status NewJsonReader::_simdjson_init_reader() {
+    RETURN_IF_ERROR(_get_range_params());
+
+    RETURN_IF_ERROR(_open_file_reader());
+    if (_read_json_by_line) {
+        RETURN_IF_ERROR(_open_line_reader());
+    }
+
+    // generate _parsed_jsonpaths and _parsed_json_root
+    RETURN_IF_ERROR(_parse_jsonpath_and_json_root());
+
+    //improve performance
+    if (_parsed_jsonpaths.empty() || _is_dynamic_schema) { // input is a 
simple json-string
+        _vhandle_json_callback = _is_dynamic_schema ? 
&NewJsonReader::_vhandle_dynamic_json
+                                                    : 
&NewJsonReader::_simdjson_handle_simple_json;
+    } else { // input is a complex json-string and a json-path
+        if (_strip_outer_array) {
+            _vhandle_json_callback = 
&NewJsonReader::_simdjson_handle_flat_array_complex_json;
+        } else {
+            _vhandle_json_callback = 
&NewJsonReader::_simdjson_handle_nested_complex_json;
+        }
+    }
+    if (_is_dynamic_schema) {
+        _json_parser = 
std::make_unique<vectorized::JSONDataParser<vectorized::SimdJSONParser>>();
+    }
+    _ondemand_json_parser = std::make_unique<simdjson::ondemand::parser>();
+    for (int i = 0; i < _file_slot_descs.size(); ++i) {
+        _slot_desc_index.emplace(_file_slot_descs[i]->col_name(), i);
+    }
+    _simdjson_ondemand_padding_buffer.resize(_padded_size);
+    return Status::OK();
+}
+
+Status 
NewJsonReader::_simdjson_handle_simple_json(std::vector<MutableColumnPtr>& 
columns,
+                                                   const 
std::vector<SlotDescriptor*>& slot_descs,
+                                                   bool* is_empty_row, bool* 
eof) {
+    // simple json
+    simdjson::ondemand::object objectValue;
+    size_t num_rows = columns[0]->size();
+    do {
+        bool valid = false;
+        try {
+            if (_next_row >= _total_rows) { // parse json and generic document
+                Status st = _simdjson_parse_json(is_empty_row, eof);
+                if (st.is<DATA_QUALITY_ERROR>()) {
+                    continue; // continue to read next
+                }
+                RETURN_IF_ERROR(st);
+                if (*is_empty_row == true) {
+                    return Status::OK();
+                }
+                if (_json_value.type() == 
simdjson::ondemand::json_type::array) {
+                    _array = _json_value.get_array();
+                    _array_iter = _array.begin();
+
+                    _total_rows = _array.count_elements();
+                    if (_total_rows == 0) {
+                        // may be passing an empty json, such as "[]"
+                        RETURN_IF_ERROR(_append_error_msg(nullptr, "Empty json 
line", "", nullptr));
+                        if (*_scanner_eof) {
+                            *is_empty_row = true;
+                            return Status::OK();
+                        }
+                        continue;
+                    }
+                } else {
+                    _total_rows = 1; // only one row
+                    objectValue = _json_value;
+                }
+                _next_row = 0;
+            }
+
+            if (_json_value.type() == simdjson::ondemand::json_type::array) { 
// handle case 1
+                objectValue = *_array_iter;
+                RETURN_IF_ERROR(
+                        _simdjson_set_column_value(&objectValue, columns, 
slot_descs, &valid));
+                if (_array_iter == _array.end()) {
+                    // Hint to read next json doc
+                    _next_row = _total_rows + 1;
+                    break;
+                }
+                ++_array_iter;
+            } else { // handle case 2
+                // objectValue = _json_value.get_object();
+                RETURN_IF_ERROR(
+                        _simdjson_set_column_value(&objectValue, columns, 
slot_descs, &valid));
+            }
+            _next_row++;
+            if (!valid) {
+                if (*_scanner_eof) {
+                    // When _scanner_eof is true and valid is false, it means 
that we have encountered
+                    // unqualified data and decided to stop the scan.
+                    *is_empty_row = true;
+                    return Status::OK();
+                }
+                continue;
+            }
+            *is_empty_row = false;
+            break; // get a valid row, then break
+        } catch (simdjson::simdjson_error& e) {
+            // prevent from endless loop
+            _next_row = _total_rows + 1;
+            fmt::memory_buffer error_msg;
+            fmt::format_to(error_msg, "Parse json data for array failed. code: 
{}, error info: {}",
+                           e.error(), e.what());
+            RETURN_IF_ERROR(_state->append_error_msg_to_file(
+                    [&]() -> std::string { return ""; },
+                    [&]() -> std::string { return fmt::to_string(error_msg); 
}, eof));
+            _counter->num_rows_filtered++;
+            // Before continuing to process other rows, we need to first clean 
the fail parsed row.
+            for (int i = 0; i < columns.size(); ++i) {
+                if (columns[i]->size() > num_rows) {
+                    columns[i]->pop_back(columns[i]->size() - num_rows);
+                }
+            }
+            if (!valid) {
+                if (*_scanner_eof) {
+                    // When _scanner_eof is true and valid is false, it means 
that we have encountered
+                    // unqualified data and decided to stop the scan.
+                    *is_empty_row = true;
+                    return Status::OK();
+                }
+                continue;
+            }
+            continue;
+        }
+    } while (_next_row <= _total_rows);
+    return Status::OK();
+}
+
+Status NewJsonReader::_simdjson_handle_flat_array_complex_json(
+        std::vector<MutableColumnPtr>& columns, const 
std::vector<SlotDescriptor*>& slot_descs,
+        bool* is_empty_row, bool* eof) {
+// Advance one row in array list, if it is the endpoint, stop advance and 
break the loop
+#define ADVANCE_ROW()                  \
+    if (_array_iter == _array.end()) { \
+        _next_row = _total_rows + 1;   \
+        break;                         \
+    }                                  \
+    ++_array_iter;                     \
+    ++_next_row;
+
+    // array complex json
+    size_t num_rows = columns[0]->size();
+    simdjson::ondemand::object cur;
+    do {
+        try {
+            if (_next_row >= _total_rows) {
+                Status st = _simdjson_parse_json(is_empty_row, eof);
+                if (st.is<DATA_QUALITY_ERROR>()) {
+                    continue; // continue to read next
+                }
+                RETURN_IF_ERROR(st);
+                if (*is_empty_row == true) {
+                    if (st == Status::OK()) {
+                        return Status::OK();
+                    }
+                    if (_total_rows == 0) {
+                        continue;
+                    }
+                }
+                _array = _json_value.get_array();
+                _array_iter = _array.begin();
+            }
+
+            bool valid = true;
+            cur = (*_array_iter).get_object();
+            // extract root
+            if (_parsed_json_root.size() != 0) {
+                simdjson::ondemand::value val;
+                Status st = JsonFunctions::extract_from_object(cur, 
_parsed_json_root, &val);
+                if (UNLIKELY(!st.ok())) {
+                    if (st.is_not_found()) {
+                        RETURN_IF_ERROR(
+                                _append_error_msg(nullptr, "JsonPath not 
found", "", nullptr));
+                        ADVANCE_ROW();
+                        continue;
+                    }
+                    return st;
+                }
+                if (val.type() != simdjson::ondemand::json_type::object) {
+                    RETURN_IF_ERROR(_append_error_msg(nullptr, "Not object 
item", "", nullptr));
+                    ADVANCE_ROW();
+                    continue;
+                }
+                cur = val.get_object();
+            }
+            RETURN_IF_ERROR(_simdjson_write_columns_by_jsonpath(&cur, 
slot_descs, columns, &valid));
+            ADVANCE_ROW();
+            if (!valid) {
+                continue; // process next line
+            }
+            *is_empty_row = false;
+            break; // get a valid row, then break
+        } catch (simdjson::simdjson_error& e) {
+            // prevent from endless loop
+            _next_row = _total_rows + 1;
+            fmt::memory_buffer error_msg;
+            fmt::format_to(error_msg, "Parse json data failed. code: {}, error 
info: {}", e.error(),
+                           e.what());
+            RETURN_IF_ERROR(_state->append_error_msg_to_file(
+                    [&]() -> std::string { return ""; },
+                    [&]() -> std::string { return fmt::to_string(error_msg); 
}, eof));
+            _counter->num_rows_filtered++;
+            // Before continuing to process other rows, we need to first clean 
the fail parsed row.
+            for (int i = 0; i < columns.size(); ++i) {
+                if (columns[i]->size() > num_rows) {
+                    columns[i]->pop_back(columns[i]->size() - num_rows);
+                }
+            }
+            if (*_scanner_eof) {
+                // When _scanner_eof is true and valid is false, it means that 
we have encountered
+                // unqualified data and decided to stop the scan.
+                *is_empty_row = true;
+                return Status::OK();
+            }
+            continue;
+        }
+    } while (_next_row <= _total_rows);
+    return Status::OK();
+}
+
+Status NewJsonReader::_simdjson_handle_nested_complex_json(
+        std::vector<MutableColumnPtr>& columns, const 
std::vector<SlotDescriptor*>& slot_descs,
+        bool* is_empty_row, bool* eof) {
+    // nested complex json
+    while (true) {
+        size_t num_rows = columns[0]->size();
+        simdjson::ondemand::object cur;
+        try {
+            Status st = _simdjson_parse_json(is_empty_row, eof);
+            if (st.is<DATA_QUALITY_ERROR>()) {
+                continue; // continue to read next
+            }
+            RETURN_IF_ERROR(st);
+            if (*is_empty_row == true) {
+                return Status::OK();
+            }
+            *is_empty_row = false;
+            bool valid = true;
+            if (_json_value.type() != simdjson::ondemand::json_type::object) {
+                RETURN_IF_ERROR(_append_error_msg(nullptr, "Not object item", 
"", nullptr));
+                continue;
+            }
+            cur = _json_value.get_object();
+            st = _simdjson_write_columns_by_jsonpath(&cur, slot_descs, 
columns, &valid);
+            if (!st.ok()) {
+                RETURN_IF_ERROR(_append_error_msg(nullptr, st.to_string(), "", 
nullptr));
+                // Before continuing to process other rows, we need to first 
clean the fail parsed row.
+                for (int i = 0; i < columns.size(); ++i) {
+                    if (columns[i]->size() > num_rows) {
+                        columns[i]->pop_back(columns[i]->size() - num_rows);
+                    }
+                }
+                continue;
+            }
+            if (!valid) {
+                // there is only one line in this case, so if it return false, 
just set is_empty_row true
+                // so that the caller will continue reading next line.
+                *is_empty_row = true;
+            }
+            break; // read a valid row
+        } catch (simdjson::simdjson_error& e) {
+            fmt::memory_buffer error_msg;
+            fmt::format_to(error_msg, "Parse json data failed. code: {}, error 
info: {}", e.error(),
+                           e.what());
+            RETURN_IF_ERROR(_state->append_error_msg_to_file(
+                    [&]() -> std::string { return ""; },
+                    [&]() -> std::string { return fmt::to_string(error_msg); 
}, eof));
+            _counter->num_rows_filtered++;
+            // Before continuing to process other rows, we need to first clean 
the fail parsed row.
+            for (int i = 0; i < columns.size(); ++i) {
+                if (columns[i]->size() > num_rows) {
+                    columns[i]->pop_back(columns[i]->size() - num_rows);
+                }
+            }
+            if (*_scanner_eof) {
+                // When _scanner_eof is true and valid is false, it means that 
we have encountered
+                // unqualified data and decided to stop the scan.
+                *is_empty_row = true;
+                return Status::OK();
+            }
+            continue;
+        }
+    }
+    return Status::OK();
+}
+
+Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* 
value,
+                                                 
std::vector<MutableColumnPtr>& columns,
+                                                 const 
std::vector<SlotDescriptor*>& slot_descs,
+                                                 bool* valid) {
+    // set
+    size_t cur_row_count = columns[0]->size();
+    bool has_valid_value = false;
+    // iterate through the object; simdjson::ondemand will parse on the fly
+    for (auto field : *value) {
+        std::string_view key = field.unescaped_key();
+        auto iter = _slot_desc_index.find(std::string(key));
+        if (iter == _slot_desc_index.end()) {
+            // This key is not exist in slot desc, just ignore
+            continue;
+        }
+        simdjson::ondemand::value val = field.value();
+        RETURN_IF_ERROR(_simdjson_write_data_to_column(val, 
slot_descs[iter->second],
+                                                       
columns[iter->second].get(), valid));
+        if (!(*valid)) {
+            return Status::OK();
+        }
+        has_valid_value = true;
+    }
+    if (!has_valid_value) {
+        RETURN_IF_ERROR(
+                _append_error_msg(value, "All fields is null, this is a 
invalid row.", "", valid));
+        return Status::OK();
+    }
+
+    // fill missing slot
+    int nullcount = 0;
+    int ctx_idx = 0;
+    for (auto slot_desc : slot_descs) {
+        if (!slot_desc->is_materialized()) {
+            continue;
+        }
+        int dest_index = ctx_idx++;
+        auto* column_ptr = columns[dest_index].get();
+        if (column_ptr->size() < cur_row_count + 1) {
+            DCHECK(column_ptr->size() == cur_row_count);
+            column_ptr->assume_mutable()->insert_default();
+            ++nullcount;
+        }
+        DCHECK(column_ptr->size() == cur_row_count + 1);
+    }
+    // There is at least one valid value here
+    DCHECK(nullcount < columns.size());
+    *valid = true;
+    return Status::OK();
+}
+
+Status 
NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& value,
+                                                     SlotDescriptor* slot_desc,
+                                                     vectorized::IColumn* 
column, bool* valid) {
+    // write
+    vectorized::ColumnNullable* nullable_column = nullptr;
+    vectorized::IColumn* column_ptr = nullptr;
+    if (slot_desc->is_nullable()) {
+        nullable_column = assert_cast<vectorized::ColumnNullable*>(column);
+        column_ptr = &nullable_column->get_nested_column();
+    }
+    // TODO: if the vexpr can support another 'slot_desc type' than 
'TYPE_VARCHAR',
+    // we need use a function to support these types to insert data in columns.
+    ColumnString* column_string = assert_cast<ColumnString*>(column_ptr);
+    switch (value.type()) {
+    case simdjson::ondemand::json_type::null: {
+        if (column->is_nullable()) {
+            // insert_default already push 1 to null_map
+            nullable_column->insert_default();
+        } else {
+            RETURN_IF_ERROR(_append_error_msg(
+                    nullptr, "Json value is null, but the column `{}` is not 
nullable.",
+                    slot_desc->col_name(), valid));
+            return Status::OK();
+        }
+        break;
+    }
+    case simdjson::ondemand::json_type::boolean: {
+        nullable_column->get_null_map_data().push_back(0);
+        if (value.get_bool()) {
+            column_string->insert_data("1", 1);
+        } else {
+            column_string->insert_data("0", 1);
+        }
+        break;
+    }
+    case simdjson::ondemand::json_type::object:
+    case simdjson::ondemand::json_type::array: {
+        auto str_view = simdjson::to_json_string(value).value();
+        std::string value_str(str_view.data(), str_view.size());
+        // compact json value
+        value_str.erase(std::remove_if(value_str.begin(), value_str.end(),
+                                       [](const char& c) {
+                                           // white space
+                                           return c == ' ' || c == '\t' || c 
== '\n' || c == '\r' ||
+                                                  c == '\f' || c == '\v';
+                                       }),
+                        value_str.end());
+        nullable_column->get_null_map_data().push_back(0);
+        column_string->insert_data(value_str.data(), value_str.length());
+        break;
+    }
+    default: {
+        auto str_view = simdjson::to_json_string(value).value();
+        if (value.type() == simdjson::ondemand::json_type::string) {
+            // trim
+            str_view = str_view.substr(1, str_view.length() - 2);
+        }
+        nullable_column->get_null_map_data().push_back(0);
+        column_string->insert_data(str_view.data(), str_view.length());
+    }
+    }
+    *valid = true;
+    return Status::OK();
+}
+
+Status NewJsonReader::_append_error_msg(simdjson::ondemand::object* obj, 
std::string error_msg,
+                                        std::string col_name, bool* valid) {
+    std::string err_msg;
+    if (!col_name.empty()) {
+        fmt::memory_buffer error_buf;
+        fmt::format_to(error_buf, error_msg, col_name);
+        err_msg = fmt::to_string(error_buf);
+    } else {
+        err_msg = error_msg;
+    }
+
+    RETURN_IF_ERROR(_state->append_error_msg_to_file(
+            [&]() -> std::string {
+                if (!obj) {
+                    return "";
+                }
+                std::string_view str_view;
+                (void)!obj->raw_json().get(str_view);
+                return std::string(str_view.data(), str_view.size());
+            },
+            [&]() -> std::string { return err_msg; }, _scanner_eof));
+
+    _counter->num_rows_filtered++;
+    if (valid != nullptr) {
+        // current row is invalid
+        *valid = false;
+    }
+    return Status::OK();
+}
+
+Status NewJsonReader::_simdjson_parse_json(bool* is_empty_row, bool* eof) {
+    size_t size = 0;
+    RETURN_IF_ERROR(_simdjson_parse_json_doc(&size, eof));
+
+    // read all data, then return
+    if (size == 0 || *eof) {
+        *is_empty_row = true;
+        return Status::OK();
+    }
+
+    if (!_parsed_jsonpaths.empty() && _strip_outer_array) {
+        _total_rows = _json_value.count_elements().value();
+        _next_row = 0;
+
+        if (_total_rows == 0) {
+            // meet an empty json array.
+            *is_empty_row = true;
+        }
+    }
+    return Status::OK();
+}
+Status NewJsonReader::_simdjson_parse_json_doc(size_t* size, bool* eof) {
+    // read a whole message
+    SCOPED_TIMER(_file_read_timer);
+    const uint8_t* json_str = nullptr;
+    std::unique_ptr<uint8_t[]> json_str_ptr;
+    if (_line_reader != nullptr) {
+        RETURN_IF_ERROR(_line_reader->read_line(&json_str, size, eof));
+    } else {
+        size_t length = 0;
+        RETURN_IF_ERROR(_read_one_message(&json_str_ptr, &length));
+        json_str = json_str_ptr.get();
+        *size = length;
+        if (length == 0) {
+            *eof = true;
+        }
+    }
+
+    _bytes_read_counter += *size;
+    if (*eof) {
+        return Status::OK();
+    }
+    if (*size + simdjson::SIMDJSON_PADDING > _padded_size) {
+        // For efficiency reasons, simdjson requires a string with a few bytes 
(simdjson::SIMDJSON_PADDING) at the end.
+        // Hence, a re-allocation is needed if the space is not enough.
+        _simdjson_ondemand_padding_buffer.resize(*size + 
simdjson::SIMDJSON_PADDING);
+        _padded_size = *size + simdjson::SIMDJSON_PADDING;
+    }
+    memcpy(&_simdjson_ondemand_padding_buffer.front(), json_str, *size);
+    auto error =
+            _ondemand_json_parser
+                    
->iterate(std::string_view(_simdjson_ondemand_padding_buffer.data(), *size),
+                              _padded_size)
+                    .get(_original_json_doc);
+    auto return_quality_error = [&](fmt::memory_buffer& error_msg,
+                                    const std::string& doc_info) -> Status {
+        RETURN_IF_ERROR(_state->append_error_msg_to_file(
+                [&]() -> std::string { return doc_info; },
+                [&]() -> std::string { return fmt::to_string(error_msg); }, 
_scanner_eof));
+        _counter->num_rows_filtered++;
+        if (*_scanner_eof) {
+            // Case A: if _scanner_eof is set to true in 
"append_error_msg_to_file", which means
+            // we meet enough invalid rows and the scanner should be stopped.
+            // So we set eof to true and return OK, the caller will stop the 
process as we meet the end of file.
+            *eof = true;
+            return Status::OK();
+        }
+        return Status::DataQualityError(fmt::to_string(error_msg));
+    };
+    if (error != simdjson::error_code::SUCCESS) {
+        fmt::memory_buffer error_msg;
+        fmt::format_to(error_msg, "Parse json data for JsonDoc failed. code: 
{}, error info: {}",
+                       error, simdjson::error_message(error));
+        return return_quality_error(error_msg, std::string((char*)json_str, 
*size));
+    }
+    try {
+        // set json root
+        // if it is an array at top level, then we should iterate the entire 
array in
+        // ::_simdjson_handle_flat_array_complex_json
+        if (_parsed_json_root.size() != 0 &&
+            _original_json_doc.type() == 
simdjson::ondemand::json_type::object) {
+            simdjson::ondemand::object object = _original_json_doc;
+            Status st = JsonFunctions::extract_from_object(object, 
_parsed_json_root, &_json_value);
+            if (!st.ok()) {
+                fmt::memory_buffer error_msg;
+                fmt::format_to(error_msg, "{}", st.to_string());
+                return return_quality_error(error_msg, 
std::string((char*)json_str, *size));
+            }
+        } else {
+            _json_value = _original_json_doc;
+        }
+    } catch (simdjson::simdjson_error& e) {
+        fmt::memory_buffer error_msg;
+        fmt::format_to(error_msg, "Encounter error while extract_from_object, 
error: {}", e.what());
+        return return_quality_error(error_msg, std::string((char*)json_str, 
*size));
+    }
+
+    if (_json_value.type() == simdjson::ondemand::json_type::array && 
!_strip_outer_array) {
+        fmt::memory_buffer error_msg;
+        fmt::format_to(error_msg, "{}",
+                       "JSON data is array-object, `strip_outer_array` must be 
TRUE.");
+        return return_quality_error(error_msg, std::string((char*)json_str, 
*size));
+    }
+
+    if (_json_value.type() != simdjson::ondemand::json_type::array && 
_strip_outer_array) {
+        fmt::memory_buffer error_msg;
+        fmt::format_to(error_msg, "{}",
+                       "JSON data is not an array-object, `strip_outer_array` 
must be FALSE.");
+        return return_quality_error(error_msg, std::string((char*)json_str, 
*size));
+    }
+    return Status::OK();
+}
+
+// Fill one output row from a single simdjson on-demand object using the
+// user-configured jsonpaths (_parsed_jsonpaths).
+// For each slot i with a parsed jsonpath, extract the addressed value from
+// `value` and write it into columns[i]; a slot with no matching path (or no
+// path at all) must be nullable, otherwise the row is rejected through
+// _append_error_msg. A row in which no jsonpath matched anything is likewise
+// rejected as invalid. On success, materialized slots whose column did not
+// grow are back-filled with a default so every column gains exactly one row.
+// *valid is set to false when the row is filtered (the function still
+// returns OK in that case); a non-OK Status signals an unexpected
+// extraction error.
+Status NewJsonReader::_simdjson_write_columns_by_jsonpath(
+        simdjson::ondemand::object* value, const std::vector<SlotDescriptor*>& 
slot_descs,
+        std::vector<MutableColumnPtr>& columns, bool* valid) {
+    // write by jsonpath
+    size_t column_num = slot_descs.size();
+    bool has_valid_value = false;
+    // Row count before this row is appended; used below to detect columns
+    // that received no value and must be back-filled.
+    size_t cur_row_count = columns[0]->size();
+    for (size_t i = 0; i < column_num; i++) {
+        auto* column_ptr = columns[i].get();
+        simdjson::ondemand::value json_value;
+        Status st;
+        if (i < _parsed_jsonpaths.size()) {
+            st = JsonFunctions::extract_from_object(*value, 
_parsed_jsonpaths[i], &json_value);
+            // NOT_FOUND is handled below as "path did not match"; any other
+            // failure is a hard error and aborts the row.
+            if (!st.ok() && !st.is<NOT_FOUND>()) {
+                return st;
+            }
+        }
+        if (i >= _parsed_jsonpaths.size() || st.is<NOT_FOUND>()) {
+            // The jsonpath did not match anything in this row's json data.
+            if (!slot_descs[i]->is_nullable()) {
+                RETURN_IF_ERROR(_append_error_msg(
+                        value, "The column `{}` is not nullable, but it's not 
found in jsondata.",
+                        slot_descs[i]->col_name(), valid));
+                return Status::OK();
+            }
+        } else {
+            RETURN_IF_ERROR(
+                    _simdjson_write_data_to_column(json_value, slot_descs[i], 
column_ptr, valid));
+            if (!(*valid)) {
+                return Status::OK();
+            }
+            has_valid_value = true;
+        }
+    }
+    if (!has_valid_value) {
+        // No jsonpath matched at all: treat the whole row as invalid.
+        RETURN_IF_ERROR(
+                _append_error_msg(value, "All fields is null, this is a 
invalid row.", "", valid));
+        return Status::OK();
+    }
+
+    // Back-fill a default value (null for nullable columns) into every
+    // materialized slot whose column was not advanced by this row, so all
+    // columns end up with the same length.
+    int ctx_idx = 0;
+    int nullcount = 0;
+    for (auto slot_desc : slot_descs) {
+        if (!slot_desc->is_materialized()) {
+            continue;
+        }
+        int dest_index = ctx_idx++;
+        // NOTE(review): dest_index counts only materialized slots, while the
+        // write loop above indexes `columns` by raw slot index — this assumes
+        // every slot here is materialized; verify against callers.
+        auto* column_ptr = columns[dest_index].get();
+        if (column_ptr->size() < cur_row_count + 1) {
+            DCHECK(column_ptr->size() == cur_row_count);
+            column_ptr->assume_mutable()->insert_default();
+            ++nullcount;
+        }
+        DCHECK(column_ptr->size() == cur_row_count + 1);
+    }
+    // There is at least one valid value here
+    DCHECK(nullcount < columns.size());
+    *valid = true;
+    return Status::OK();
+}
+
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/json/new_json_reader.h 
b/be/src/vec/exec/format/json/new_json_reader.h
index 0c14342204..42737f7094 100644
--- a/be/src/vec/exec/format/json/new_json_reader.h
+++ b/be/src/vec/exec/format/json/new_json_reader.h
@@ -107,6 +107,37 @@ private:
 
     Status _read_one_message(std::unique_ptr<uint8_t[]>* file_buf, size_t* 
read_size);
 
+    // simdjson-based implementations; these replace the non-simdjson functions once ready
+    Status _simdjson_init_reader();
+    Status _simdjson_parse_json(bool* is_empty_row, bool* eof);
+    Status _simdjson_parse_json_doc(size_t* size, bool* eof);
+
+    Status _simdjson_handle_simple_json(std::vector<MutableColumnPtr>& columns,
+                                        const std::vector<SlotDescriptor*>& 
slot_descs,
+                                        bool* is_empty_row, bool* eof);
+
+    Status 
_simdjson_handle_flat_array_complex_json(std::vector<MutableColumnPtr>& columns,
+                                                    const 
std::vector<SlotDescriptor*>& slot_descs,
+                                                    bool* is_empty_row, bool* 
eof);
+
+    Status _simdjson_handle_nested_complex_json(std::vector<MutableColumnPtr>& 
columns,
+                                                const 
std::vector<SlotDescriptor*>& slot_descs,
+                                                bool* is_empty_row, bool* eof);
+
+    Status _simdjson_set_column_value(simdjson::ondemand::object* value,
+                                      std::vector<MutableColumnPtr>& columns,
+                                      const std::vector<SlotDescriptor*>& 
slot_descs, bool* valid);
+
+    Status _simdjson_write_data_to_column(simdjson::ondemand::value& value,
+                                          SlotDescriptor* slot_desc,
+                                          vectorized::IColumn* column_ptr, 
bool* valid);
+
+    Status _simdjson_write_columns_by_jsonpath(simdjson::ondemand::object* 
value,
+                                               const 
std::vector<SlotDescriptor*>& slot_descs,
+                                               std::vector<MutableColumnPtr>& 
columns, bool* valid);
+    Status _append_error_msg(simdjson::ondemand::object* obj, std::string 
error_msg,
+                             std::string col_name, bool* valid);
+
     Status (NewJsonReader::*_vhandle_json_callback)(
             std::vector<vectorized::MutableColumnPtr>& columns,
             const std::vector<SlotDescriptor*>& slot_descs, bool* 
is_empty_row, bool* eof);
@@ -163,7 +194,22 @@ private:
     RuntimeProfile::Counter* _file_read_timer;
 
     bool _is_dynamic_schema = false;
+    // name mapping
+    phmap::flat_hash_map<String, size_t> _slot_desc_index;
+    // simdjson
+    static constexpr size_t _init_buffer_size = 1024 * 1024 * 8;
+    size_t _padded_size = _init_buffer_size + simdjson::SIMDJSON_PADDING;
+    std::string _simdjson_ondemand_padding_buffer;
+    // char _simdjson_ondemand_padding_buffer[_padded_size];
+    simdjson::ondemand::document _original_json_doc;
+    simdjson::ondemand::value _json_value;
+    // for strip outer array
+    // array_iter pointed to _array
+    simdjson::ondemand::array_iterator _array_iter;
+    simdjson::ondemand::array _array;
     std::unique_ptr<JSONDataParser<SimdJSONParser>> _json_parser;
+    std::unique_ptr<simdjson::ondemand::parser> _ondemand_json_parser = 
nullptr;
 };
+
 } // namespace vectorized
 } // namespace doris
diff --git a/docs/en/docs/admin-manual/config/be-config.md 
b/docs/en/docs/admin-manual/config/be-config.md
index 180e36f5b7..e224649bd9 100644
--- a/docs/en/docs/admin-manual/config/be-config.md
+++ b/docs/en/docs/admin-manual/config/be-config.md
@@ -1415,4 +1415,9 @@ Indicates how many tablets failed to load in the data 
directory. At the same tim
 * Description: Whether parse multidimensional array, if false encountering 
will return ERROR
 * Default value: true
 
+#### `enable_simdjson_reader`
+
+* Description: Whether to enable simdjson for parsing JSON data during stream load
+* Default value: false
+
 </version>
diff --git a/docs/zh-CN/docs/admin-manual/config/be-config.md 
b/docs/zh-CN/docs/admin-manual/config/be-config.md
index 734e264b40..65531b8824 100644
--- a/docs/zh-CN/docs/admin-manual/config/be-config.md
+++ b/docs/zh-CN/docs/admin-manual/config/be-config.md
@@ -1433,4 +1433,9 @@ load tablets from header failed, failed tablets size: 
xxx, path=xxx
 * 描述: 在动态表中是否解析多维数组,如果是false遇到多维数组则会报错。
 * 默认值: true
 
+#### `enable_simdjson_reader`
+
+* 描述: 是否在导入json数据时用simdjson来解析。
+* 默认值: false
+
 </version>
diff --git a/regression-test/data/load_p0/stream_load/invalid_json.json 
b/regression-test/data/load_p0/stream_load/invalid_json.json
index 4ee59e809e..f6f945893a 100644
--- a/regression-test/data/load_p0/stream_load/invalid_json.json
+++ b/regression-test/data/load_p0/stream_load/invalid_json.json
@@ -1,3 +1,40 @@
 {"no": 1, "item: {"id": 1, "city": "beijing", "code": 2345671}}
 {"no": 2, "item": {}}
 {"no": 2, "item": {"id":"123}}
+{"no": 2, item": {"id":"123}}
+{"no": 2, "item": ["id":"123'}
+{hno": 2, "item": ["id":"123'}
+hno": 2, [],"item": ["id":"123'}
+hno": , [],"item": ["id":"123'}
+{"no": 2, "item": {"id", }}
+{"no": 2, "item": {"id", ""}}
+{"no": 2, "item": {"id" : "123", ""}}
+{"no": 2, "item": {"id" : "123", "}}
+{"no": 2, "item": [{"id" : "123"}]}
+{"no": 2, "item": [{"id" : "123"}]}
+{
+}
+{"ni",}
+}{
+-
++
+/
+"{}"
+{1}
+{[12]}
+{1:"1["}
+{"no" : 1, "item" : {"id"}}}
+{"no" : 1, "item" : "id"}}}
+{"no" : 1, "item" : ["id"}}}
+"
+["1"]
+[
+]
+{}
+null
+{null}
+{"no" : 1, "item" : {"id" : 1921}}"}
+{"no" : 1, "item" : {"id" : 1921}}
+{"no" : "xxx", "item" : {"x":"123", "id" : 1281111}}
+{"no" : 1.22, "texx": "111",  "item" : {"x":"123", "id" : 17117171}}
+{"no" : 10011.0, "texx": "111",  "item" : {"id" : null}, "item" : 191218}
\ No newline at end of file
diff --git a/regression-test/data/load_p0/stream_load/invalid_json_array.json 
b/regression-test/data/load_p0/stream_load/invalid_json_array.json
index 7f1e5f1884..e98771f034 100644
--- a/regression-test/data/load_p0/stream_load/invalid_json_array.json
+++ b/regression-test/data/load_p0/stream_load/invalid_json_array.json
@@ -6,6 +6,10 @@
     {"v6": "6514.405051", "k4": "6000", "k3": "600", "k2": "60", "k1": "6", 
"v3": "obdrei", "v2": "m", "v1": "2010-01-06", "k5": "2016-01-01 00:00:00", 
"v5": "882.708491", "v4": "921.867848"},
     {"v6": "8604.198677", "k4": "7000", "k3": "700", "k2": "70", "k1": "7", 
"v3": "cuobdhvrgkugknj", "v2": "a", "v1": "2010-01-07", "k5": "2017-01-01 
00:00:00", "v5": "209.420112", "v4": "141.656421"},
     {"v6": "7784.859446", "k4": "8000", "k3": "800", "k2": "80", "k1": "8", 
"v3": "phcxztwgjllhmj", "v2": "z", "v1": "2010-01-08", "k5": "2018-01-01 
00:00:00", "v5": "285.664871", "v4": "762.813376"},
-    {"v6": "4846.735593", "k4": "9000", "k3": "900", "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk", "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
-    {"v6": "7996.434686", "k4": "10000", "k3": "1000", "k2": "100", "k1": 
"10", "v3": "zucprgdnlgzzfl", "v2": "s", "v1": "2010-01-10", "k5": "2020-01-01 
00:00:00", "v5": "155.861217", "v4": "26.874738"},]
+    {"v6": "4846.735593", "k4": "9000", "k3": "900", "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000", "k3": "900", "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000", "k3": "900, "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000", "k3": "900, "k2":} "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000",}"k3": "900, "k2":} "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": "7996.434686", "k4": "10000", "k3": "1000", "k2": "100", "k1": 
"10", "v3": "zucprgdnlgzzfl", "v2": "s", "v1": "2010-01-10", "k5": "2020-01-01 
00:00:00", "v5": "155.861217", "v4": "26.874738"}]
 
diff --git a/regression-test/data/load_p0/stream_load/invalid_json_array.json 
b/regression-test/data/load_p0/stream_load/invalid_json_array1.json
similarity index 57%
copy from regression-test/data/load_p0/stream_load/invalid_json_array.json
copy to regression-test/data/load_p0/stream_load/invalid_json_array1.json
index 7f1e5f1884..293831d8b6 100644
--- a/regression-test/data/load_p0/stream_load/invalid_json_array.json
+++ b/regression-test/data/load_p0/stream_load/invalid_json_array1.json
@@ -1,4 +1,4 @@
- [   {"v6": "7395.231067", "k4": "1000", "k3": "100", "k2": "10", "k1": "1", 
"v3": "ynqnzeowymt", "v2": "t", "v1": "2010-01-01", "k5": "2011-01-01 
00:00:00", "v5": "180.998031", "v4": "38.638843"},
+ { "item":  [{"v6": "7395.231067", "k4": "1000", "k3": "100", "k2": "10", 
"k1": "1", "v3": "ynqnzeowymt", "v2": "t", "v1": "2010-01-01", "k5": 
"2011-01-01 00:00:00", "v5": "180.998031", "v4": "38.638843"},
     {"v6": "2080.504502", "k4": "2000", "k3": "200", "k2": "20", "k1": "2", 
"v3": "hfkfwlr", "v2": "f", "v1": "2010-01-02", "k5": "2012-01-01 00:00:00", 
"v5": "539.922834", "v4": "506.044046"},
     {"v6": "4605.253205", "k4": "3000", "k3": "300", "k2": "30", "k1": "3", 
"v3": "uoclasp", "v2": "t", "v1": "2010-01-03", "k5": "2013-01-01 00:00:00", 
"v5": "577.044148", "v4": "377.793209"},
     {"v6": "7291.703724", "k4": "4000", "k3": "400", "k2": "40", "k1": "4", 
"v3": "iswngzeodfhptjzgswsddt", "v2": "n", "v1": "2010-01-04", "k5": 
"2014-01-01 00:00:00", "v5": "919.067864", "v4": "871.354536"},
@@ -6,6 +6,10 @@
     {"v6": "6514.405051", "k4": "6000", "k3": "600", "k2": "60", "k1": "6", 
"v3": "obdrei", "v2": "m", "v1": "2010-01-06", "k5": "2016-01-01 00:00:00", 
"v5": "882.708491", "v4": "921.867848"},
     {"v6": "8604.198677", "k4": "7000", "k3": "700", "k2": "70", "k1": "7", 
"v3": "cuobdhvrgkugknj", "v2": "a", "v1": "2010-01-07", "k5": "2017-01-01 
00:00:00", "v5": "209.420112", "v4": "141.656421"},
     {"v6": "7784.859446", "k4": "8000", "k3": "800", "k2": "80", "k1": "8", 
"v3": "phcxztwgjllhmj", "v2": "z", "v1": "2010-01-08", "k5": "2018-01-01 
00:00:00", "v5": "285.664871", "v4": "762.813376"},
-    {"v6": "4846.735593", "k4": "9000", "k3": "900", "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk", "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
-    {"v6": "7996.434686", "k4": "10000", "k3": "1000", "k2": "100", "k1": 
"10", "v3": "zucprgdnlgzzfl", "v2": "s", "v1": "2010-01-10", "k5": "2020-01-01 
00:00:00", "v5": "155.861217", "v4": "26.874738"},]
+    "v6": "4846.735593", "k4": "9000", "k3": "900", "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000", "k3": "900", "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000", "k3": "900, "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000", "k3": "900, "k2":} "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000",}"k3": "900, "k2":} "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": "7996.434686", "k4": "10000", "k3": "1000", "k2": "100", "k1": 
"10", "v3": "zucprgdnlgzzfl", "v2": "s", "v1": "2010-01-10", "k5": "2020-01-01 
00:00:00", "v5": "155.861217", "v4": "26.874738"}]}
 
diff --git a/regression-test/data/load_p0/stream_load/invalid_json_array2.json 
b/regression-test/data/load_p0/stream_load/invalid_json_array2.json
new file mode 100644
index 0000000000..a8022b3dbe
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/invalid_json_array2.json
@@ -0,0 +1,121 @@
+{"item" : {
+   "v6": "7395.231067",
+   "k4": "1000",
+   "k3": "100",
+   "k2": "10",
+   "k1": "1",
+   "v3": "ynqnzeowymt",
+   "v2": "t",
+   "v1": "2010-01-01",
+   "k5": "2011-01-01 00:00:00",
+   "v5": "180.998031",
+   "v4": "38.638843"
+},
+{
+   "v6": "2080.504502",
+   "k4": "2000",
+   "k3": "200",
+   "k2": "20",
+   "k1": "2",
+   "v3": "hfkfwlr",
+   "v2": "f",
+   "v1": "2010-01-02",
+   "k5": "2012-01-01 00:00:00",
+   "v5": "539.922834",
+   "v4": "506.044046"
+},
+{
+   "v6": "4605.253205",
+   "k4": "3000",
+   "k3": "300",
+   "k2": "30",
+   "k1": "3",
+   "v3": "uoclasp",
+   "v2": "t",
+   "v1": "2010-01-03",
+   "k5": "2013-01-01 00:00:00",
+   "v5": "577.044148",
+   "v4": "377.793209"
+},
+{
+   "v6": "7291.703724",
+   "k4": "4000",
+   "k3": "400",
+   "k2": "40",
+   "k1": "4",
+   "v3": "iswngzeodfhptjzgswsddt",
+   "v2": "n",
+   "v1": "2010-01-04",
+   "k5": "2014-01-01 00:00:00",
+   "v5": "919.067864",
+   "v4": "871.354536"
+},
+{
+   "fake" : null
+},
+{
+   "v6": "6514.405051",
+   "k4": "6000",
+   "k3": "600",
+   "k2": "60",
+   "k1": "6",
+   "v3": "obdrei",
+   "v2": "m",
+   "v1": "2010-01-06",
+   "k5": "2016-01-01 00:00:00",
+   "v5": "882.708491",
+   "v4": "921.867848"
+},
+{
+   "v6": "8604.198677",
+   "k4": "7000",
+   "k3": "700",
+   "k2": "70",
+   "k1": "7",
+   "v3": "cuobdhvrgkugknj",
+   "v2": "a",
+   "v1": "2010-01-07",
+   "k5": "2017-01-01 00:00:00",
+   "v5": "209.420112",
+   "v4": "141.656421"
+},
+{
+   "v6": "7784.859446",
+   "k4": "8000",
+   "k3": "800",
+   "k2": "80",
+   "k1": "8",
+   "v3": "phcxztwgjllhmj",
+   "v2": "z",
+   "v1": "2010-01-08",
+   "k5": "2018-01-01 00:00:00",
+   "v5": "285.664871",
+   "v4": "762.813376"
+},
+
+   "v6": "4846.735593",
+   "k4": "9000",
+   "k3": "900",
+   "k2": "90",
+   "k1": "9",
+   "v3": "nbarqjwilbkelk",
+   "v2": "b",
+   "v1": "2010-01-09",
+   "k5": "2019-01-01 00:00:00",
+   "v5": "535.285510",
+   "v4": "92.702403"
+},
+{
+   "v6": "7996.434686",
+   "k4": "10000",
+   "k3": "1000",
+   "k2": "100",
+   "k1": "10",
+   "v3": "zucprgdnlgzzfl",
+   "v2": "s",
+   "v1": "2010-01-10",
+   "k5": "2020-01-01 00:00:00",
+   "v5": "155.861217",
+   "v4": "26.874738"
+}
+] }
\ No newline at end of file
diff --git a/regression-test/data/load_p0/stream_load/invalid_json_array3.json 
b/regression-test/data/load_p0/stream_load/invalid_json_array3.json
new file mode 100644
index 0000000000..a53e6e727a
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/invalid_json_array3.json
@@ -0,0 +1,123 @@
+{
+       "item": [{
+                       "v6": "7395.231067",
+                       "k4": "1000",
+                       "k3": "100",
+                       "k2": "10",
+                       "k1": "1",
+                       "v3": "ynqnzeowymt",
+                       "v2": "t",
+                       "v1": "2010-01-01",
+                       "k5": "2011-01-01 00:00:00",
+                       "v5": "180.998031",
+                       "v4": "38.638843"
+               ,
+               {
+                       "v6": "2080.504502",
+                       "k4": "2000",
+                       "k3": "200",
+                       "k2": "20",
+                       "k1": "2",
+                       "v3": "hfkfwlr",
+                       "v2": "f",
+                       "v1": "2010-01-02",
+                       "k5": "2012-01-01 00:00:00",
+                       "v5": "539.922834",
+                       "v4": "506.044046"
+               },
+               {
+                       "v6": "4605.253205",
+                       "k4": "3000",
+                       "k3": "300",
+                       "k2": "30",
+                       "k1": "3",
+                       "v3": "uoclasp",
+                       "v2": "t",
+                       "v1": "2010-01-03",
+                       "k5": "2013-01-01 00:00:00",
+                       "v5": "577.044148",
+                       "v4": "377.793209"
+               },
+               {
+                       "v6": "7291.703724",
+                       "k4": "4000",
+                       "k3": "400",
+                       "k2": "40",
+                       "k1": "4",
+                       "v3": "iswngzeodfhptjzgswsddt",
+                       "v2": "n",
+                       "v1": "2010-01-04",
+                       "k5": "2014-01-01 00:00:00",
+                       "v5": "919.067864",
+                       "v4": "871.354536"
+               },
+               {
+                       "fake": null
+               },
+               {
+                       "v6": "6514.405051",
+                       "k4": "6000",
+                       "k3": "600",
+                       "k2": "60",
+                       "k1": "6",
+                       "v3": "obdrei",
+                       "v2": "m",
+                       "v1": "2010-01-06",
+                       "k5": "2016-01-01 00:00:00",
+                       "v5": "882.708491",
+                       "v4": "921.867848"
+               },
+               {
+                       "v6": "8604.198677",
+                       "k4": "7000",
+                       "k3": "700",
+                       "k2": "70",
+                       "k1": "7",
+                       "v3": "cuobdhvrgkugknj",
+                       "v2": "a",
+                       "v1": "2010-01-07",
+                       "k5": "2017-01-01 00:00:00",
+                       "v5": "209.420112",
+                       "v4": "141.656421"
+               },
+               {
+                       "v6": "7784.859446",
+                       "k4": "8000",
+                       "k3": "800",
+                       "k2": "80",
+                       "k1": "8",
+                       "v3": "phcxztwgjllhmj",
+                       "v2": "z",
+                       "v1": "2010-01-08",
+                       "k5": "2018-01-01 00:00:00",
+                       "v5": "285.664871",
+                       "v4": "762.813376"
+               },
+               {
+                       "v6": "4846.735593",
+                       "k4": "9000",
+                       "k3": "900",
+                       "k2": "90",
+                       "k1": "9",
+                       "v3": "nbarqjwilbkelk",
+                       "v2": "b",
+                       "v1": "2010-01-09",
+                       "k5": "2019-01-01 00:00:00",
+                       "v5": "535.285510",
+                       "v4": "92.702403"
+               },
+               {
+                       "v6": "7996.434686",
+                       "k4": "10000",
+                       "k3": "1000",
+                       "k2": "100",
+                       "k1": "10",
+                       "v3": "zucprgdnlgzzfl",
+                       "v2": "s",
+                       "v1": "2010-01-10",
+                       "k5": "2020-01-01 00:00:00",
+                       "v5": "155.861217",
+                       "v4": "26.874738"
+               }
+       ]
+}
\ No newline at end of file
diff --git a/regression-test/data/load_p0/stream_load/invalid_json_array.json 
b/regression-test/data/load_p0/stream_load/invalid_json_array4.json
similarity index 57%
copy from regression-test/data/load_p0/stream_load/invalid_json_array.json
copy to regression-test/data/load_p0/stream_load/invalid_json_array4.json
index 7f1e5f1884..293831d8b6 100644
--- a/regression-test/data/load_p0/stream_load/invalid_json_array.json
+++ b/regression-test/data/load_p0/stream_load/invalid_json_array4.json
@@ -1,4 +1,4 @@
- [   {"v6": "7395.231067", "k4": "1000", "k3": "100", "k2": "10", "k1": "1", 
"v3": "ynqnzeowymt", "v2": "t", "v1": "2010-01-01", "k5": "2011-01-01 
00:00:00", "v5": "180.998031", "v4": "38.638843"},
+ { "item":  [{"v6": "7395.231067", "k4": "1000", "k3": "100", "k2": "10", 
"k1": "1", "v3": "ynqnzeowymt", "v2": "t", "v1": "2010-01-01", "k5": 
"2011-01-01 00:00:00", "v5": "180.998031", "v4": "38.638843"},
     {"v6": "2080.504502", "k4": "2000", "k3": "200", "k2": "20", "k1": "2", 
"v3": "hfkfwlr", "v2": "f", "v1": "2010-01-02", "k5": "2012-01-01 00:00:00", 
"v5": "539.922834", "v4": "506.044046"},
     {"v6": "4605.253205", "k4": "3000", "k3": "300", "k2": "30", "k1": "3", 
"v3": "uoclasp", "v2": "t", "v1": "2010-01-03", "k5": "2013-01-01 00:00:00", 
"v5": "577.044148", "v4": "377.793209"},
     {"v6": "7291.703724", "k4": "4000", "k3": "400", "k2": "40", "k1": "4", 
"v3": "iswngzeodfhptjzgswsddt", "v2": "n", "v1": "2010-01-04", "k5": 
"2014-01-01 00:00:00", "v5": "919.067864", "v4": "871.354536"},
@@ -6,6 +6,10 @@
     {"v6": "6514.405051", "k4": "6000", "k3": "600", "k2": "60", "k1": "6", 
"v3": "obdrei", "v2": "m", "v1": "2010-01-06", "k5": "2016-01-01 00:00:00", 
"v5": "882.708491", "v4": "921.867848"},
     {"v6": "8604.198677", "k4": "7000", "k3": "700", "k2": "70", "k1": "7", 
"v3": "cuobdhvrgkugknj", "v2": "a", "v1": "2010-01-07", "k5": "2017-01-01 
00:00:00", "v5": "209.420112", "v4": "141.656421"},
     {"v6": "7784.859446", "k4": "8000", "k3": "800", "k2": "80", "k1": "8", 
"v3": "phcxztwgjllhmj", "v2": "z", "v1": "2010-01-08", "k5": "2018-01-01 
00:00:00", "v5": "285.664871", "v4": "762.813376"},
-    {"v6": "4846.735593", "k4": "9000", "k3": "900", "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk", "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
-    {"v6": "7996.434686", "k4": "10000", "k3": "1000", "k2": "100", "k1": 
"10", "v3": "zucprgdnlgzzfl", "v2": "s", "v1": "2010-01-10", "k5": "2020-01-01 
00:00:00", "v5": "155.861217", "v4": "26.874738"},]
+    "v6": "4846.735593", "k4": "9000", "k3": "900", "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000", "k3": "900", "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000", "k3": "900, "k2": "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000", "k3": "900, "k2":} "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": 4846.735593", "k4": "9000",}"k3": "900, "k2":} "90", "k1": "9", 
"v3": "nbarqjwilbkelk" "v2": "b", "v1": "2010-01-09", "k5": "2019-01-01 
00:00:00", "v5": "535.285510", "v4": "92.702403"},
+    {"v6": "7996.434686", "k4": "10000", "k3": "1000", "k2": "100", "k1": 
"10", "v3": "zucprgdnlgzzfl", "v2": "s", "v1": "2010-01-10", "k5": "2020-01-01 
00:00:00", "v5": "155.861217", "v4": "26.874738"}]}
 
diff --git a/regression-test/data/load_p0/stream_load/invalid_nest_json1.json 
b/regression-test/data/load_p0/stream_load/invalid_nest_json1.json
new file mode 100644
index 0000000000..09caf85733
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/invalid_nest_json1.json
@@ -0,0 +1,5 @@
+{"no": 1, "item": {"id": 1, "city": "beijing", "code": 2345671}}
+{"no": 2, "item": {"id": 2, "city" "shanghai", "code": 2345672}}
+{"no": 3, "item": {"id": 3, "city": "hangzhou", "code": 2345673}}
+{"no": 4, "item": id": 4, "city": "shenzhen", "code": 2345674}}
+{"no": 5, "item": {"id": 5, "city": "guangzhou", "code": 2345675}}
diff --git a/regression-test/data/load_p0/stream_load/invalid_nest_json2.json 
b/regression-test/data/load_p0/stream_load/invalid_nest_json2.json
new file mode 100644
index 0000000000..46da8b855c
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/invalid_nest_json2.json
@@ -0,0 +1,16 @@
+{"no": 1, "item": {"id": 1, "city": "beijing", "code": 2345671}}
+{"no": 2, "item": {"id": 2, "city": "shanghai", "code": 2345672}}
+{"no": 3, "item": ["id": 3, "city"], "hangzhou", "code": 2345673}}
+{"no": 3, "xxxxx": ["id": 3, "city"], "hangzhou", "code": 2345673}}
+{"no": 3, xxxxx": ["id": 3, "city"], "hangzhou", "code": 2345673}}
+{"no": 4, "item": {"id": 4, "city": "shenzhen", "code": 2345674}}
+{"no": 5, "item": {"id": 5, "city": "guangzhou", "code": 2345675}}
+{"no": 5, "item": {"id": 5, "city": ["guangzhou"], "code": 2345675}}
+{"no": 5, "item": {"id": 5, "city": {"guangzhou": 1}, "code": 2345675}}
+{"no": 5, "item": {"id": 5, "city": ["guangzhou", "code": 2345675]]}
+{"no": 5, "item": {"id": 5, "city": ["guangzhou", "code": 2345675}}
+{"no": 5, "item": {"id": 5, "city": {"guangzhou", "code": 2345675}}
+{"no": 5, "item": {"id": 5, "city": {"guangzhou":1, "code": 2345675}}}
+{"no": 5, "item": {"id": 5, "city": "1}}}
+{"no": 5, "item": {"id": 5, "city": "1]}}}
+{"no": 5, ["item": {"id": 5, "city": {"guangzhou": 1, "code": 2345675}]}
diff --git a/regression-test/data/load_p0/stream_load/invalid_nest_json3.json 
b/regression-test/data/load_p0/stream_load/invalid_nest_json3.json
new file mode 100644
index 0000000000..57d8a6fc74
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/invalid_nest_json3.json
@@ -0,0 +1,5 @@
+no": 1, "item": {"id": 1, "city": "beijing", "code": 2345671}}
+{"no": , "item": {"id": 2, "city": "shanghai", "code": 2345672}}
+{"no": 3, "item": ["id": 3, "city": "hangzhou", "code": 2345673}}
+{"no": 4, item": {"id": 4, "city": "shenzhen", "code": 2345674}}
+{"no": 5, "item": {"id": 5, "city": guangzhou", "code": 2345675}}
diff --git 
a/regression-test/data/load_p0/stream_load/invalid_nest_json_array.json 
b/regression-test/data/load_p0/stream_load/invalid_nest_json_array.json
new file mode 100644
index 0000000000..b7874664ad
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/invalid_nest_json_array.json
@@ -0,0 +1,26 @@
+[
+    {
+        "no": 1,
+        "item": {
+            "id": 1,
+            "city": [
+                "zhejiang",
+                "hangzhou",
+                "xihu"
+            ],
+            "code": 2345671
+        }
+    },
+    {
+        "no": 2,
+        "item":
+            "id": 2,
+            "city": [
+                "zhejiang",
+                "hangzhou",
+                "xiaoshan"
+            ],
+            "code": 2345672
+        }
+    }
+]
\ No newline at end of file
diff --git 
a/regression-test/data/load_p0/stream_load/invalid_nest_json_array1.json 
b/regression-test/data/load_p0/stream_load/invalid_nest_json_array1.json
new file mode 100644
index 0000000000..5ebe4ef998
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/invalid_nest_json_array1.json
@@ -0,0 +1,26 @@
+{
+    {
+        "no": 1,
+        "item": {
+            "id": 1,
+            city": [
+                "zhejiang",
+                "hangzhou",
+                "xihu"
+            ],
+            "code": 2345671
+        }
+    },
+    {
+        "no": 2,
+        "item":
+            "id": 2,
+            "city": [
+                "zhejiang",
+                "hangzhou",
+                "xiaoshan"
+            ],
+            "code": 2345672
+        }
+    }
+]
\ No newline at end of file
diff --git 
a/regression-test/data/load_p0/stream_load/invalid_nest_json_array2.json 
b/regression-test/data/load_p0/stream_load/invalid_nest_json_array2.json
new file mode 100644
index 0000000000..4685f528a1
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/invalid_nest_json_array2.json
@@ -0,0 +1,26 @@
+[
+    {
+        "no": 1,
+        "tem": {{
+            "id": 1,
+            "city": [
+                "zhejiang",
+                "hangzhou",
+                "xihu"
+            ],
+            "code": 2345671
+        }
+    },
+    {
+        "no": 2,
+        "item":
+            "id": 2,
+            "city": [
+                "zhejiang",
+                "hangzhou",
+                "xiaoshan"
+            ],
+            "code": 2345672
+        }
+    }
+]
\ No newline at end of file
diff --git 
a/regression-test/data/load_p0/stream_load/invalid_nest_json_array3.json 
b/regression-test/data/load_p0/stream_load/invalid_nest_json_array3.json
new file mode 100644
index 0000000000..a493bfd7d0
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/invalid_nest_json_array3.json
@@ -0,0 +1,25 @@
+[
+    {
+        "no": 1,
+        "item": {
+            "id": 1,
+            "city": [
+                "zhejiang",
+                "hangzhou",
+                "xihu
+            ],
+            "code": 2345671
+    },
+    {
+        "no": 2,
+        "item":
+            "id": 2,
+            "city": [
+                "zhejiang",
+                "hangzhou",
+                "xiaoshan"
+            ],
+            "code": 2345672
+        }
+    }
+]
\ No newline at end of file
diff --git a/regression-test/data/load_p0/stream_load/nest_json_array.json 
b/regression-test/data/load_p0/stream_load/nest_json_array.json
index b8b3cf917d..af2605f98f 100644
--- a/regression-test/data/load_p0/stream_load/nest_json_array.json
+++ b/regression-test/data/load_p0/stream_load/nest_json_array.json
@@ -50,7 +50,7 @@
     {
         "no": 5,
         "item": {
-            "id": 5,
+            "id" : 5,
             "city": [
                 "zhejiang",
                 "hangzhou",
@@ -70,5 +70,46 @@
             ],
             "code": 2345676
         }
+    },
+    {   "no": 7,
+        "item": {
+            "idx": 2,
+            "cityx": [
+                "zhejiang",
+                "hangzhou",
+                "xiaoshan"
+            ],
+            "codex": 2345672
+        }
+    },
+    {   "no": 7,
+        "item": {
+        }
+    },
+    {   "no": 7,
+        "item": []
+    },
+    {   "no": 7,
+        "itemxxx": {}
+    },
+    {   "no": 7,
+        "item": "123"
+    },
+    {   
+        "no": 7,
+        "item": {
+        }
+    },
+    {
+        "no": 8,
+        "item": {
+            "id": 7,
+            "city": [
+                "zhejiang",
+                "hangzhou",
+                "fuyang"
+            ],
+            "code": 2345676
+        }
     }
 ]
\ No newline at end of file
diff --git a/regression-test/data/load_p0/stream_load/simple_json2.json 
b/regression-test/data/load_p0/stream_load/simple_json2.json
index eb698453de..ca3197bbf9 100644
--- a/regression-test/data/load_p0/stream_load/simple_json2.json
+++ b/regression-test/data/load_p0/stream_load/simple_json2.json
@@ -9,6 +9,8 @@
         "id": 2,
         "city": "shanghai"
     },
+    {},
+    {"xxx":1},
     {
         "code": 2345673,
         "id": 3,
diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out 
b/regression-test/data/load_p0/stream_load/test_json_load.out
index d0de96b7d7..b297fa2236 100644
--- a/regression-test/data/load_p0/stream_load/test_json_load.out
+++ b/regression-test/data/load_p0/stream_load/test_json_load.out
@@ -102,7 +102,11 @@
 200    changsha        3456789
 
 -- !select10 --
+\N     \N      \N
 200    changsha        3456789
+19210  \N      \N
+12811110       \N      \N
+171171710      \N      \N
 
 -- !select11 --
 1      beijing 2345671
@@ -141,10 +145,17 @@
 
 -- !select14 --
 10     2345671 \N
+10     beijing 2345671
 20     2345672 \N
+20     shanghai        2345672
 30     2345673 \N
 40     2345674 \N
+40     shenzhen        2345674
 50     2345675 \N
+50     {"guangzhou":1,"code":2345675}  \N
+50     ["guangzhou"]   2345675
+50     guangzhou       2345675
+50     {"guangzhou":1} 2345675
 200    changsha        3456789
 
 -- !select15 --
@@ -162,5 +173,11 @@
 4      shangcheng      2345674
 5      tonglu  2345675
 6      fuyang  2345676
+7      fuyang  2345676
+200    changsha        3456789
+
+-- !select17 --
+
+-- !select18 --
 200    changsha        3456789
 
diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy 
b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
index d2c5d15fe0..f874eb2b87 100644
--- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
@@ -115,7 +115,8 @@ suite("test_json_load", "p0") {
     }
     
     def load_json_data = {label, strip_flag, read_flag, format_flag, exprs, 
json_paths, 
-                        json_root, where_expr, fuzzy_flag, file_name, 
ignore_failure=false ->
+                        json_root, where_expr, fuzzy_flag, file_name, 
ignore_failure=false,
+                        expected_succ_rows = -1 ->
         
         // load the json data
         streamLoad {
@@ -133,19 +134,26 @@ suite("test_json_load", "p0") {
             set 'fuzzy_parse', fuzzy_flag
             file file_name // import json file
             time 10000 // limit inflight 10s
+            if (expected_succ_rows >= 0) {
+                set 'max_filter_ratio', '1'
+            }
 
             // if declared a check callback, the default check condition will 
ignore.
             // So you must check all condition
             check { result, exception, startTime, endTime ->
-               if (ignore_failure) { return }
+               if (ignore_failure && expected_succ_rows < 0) { return }
                 if (exception != null) {
                     throw exception
                 }
                 log.info("Stream load result: ${result}".toString())
                 def json = parseJson(result)
                 assertEquals("success", json.Status.toLowerCase())
-                assertEquals(json.NumberTotalRows, json.NumberLoadedRows + 
json.NumberUnselectedRows)
-                assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+                if (expected_succ_rows >= 0) {
+                    assertEquals(json.NumberLoadedRows, expected_succ_rows)
+                } else {
+                    assertEquals(json.NumberTotalRows, json.NumberLoadedRows + 
json.NumberUnselectedRows)
+                    assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
+                }
             }
         }
     }
@@ -350,13 +358,13 @@ suite("test_json_load", "p0") {
         create_test_table1.call(testTable)
         
         load_json_data.call('test_json_load_case10_2', '', 'true', 'json', 
'id= id * 10', '',
-                            '$.item', '', 'true', 'invalid_json.json', true)
+                            '$.item', '', 'false', 'invalid_json.json', false, 
4)
 
         sql "sync"
         qt_select10 "select * from ${testTable} order by id"
 
     } finally {
-        try_sql("DROP TABLE IF EXISTS ${testTable}")
+        // try_sql("DROP TABLE IF EXISTS ${testTable}")
     }
 
     // case11: test json file which is unordered and no use json_path
@@ -365,7 +373,7 @@ suite("test_json_load", "p0") {
         
         create_test_table1.call(testTable)
 
-        load_json_data.call('test_json_load_case11_2', 'true', '', 'json', '', 
'', '', '', '', 'simple_json2.json')
+        load_json_data.call('test_json_load_case11_2', 'true', '', 'json', '', 
'', '', '', '', 'simple_json2.json', false, 10)
 
         sql "sync"
         qt_select11 "select * from ${testTable} order by id"
@@ -435,11 +443,19 @@ suite("test_json_load", "p0") {
         load_json_data.call('test_json_load_case14_2', '', 'true', 'json', 
'id= id * 10', '[\"$.id\", \"$.code\"]',
                             '$.item', '', 'true', 'nest_json.json')
 
+        // invalid nest_json
+        load_json_data.call('test_json_load_case14_3', '', 'true', 'json', 
'id= id * 10', '[\"$.id\",  \"$.city\", \"$.code\"]',
+                            '$.item', '', 'true', 'invalid_nest_json1.json', 
true) 
+        load_json_data.call('test_json_load_case14_4', '', 'true', 'json', 
'id= id * 10', '[\"$.id\",  \"$.city\", \"$.code\"]',
+                            '$.item', '', 'true', 'invalid_nest_json2.json', 
false, 7) 
+        load_json_data.call('test_json_load_case14_5', '', 'true', 'json', 
'id= id * 10', '[\"$.id\",  \"$.city\", \"$.code\"]',
+                            '$.item', '', 'true', 'invalid_nest_json3.json', 
true) 
+
         sql "sync"
-        qt_select14 "select * from ${testTable} order by id"
+        qt_select14 "select * from ${testTable} order by id, code, city"
 
     } finally {
-        try_sql("DROP TABLE IF EXISTS ${testTable}")
+        // try_sql("DROP TABLE IF EXISTS ${testTable}")
     }
 
     // case15: apply jsonpaths & exprs & json_root
@@ -465,7 +481,7 @@ suite("test_json_load", "p0") {
         create_test_table1.call(testTable)
         
         load_json_data.call('test_json_load_case16_2', 'true', '', 'json', 
'id, code, city',
-                            '[\"$.id\", \"$.code\", \"$.city[2]\"]', '$.item', 
'', 'true', 'nest_json_array.json')
+                            '[\"$.id\", \"$.code\", \"$.city[2]\"]', '$.item', 
'', 'true', 'nest_json_array.json', false, 7)
 
         sql "sync"
         qt_select16 "select * from ${testTable} order by id"
@@ -473,6 +489,47 @@ suite("test_json_load", "p0") {
     } finally {
         try_sql("DROP TABLE IF EXISTS ${testTable}")
     }
+
+    // case17: invalid json
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+
+        test_invalid_json_array_table.call(testTable)
+        load_json_data.call('test_json_load_case17', 'true', '', 'json', '', 
'',
+                '', '', '', 'invalid_json_array.json', false, 0)
+        load_json_data.call('test_json_load_case17_1', 'true', '', 'json', '', 
'',
+                '$.item', '', '', 'invalid_json_array1.json', false, 0)
+        load_json_data.call('test_json_load_case17_2', 'true', '', 'json', '', 
'',
+                '$.item', '', '', 'invalid_json_array2.json', false, 0)
+        load_json_data.call('test_json_load_case17_3', 'true', '', 'json', '', 
'',
+                '$.item', '', '', 'invalid_json_array3.json', false, 0)
+        sql "sync"
+        qt_select17 "select * from ${testTable}"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+
+    // case18: invalid nest json
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+
+        create_test_table1.call(testTable)
+        load_json_data.call('test_json_load_case16_2', 'true', '', 'json', 
'id, code, city',
+                            '[\"$.id\", \"$.code\", \"$.city[2]\"]', '$.item', 
'', 'true', 'invalid_nest_json_array.json', true) 
+        load_json_data.call('test_json_load_case16_2', 'true', '', 'json', 
'id, code, city',
+                            '[\"$.id\", \"$.code\", \"$.city[100]\"]', 
'$.item', '', 'true', 'invalid_nest_json_array1.json', true) 
+        load_json_data.call('test_json_load_case16_2', 'true', '', 'json', 
'id, code, city',
+                            '[\"$.id\", \"$.code\", \"$.city\"]', '$.item', 
'', 'true', 'invalid_nest_json_array2.json', true) 
+        load_json_data.call('test_json_load_case16_2', 'true', '', 'json', 
'id, code, city',
+                            '[\"$.id\", \"$.code\", \"$.city[2]\"]', '$.item', 
'', 'true', 'invalid_nest_json_array3.json', true) 
+
+        sql "sync"
+        qt_select18 "select * from ${testTable} order by id"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
     
     // if 'enableHdfs' in regression-conf.groovy has been set to true,
     // the test will run these case as below.
@@ -483,7 +540,7 @@ suite("test_json_load", "p0") {
         def hdfs_file_path = uploadToHdfs "stream_load/simple_object_json.json"
         def format = "json" 
 
-        // case17: import json use pre-filter exprs
+        // case18: import json use pre-filter exprs
         try {
             sql "DROP TABLE IF EXISTS ${testTable}"
             
@@ -498,7 +555,7 @@ suite("test_json_load", "p0") {
             try_sql("DROP TABLE IF EXISTS ${testTable}")
         }
 
-        // case18: import json use pre-filter and where exprs
+        // case19: import json use pre-filter and where exprs
         try {
             sql "DROP TABLE IF EXISTS ${testTable}"
             
@@ -512,20 +569,5 @@ suite("test_json_load", "p0") {
         } finally {
             try_sql("DROP TABLE IF EXISTS ${testTable}")
         }
-
-        // case19: invalid json
-        try {
-            sql "DROP TABLE IF EXISTS ${testTable}"
-
-            test_invalid_json_array_table.call(testTable)
-            load_json_data.call('test_json_load_case19', 'true', '', 'json', 
'', '',
-                    '', '', '', 'invalid_json_array.json', true)
-
-            sql "sync"
-            qt_select "select * from ${testTable}"
-
-        } finally {
-            try_sql("DROP TABLE IF EXISTS ${testTable}")
-        }
     }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to