This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 498d27c9058 [improve](json_reader) add prompt when all fields is null 
(#27630)
498d27c9058 is described below

commit 498d27c90580cc4486bc9f458c1e0516b9d71ad4
Author: HHoflittlefish777 <[email protected]>
AuthorDate: Wed Nov 29 18:26:42 2023 +0800

    [improve](json_reader) add prompt when all fields is null (#27630)
---
 be/src/vec/exec/format/json/new_json_reader.cpp    | 35 ++++++--
 .../data/load_p0/stream_load/test_json_error.json  |  1 +
 .../load_p0/stream_load/test_json_load.groovy      | 93 ++++++++++++++++++++++
 3 files changed, 122 insertions(+), 7 deletions(-)

diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp 
b/be/src/vec/exec/format/json/new_json_reader.cpp
index aee5e09be3a..f735b1a74ba 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -764,12 +764,19 @@ Status NewJsonReader::_set_column_value(rapidjson::Value& 
objectValue, Block& bl
     if (!has_valid_value) {
         // there is no valid value in json line but has filled with default 
value before
         // so remove this line in block
+        string col_names;
         for (int i = 0; i < block.columns(); ++i) {
             auto column = block.get_by_position(i).column->assume_mutable();
             column->pop_back(1);
         }
-        RETURN_IF_ERROR(_append_error_msg(objectValue, "All fields is null, 
this is a invalid row.",
-                                          "", valid));
+        for (auto* slot_desc : slot_descs) {
+            col_names.append(slot_desc->col_name() + ", ");
+        }
+        RETURN_IF_ERROR(_append_error_msg(objectValue,
+                                          "There is no column matching 
jsonpaths in the json file, "
+                                          "columns:[{}], jsonpaths:{}, please 
check columns "
+                                          "and jsonpaths",
+                                          col_names, valid));
         return Status::OK();
     }
     *valid = true;
@@ -1314,8 +1321,15 @@ Status 
NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
         has_valid_value = true;
     }
     if (!has_valid_value) {
-        RETURN_IF_ERROR(
-                _append_error_msg(value, "All fields is null, this is a 
invalid row.", "", valid));
+        string col_names;
+        for (auto* slot_desc : slot_descs) {
+            col_names.append(slot_desc->col_name() + ", ");
+        }
+        RETURN_IF_ERROR(_append_error_msg(value,
+                                          "There is no column matching 
jsonpaths in the json file, "
+                                          "columns:[{}], jsonpaths:{}, please 
check columns "
+                                          "and jsonpaths",
+                                          col_names, valid));
         return Status::OK();
     }
 
@@ -1412,7 +1426,7 @@ Status 
NewJsonReader::_append_error_msg(simdjson::ondemand::object* obj, std::st
     std::string err_msg;
     if (!col_name.empty()) {
         fmt::memory_buffer error_buf;
-        fmt::format_to(error_buf, error_msg, col_name);
+        fmt::format_to(error_buf, error_msg, col_name, _jsonpaths);
         err_msg = fmt::to_string(error_buf);
     } else {
         err_msg = error_msg;
@@ -1610,12 +1624,19 @@ Status 
NewJsonReader::_simdjson_write_columns_by_jsonpath(
     if (!has_valid_value) {
         // there is no valid value in json line but has filled with default 
value before
         // so remove this line in block
+        string col_names;
         for (int i = 0; i < block.columns(); ++i) {
             auto column = block.get_by_position(i).column->assume_mutable();
             column->pop_back(1);
         }
-        RETURN_IF_ERROR(
-                _append_error_msg(value, "All fields is null, this is a 
invalid row.", "", valid));
+        for (auto* slot_desc : slot_descs) {
+            col_names.append(slot_desc->col_name() + ", ");
+        }
+        RETURN_IF_ERROR(_append_error_msg(value,
+                                          "There is no column matching 
jsonpaths in the json file, "
+                                          "columns:[{}], jsonpaths:{}, please 
check columns "
+                                          "and jsonpaths",
+                                          col_names, valid));
         return Status::OK();
     }
     *valid = true;
diff --git a/regression-test/data/load_p0/stream_load/test_json_error.json 
b/regression-test/data/load_p0/stream_load/test_json_error.json
new file mode 100644
index 00000000000..47bfb9237fa
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_json_error.json
@@ -0,0 +1 @@
+{"name":"Name1","age":21,"agent_id":"1"}
\ No newline at end of file
diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy 
b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
index ea0ab0fd04a..f5dc4d97c1b 100644
--- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
@@ -128,6 +128,27 @@ suite("test_json_load", "p0") {
         assertTrue(result1[0].size() == 1)
         assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
     }
+
+    def create_json_test_table = { testTablex ->
+                    sql """
+                        CREATE TABLE `${testTablex}` (
+                            `name` varchar(48) NULL,
+                            `age` bigint(20) NULL,
+                            `agent_id` varchar(256) NULL
+                            ) ENGINE=OLAP
+                            DUPLICATE KEY(`name`)
+                            COMMENT 'OLAP'
+                            DISTRIBUTED BY RANDOM BUCKETS 10
+                            PROPERTIES (
+                            "replication_allocation" = "tag.location.default: 
1",
+                            "is_being_synced" = "false",
+                            "storage_format" = "V2",
+                            "light_schema_change" = "true",
+                            "disable_auto_compaction" = "false",
+                            "enable_single_replica_compaction" = "false"
+                            ); 
+                        """
+    }
     
     def load_json_data = {table_name, label, strip_flag, read_flag, 
format_flag, exprs, json_paths, 
                         json_root, where_expr, fuzzy_flag, file_name, 
ignore_failure=false,
@@ -705,4 +726,76 @@ suite("test_json_load", "p0") {
         set_be_param.call("enable_simdjson_reader", "true")
         try_sql("DROP TABLE IF EXISTS ${testTable}")
     }
+
+    // test jsonpaths error
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+
+        create_json_test_table.call(testTable)
+        streamLoad {
+            table "${testTable}"
+            set 'jsonpaths', '[\"Name\", \"Age\", \"Agent_id\"]'
+            set 'format', 'json'
+            file 'test_json_error.json' // import json file
+            time 10000 // limit inflight 10s
+
+            check { result, exception, startTime, endTime ->
+                if (exception != null) {
+                    throw exception
+                }
+                log.info("Stream load result: ${result}".toString())
+                def json = parseJson(result)
+                def url = json.ErrorURL.toString();
+                assertEquals("fail", json.Status.toLowerCase())
+
+                def command = "curl ${url}"
+                log.info("command: ${command}".toString())
+                def process = command.execute()
+                def code = process.waitFor()
+                def out = process.text
+                log.info("result: ${out}".toString())
+                def reason = "Reason: There is no column matching jsonpaths in 
the json file, columns:[name, age, agent_id, ], jsonpaths:[\"Name\", \"Age\", 
\"Agent_id\"], please check columns and jsonpaths. src line 
[{\"name\":\"Name1\",\"age\":21,\"agent_id\":\"1\"}]; \n"
+                assertEquals("${reason}", "${out}")
+            }
+        }
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
+
+    // test columns error
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+
+        create_json_test_table.call(testTable)
+        streamLoad {
+            table "${testTable}"
+            set 'columns', 'Name, Age, Agent_id'
+            set 'format', 'json'
+            file 'test_json_error.json' // import json file
+            time 10000 // limit inflight 10s
+
+            check { result, exception, startTime, endTime ->
+                if (exception != null) {
+                    throw exception
+                }
+                log.info("Stream load result: ${result}".toString())
+                def json = parseJson(result)
+                def url = json.ErrorURL.toString();
+                assertEquals("fail", json.Status.toLowerCase())
+
+                def command = "curl ${url}"
+                log.info("command: ${command}".toString())
+                def process = command.execute()
+                def code = process.waitFor()
+                def out = process.text
+                log.info("result: ${out}".toString())
+                def reason = "Reason: There is no column matching jsonpaths in 
the json file, columns:[Name, Age, Agent_id, ], jsonpaths:, please check 
columns and jsonpaths. src line 
[{\"name\":\"Name1\",\"age\":21,\"agent_id\":\"1\"}]; \n"
+                assertEquals("${reason}", "${out}")
+            }
+        }
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to