This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-c108335-hive-sql in repository https://gitbox.apache.org/repos/asf/doris.git
commit 7f3ef5f7d87cc33891eafec051466db492489ffb Author: morningman <morning...@163.com> AuthorDate: Wed Apr 16 08:13:47 2025 +0800 [tmp] support json path '0' --- be/src/util/string_util.h | 21 +++++++++++++++++++++ be/src/vec/functions/function_json.cpp | 16 ++++++---------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/be/src/util/string_util.h b/be/src/util/string_util.h index a5837a538cc..361ebc4a756 100644 --- a/be/src/util/string_util.h +++ b/be/src/util/string_util.h @@ -146,9 +146,30 @@ auto get_json_token(T& path_string) { } } +template <typename T> +std::vector<std::string> get_json_token_vector(T& path_string) { + auto tok = get_json_token(path_string); + std::vector<std::string> paths(tok.begin(), tok.end()); + if (!paths.empty() && paths[0].front() == '$' && paths[0].length() > 1) { + // we need to split path like "$[0]" to "$" and "[0]" + // because "$[0]" is a valid json path, but previously we can only handle "$.[0]". + // So here we transform to make it work for both format. + std::string first_element = std::move(paths[0]); + std::string prefix = "$"; + std::string remainder = first_element.substr(1); + + paths.erase(paths.begin()); + paths.insert(paths.begin(), remainder); + paths.insert(paths.begin(), prefix); + } + return paths; +} + #ifdef USE_LIBCPP template <> auto get_json_token(std::string_view& path_string) = delete; +template <> +auto get_json_token_vector(std::string_view& path_string) = delete; #endif } // namespace doris diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index 91bf15c3d41..1f1aeab11ec 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -240,12 +240,11 @@ rapidjson::Value* get_json_object(std::string_view json_string, std::string_view #ifdef USE_LIBCPP std::string s(path_string); - auto tok = get_json_token(s); + auto paths = get_json_token_vector(s); #else - auto tok = get_json_token(path_string); + auto paths = get_json_token_vector(path_string); #endif - std::vector<std::string> paths(tok.begin(), tok.end()); get_parsed_paths(paths, &tmp_parsed_paths); if (tmp_parsed_paths.empty()) { return document; @@ -880,12 +879,10 @@ struct FunctionJsonExtractImpl { #ifdef USE_LIBCPP std::string s(path_string); - auto tok = get_json_token(s); + auto paths = get_json_token_vector(s); #else - auto tok = get_json_token(path_string); + auto paths = get_json_token_vector(path_string); #endif - // TODO: here maybe could use std::vector<std::string_view> or std::span - std::vector<std::string> paths(tok.begin(), tok.end()); get_parsed_paths(paths, &parsed_paths); if (parsed_paths.empty()) { return nullptr; @@ -1392,11 +1389,10 @@ private: #ifdef USE_LIBCPP std::string s(path_string); - auto tok = get_json_token(s); + auto paths = get_json_token_vector(s); #else - auto tok = get_json_token(path_string); + auto paths = get_json_token_vector(path_string); #endif - std::vector<std::string> paths(tok.begin(), tok.end()); RETURN_IF_ERROR(get_parsed_paths_with_status(paths, &parsed_paths)); json_paths[col / 2].emplace_back(parsed_paths); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org