comphead commented on code in PR #3747:
URL: https://github.com/apache/datafusion-comet/pull/3747#discussion_r3010496437


##########
native/spark-expr/src/string_funcs/get_json_object.rs:
##########
@@ -0,0 +1,536 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{Array, ArrayRef, StringArray, StringBuilder};
+use datafusion::common::{
+    cast::as_generic_string_array, exec_err, Result as DataFusionResult, 
ScalarValue,
+};
+use datafusion::logical_expr::ColumnarValue;
+use serde_json::Value;
+use std::sync::Arc;
+
+/// Spark-compatible `get_json_object` function.
+///
+/// Extracts a JSON value from a JSON string using a JSONPath expression.
+/// Returns the result as a string, or null if the path doesn't match or input 
is invalid.
+///
+/// Supported JSONPath syntax:
+/// - `$` — root element
+/// - `.name` or `['name']` — named child
+/// - `[n]` — array index (0-based)
+/// - `[*]` — array wildcard (iterates over array elements)
+pub fn spark_get_json_object(args: &[ColumnarValue]) -> 
DataFusionResult<ColumnarValue> {
+    if args.len() != 2 {
+        return exec_err!(
+            "get_json_object expects 2 arguments (json, path), got {}",
+            args.len()
+        );
+    }
+
+    match (&args[0], &args[1]) {
+        // Column json, scalar path (most common case)
+        (ColumnarValue::Array(json_array), ColumnarValue::Scalar(path_scalar)) 
=> {
+            let path_str = match path_scalar {
+                ScalarValue::Utf8(Some(p)) | ScalarValue::LargeUtf8(Some(p)) 
=> p.as_str(),
+                ScalarValue::Utf8(None) | ScalarValue::LargeUtf8(None) => {
+                    let null_array: ArrayRef = 
Arc::new(StringArray::new_null(json_array.len()));
+                    return Ok(ColumnarValue::Array(null_array));
+                }
+                _ => return exec_err!("get_json_object path must be a string"),
+            };
+
+            let parsed_path = match parse_json_path(path_str) {
+                Some(p) => p,
+                None => {
+                    let null_array: ArrayRef = 
Arc::new(StringArray::new_null(json_array.len()));
+                    return Ok(ColumnarValue::Array(null_array));
+                }
+            };
+
+            let json_strings = as_generic_string_array::<i32>(json_array)?;
+            let mut builder = StringBuilder::new();
+
+            for i in 0..json_strings.len() {
+                if json_strings.is_null(i) {
+                    builder.append_null();
+                } else {
+                    let json_str = json_strings.value(i);
+                    match evaluate_path(json_str, &parsed_path) {
+                        Some(result) => builder.append_value(&result),
+                        None => builder.append_null(),
+                    }
+                }
+            }
+
+            Ok(ColumnarValue::Array(Arc::new(builder.finish())))
+        }
+        // Scalar json, scalar path
+        (ColumnarValue::Scalar(json_scalar), 
ColumnarValue::Scalar(path_scalar)) => {
+            let json_str = match json_scalar {

Review Comment:
   Maybe it would be good to have a utility method to deduplicate the code 
around `json_str` and `path_str`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to