gemini-code-assist[bot] commented on code in PR #38130:
URL: https://github.com/apache/beam/pull/38130#discussion_r3093329268


##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,78 +225,109 @@ def py_value_to_js_dict(py_value):
 def _expand_javascript_mapping_func(
     original_fields, expression=None, callable=None, path=None, name=None):
 
-  # Check for installed js2py package
-  if js2py is None:
+  # Check for installed quickjs package
+  if quickjs is None:
     raise ValueError(
-        "Javascript mapping functions are not supported on"
-        " Python 3.12 or later.")
-
-  # import remaining js2py objects
-  from js2py import base
-  from js2py.constructors import jsdate
-  from js2py.internals import simplex
-
-  js_array_type = (
-      base.PyJsArray,
-      base.PyJsArrayBuffer,
-      base.PyJsInt8Array,
-      base.PyJsUint8Array,
-      base.PyJsUint8ClampedArray,
-      base.PyJsInt16Array,
-      base.PyJsUint16Array,
-      base.PyJsInt32Array,
-      base.PyJsUint32Array,
-      base.PyJsFloat32Array,
-      base.PyJsFloat64Array)
-
-  def _js_object_to_py_object(obj):
-    if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
-      return base.to_python(obj)
-    elif isinstance(obj, js_array_type):
-      return [_js_object_to_py_object(value) for value in obj.to_list()]
-    elif isinstance(obj, jsdate.PyJsDate):
-      return obj.to_utc_dt()
-    elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
-      return None
-    elif isinstance(obj, base.PyJsError):
-      raise RuntimeError(obj['message'])
-    elif isinstance(obj, base.PyJsObject):
-      return {
-          key: _js_object_to_py_object(value['value'])
-          for (key, value) in obj.own.items()
-      }
-    elif isinstance(obj, base.JsObjectWrapper):
-      return _js_object_to_py_object(obj._obj)
-
-    return obj
+        "Javascript mapping functions require the 'quickjs' package.")
+
+  def make_bridge_source(func_name, call_expr):
+    # The bridge function facilitates high-performance data transfer from 
Python
+    # to QuickJS by reconstructing the row object in JS.
+    # To minimize JSON overhead, primitives are passed directly, while complex
+    # types (lists/dicts) are passed as JSON strings and parsed in JS.
+    # The 'flags' argument indicates which values need parsing.
+    keys_json = json.dumps(list(original_fields))
+    return (
+        f"function {func_name}(serialized_flags, ...values) {{ "
+        f"  const keys = {keys_json}; "
+        f"  const flags = serialized_flags.split(','); "
+        f"  const row = {{}}; "
+        f"  for (let i = 0; i < keys.length; i++) {{ "
+        f"    let val = values[i]; "
+        f"    if (flags[i] === '1') val = JSON.parse(val); "
+        f"    row[keys[i]] = val; "
+        f"  }} "
+        f"  const result = {call_expr}; "
+        f"  if (result instanceof Date) "
+        f"return {{__type__: 'date', value: result.toISOString()}}; "
+        f"  return result; "
+        f"}}")
 
   if expression:
-    source = '\n'.join(['function(__row__) {'] + [
-        f'  {name} = __row__.{name}'
-        for name in original_fields if name in expression
-    ] + ['  return (' + expression + ')'] + ['}'])
-    js_func = _CustomJsObjectWrapper(js2py.eval_js(source))
+    args = [
+        name for name in original_fields
+        if name.isidentifier() and name in expression
+    ]
+    parses = []
+    for i, arg in enumerate(args):
+      parses.append(f"  if (flags[{i}] === '1') {arg} = JSON.parse({arg});")
+
+    source = f"""
+function fn(serialized_flags, {", ".join(args)}) {{
+  const flags = serialized_flags.split(',');
+{chr(10).join(parses)}
+  const result = ({expression});
+  if (result instanceof Date) return {{__type__: 'date', value: 
result.toISOString()}};
+  return result;
+}}
+"""
+    js_func = _QuickJsCallable(source, "fn")
+    used_fields = args
 
   elif callable:
-    js_func = _CustomJsObjectWrapper(js2py.eval_js(callable))
+    source = make_bridge_source("fn", f"({callable})(row)")
+    js_func = _QuickJsCallable(source, "fn")
+    used_fields = None
 
   else:
     if not path.endswith('.js'):
       raise ValueError(f'File "{path}" is not a valid .js file.')
     udf_code = FileSystems.open(path).read().decode()
-    js = js2py.EvalJs()
-    js.eval(udf_code)
-    js_func = _CustomJsObjectWrapper(getattr(js, name))
+    bridge_source = udf_code + "\n" + make_bridge_source(
+        "bridge_fn", f"{name}(row)")
+    js_func = _QuickJsCallable(bridge_source, "bridge_fn")
+    used_fields = None
+
+  def _prepare_args(vals):
+    js_vals = []
+    flags = []
+    for val in vals:
+      if isinstance(val, (list, dict)):
+        js_vals.append(json.dumps(val))
+        flags.append('1')
+      else:
+        js_vals.append(val)
+        flags.append('0')
+    return ",".join(flags), js_vals
 
   def js_wrapper(row):
-    row_as_dict = py_value_to_js_dict(row)
+    # Prepare arguments for the JS function. We optimize performance by
+    # passing primitives directly and only serializing complex types (lists,
+    # dicts) to JSON strings. A string of flags ('0' or '1') is passed to
+    # inform the JS bridge which arguments need to be JSON.parsed.
+    if expression:
+      vals = [py_value_to_js_dict(getattr(row, name)) for name in used_fields]
+    else:
+      row_as_dict = py_value_to_js_dict(row)
+      vals = [row_as_dict.get(name) for name in original_fields]
+
+    flags_str, js_vals = _prepare_args(vals)
+
     try:
-      js_result = js_func(row_as_dict)
-    except simplex.JsException as exn:
+      js_result = js_func(flags_str, *js_vals)
+    except Exception as exn:
       raise RuntimeError(
-          f"Error evaluating javascript expression: "
-          f"{exn.mes['message']}") from exn
-    return dicts_to_rows(_js_object_to_py_object(js_result))
+          f"Error evaluating javascript expression: {exn}") from exn
+
+    if isinstance(js_result, quickjs.Object):
+      obj = json.loads(js_result.json())
+      if isinstance(obj, dict) and obj.get('__type__') == 'date':
+        import datetime
+        js_result = datetime.datetime.fromisoformat(obj['value'])

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   Remove the inline import of `datetime`. This function is called for every 
row, and while Python caches imports, the repeated lookup is unnecessary. Use 
the top-level import instead.
   
   ```suggestion
           js_result = datetime.datetime.fromisoformat(obj['value'])
   ```



##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,78 +225,109 @@ def py_value_to_js_dict(py_value):
 def _expand_javascript_mapping_func(
     original_fields, expression=None, callable=None, path=None, name=None):
 
-  # Check for installed js2py package
-  if js2py is None:
+  # Check for installed quickjs package
+  if quickjs is None:
     raise ValueError(
-        "Javascript mapping functions are not supported on"
-        " Python 3.12 or later.")
-
-  # import remaining js2py objects
-  from js2py import base
-  from js2py.constructors import jsdate
-  from js2py.internals import simplex
-
-  js_array_type = (
-      base.PyJsArray,
-      base.PyJsArrayBuffer,
-      base.PyJsInt8Array,
-      base.PyJsUint8Array,
-      base.PyJsUint8ClampedArray,
-      base.PyJsInt16Array,
-      base.PyJsUint16Array,
-      base.PyJsInt32Array,
-      base.PyJsUint32Array,
-      base.PyJsFloat32Array,
-      base.PyJsFloat64Array)
-
-  def _js_object_to_py_object(obj):
-    if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
-      return base.to_python(obj)
-    elif isinstance(obj, js_array_type):
-      return [_js_object_to_py_object(value) for value in obj.to_list()]
-    elif isinstance(obj, jsdate.PyJsDate):
-      return obj.to_utc_dt()
-    elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
-      return None
-    elif isinstance(obj, base.PyJsError):
-      raise RuntimeError(obj['message'])
-    elif isinstance(obj, base.PyJsObject):
-      return {
-          key: _js_object_to_py_object(value['value'])
-          for (key, value) in obj.own.items()
-      }
-    elif isinstance(obj, base.JsObjectWrapper):
-      return _js_object_to_py_object(obj._obj)
-
-    return obj
+        "Javascript mapping functions require the 'quickjs' package.")
+
+  def make_bridge_source(func_name, call_expr):
+    # The bridge function facilitates high-performance data transfer from 
Python
+    # to QuickJS by reconstructing the row object in JS.
+    # To minimize JSON overhead, primitives are passed directly, while complex
+    # types (lists/dicts) are passed as JSON strings and parsed in JS.
+    # The 'flags' argument indicates which values need parsing.
+    keys_json = json.dumps(list(original_fields))
+    return (
+        f"function {func_name}(serialized_flags, ...values) {{ "
+        f"  const keys = {keys_json}; "
+        f"  const flags = serialized_flags.split(','); "
+        f"  const row = {{}}; "
+        f"  for (let i = 0; i < keys.length; i++) {{ "
+        f"    let val = values[i]; "
+        f"    if (flags[i] === '1') val = JSON.parse(val); "
+        f"    row[keys[i]] = val; "
+        f"  }} "
+        f"  const result = {call_expr}; "
+        f"  if (result instanceof Date) "
+        f"return {{__type__: 'date', value: result.toISOString()}}; "
+        f"  return result; "
+        f"}}")
 
   if expression:
-    source = '\n'.join(['function(__row__) {'] + [
-        f'  {name} = __row__.{name}'
-        for name in original_fields if name in expression
-    ] + ['  return (' + expression + ')'] + ['}'])
-    js_func = _CustomJsObjectWrapper(js2py.eval_js(source))
+    args = [
+        name for name in original_fields
+        if name.isidentifier() and name in expression
+    ]
+    parses = []
+    for i, arg in enumerate(args):
+      parses.append(f"  if (flags[{i}] === '1') {arg} = JSON.parse({arg});")
+
+    source = f"""
+function fn(serialized_flags, {", ".join(args)}) {{

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   When `args` is empty, this generates a trailing comma in the function 
parameters (e.g., `function fn(serialized_flags, )`). While QuickJS supports 
this ES2017 feature, it's cleaner and more compatible to avoid it.
   
   ```suggestion
   function fn(serialized_flags{', ' + ', '.join(args) if args else ''}) {{
   ```



##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -178,18 +177,34 @@ def _check_mapping_arguments(
     raise ValueError(f'{transform_name} cannot specify "name" without "path"')
 
 
-# js2py's JsObjectWrapper object has a self-referencing __dict__ property
-# that cannot be pickled without implementing the __getstate__ and
-# __setstate__ methods.
-class _CustomJsObjectWrapper(JsObjectWrapper):
-  def __init__(self, js_obj):
-    super().__init__(js_obj.__dict__['_obj'])
+class _QuickJsCallable:
+  def __init__(self, source, name=None):
+    self.source = source
+    self.name = name
+    self._func = None
+
+  def _get_func(self):
+    if self._func is None:
+      if quickjs is None:
+        raise ValueError("quickjs is not installed.")
+      context = quickjs.Context()
+      if self.name:
+        context.eval(self.source)
+        self._func = context.get(self.name)
+      else:
+        self._func = context.eval(self.source)
+    return self._func

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   The `_get_func` method lacks thread safety. In multi-threaded environments 
like the `DirectRunner`, multiple threads might attempt to initialize 
`self._func` or use the `quickjs.Context` concurrently, which can lead to 
crashes or undefined behavior as QuickJS contexts are not thread-safe. Consider 
using a lock or thread-local storage for the context.



##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -17,6 +17,7 @@
 
 """This module defines the basic MapToFields operation."""
 import itertools
+import json

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   Add `import datetime` at the top level to avoid the overhead of importing it 
inside the per-row wrapper function.
   
   ```suggestion
   import datetime
   import json
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to