gemini-code-assist[bot] commented on code in PR #38130:
URL: https://github.com/apache/beam/pull/38130#discussion_r3093052974


##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,78 +225,106 @@ def py_value_to_js_dict(py_value):
 def _expand_javascript_mapping_func(
     original_fields, expression=None, callable=None, path=None, name=None):
 
-  # Check for installed js2py package
-  if js2py is None:
+  # Check for installed quickjs package
+  if quickjs is None:
     raise ValueError(
-        "Javascript mapping functions are not supported on"
-        " Python 3.12 or later.")
-
-  # import remaining js2py objects
-  from js2py import base
-  from js2py.constructors import jsdate
-  from js2py.internals import simplex
-
-  js_array_type = (
-      base.PyJsArray,
-      base.PyJsArrayBuffer,
-      base.PyJsInt8Array,
-      base.PyJsUint8Array,
-      base.PyJsUint8ClampedArray,
-      base.PyJsInt16Array,
-      base.PyJsUint16Array,
-      base.PyJsInt32Array,
-      base.PyJsUint32Array,
-      base.PyJsFloat32Array,
-      base.PyJsFloat64Array)
-
-  def _js_object_to_py_object(obj):
-    if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
-      return base.to_python(obj)
-    elif isinstance(obj, js_array_type):
-      return [_js_object_to_py_object(value) for value in obj.to_list()]
-    elif isinstance(obj, jsdate.PyJsDate):
-      return obj.to_utc_dt()
-    elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
-      return None
-    elif isinstance(obj, base.PyJsError):
-      raise RuntimeError(obj['message'])
-    elif isinstance(obj, base.PyJsObject):
-      return {
-          key: _js_object_to_py_object(value['value'])
-          for (key, value) in obj.own.items()
-      }
-    elif isinstance(obj, base.JsObjectWrapper):
-      return _js_object_to_py_object(obj._obj)
+        "Javascript mapping functions require the 'quickjs' package.")
 
-    return obj
+  def make_bridge_source(func_name, call_expr):
+    keys_json = json.dumps(list(original_fields))
+    return (
+        f"function {func_name}(serialized_flags, ...values) {{ "
+        f"  const keys = JSON.parse('{keys_json}'); "
+        f"  const flags = serialized_flags.split(','); "
+        f"  const row = {{}}; "
+        f"  for (let i = 0; i < keys.length; i++) {{ "
+        f"    let val = values[i]; "
+        f"    if (flags[i] === '1') val = JSON.parse(val); "
+        f"    row[keys[i]] = val; "
+        f"  }} "
+        f"  const result = {call_expr}; "
+        f"  return (typeof result === 'object' && result !== null) "
+        f"? '__json__:' + JSON.stringify(result) : result; "

Review Comment:
   ![high](https://www.gstatic.com/codereviewagent/high-priority.svg)
   
   The use of the `__json__:` prefix to signal serialized objects is prone to 
collisions. If the JavaScript function returns a string that happens to start 
with this prefix, it will be incorrectly parsed as JSON in Python, potentially 
leading to a `JSONDecodeError` or unexpected data transformation. Consider 
using a more unique sentinel or, since `quickjs` supports returning native 
Python dictionaries and lists, consider avoiding manual JSON serialization 
entirely for better performance and reliability.



##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -178,18 +177,34 @@ def _check_mapping_arguments(
     raise ValueError(f'{transform_name} cannot specify "name" without "path"')
 
 
-# js2py's JsObjectWrapper object has a self-referencing __dict__ property
-# that cannot be pickled without implementing the __getstate__ and
-# __setstate__ methods.
-class _CustomJsObjectWrapper(JsObjectWrapper):
-  def __init__(self, js_obj):
-    super().__init__(js_obj.__dict__['_obj'])
+class _QuickJsCallable:
+  def __init__(self, source, name=None):
+    self.source = source
+    self.name = name
+    self._func = None
+
+  def _get_func(self):
+    if self._func is None:
+      if quickjs is None:
+        raise ValueError("quickjs is not installed.")
+      context = quickjs.Context()
+      if self.name:
+        context.eval(self.source)
+        self._func = context.get(self.name)
+      else:
+        self._func = context.eval(self.source)
+    return self._func
+
+  def __call__(self, *args, **kwargs):
+    return self._get_func()(*args, **kwargs)

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   The `__call__` method accepts `**kwargs`, but the underlying `quickjs` 
function call typically only supports positional arguments. If keyword 
arguments are passed, they may be ignored or cause a `TypeError` depending on 
the `quickjs` wrapper implementation. It's safer to restrict this to `*args` 
unless keyword argument support is explicitly intended and verified.
   
   ```suggestion
     def __call__(self, *args):
       return self._get_func()(*args)
   ```



##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,78 +225,106 @@ def py_value_to_js_dict(py_value):
 def _expand_javascript_mapping_func(
     original_fields, expression=None, callable=None, path=None, name=None):
 
-  # Check for installed js2py package
-  if js2py is None:
+  # Check for installed quickjs package
+  if quickjs is None:
     raise ValueError(
-        "Javascript mapping functions are not supported on"
-        " Python 3.12 or later.")
-
-  # import remaining js2py objects
-  from js2py import base
-  from js2py.constructors import jsdate
-  from js2py.internals import simplex
-
-  js_array_type = (
-      base.PyJsArray,
-      base.PyJsArrayBuffer,
-      base.PyJsInt8Array,
-      base.PyJsUint8Array,
-      base.PyJsUint8ClampedArray,
-      base.PyJsInt16Array,
-      base.PyJsUint16Array,
-      base.PyJsInt32Array,
-      base.PyJsUint32Array,
-      base.PyJsFloat32Array,
-      base.PyJsFloat64Array)
-
-  def _js_object_to_py_object(obj):
-    if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
-      return base.to_python(obj)
-    elif isinstance(obj, js_array_type):
-      return [_js_object_to_py_object(value) for value in obj.to_list()]
-    elif isinstance(obj, jsdate.PyJsDate):
-      return obj.to_utc_dt()
-    elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
-      return None
-    elif isinstance(obj, base.PyJsError):
-      raise RuntimeError(obj['message'])
-    elif isinstance(obj, base.PyJsObject):
-      return {
-          key: _js_object_to_py_object(value['value'])
-          for (key, value) in obj.own.items()
-      }
-    elif isinstance(obj, base.JsObjectWrapper):
-      return _js_object_to_py_object(obj._obj)
+        "Javascript mapping functions require the 'quickjs' package.")
 
-    return obj
+  def make_bridge_source(func_name, call_expr):
+    keys_json = json.dumps(list(original_fields))
+    return (
+        f"function {func_name}(serialized_flags, ...values) {{ "
+        f"  const keys = JSON.parse('{keys_json}'); "
+        f"  const flags = serialized_flags.split(','); "
+        f"  const row = {{}}; "
+        f"  for (let i = 0; i < keys.length; i++) {{ "
+        f"    let val = values[i]; "
+        f"    if (flags[i] === '1') val = JSON.parse(val); "
+        f"    row[keys[i]] = val; "
+        f"  }} "
+        f"  const result = {call_expr}; "
+        f"  return (typeof result === 'object' && result !== null) "
+        f"? '__json__:' + JSON.stringify(result) : result; "
+        f"}}")
 
   if expression:
-    source = '\n'.join(['function(__row__) {'] + [
-        f'  {name} = __row__.{name}'
-        for name in original_fields if name in expression
-    ] + ['  return (' + expression + ')'] + ['}'])
-    js_func = _CustomJsObjectWrapper(js2py.eval_js(source))
+    args = [
+        name for name in original_fields
+        if name.isidentifier() and name in expression
+    ]
+    parses = []
+    for i, arg in enumerate(args):
+      parses.append(f"  if (flags[{i}] === '1') {arg} = JSON.parse({arg});")
+
+    source = f"""
+function fn(serialized_flags, {", ".join(args)}) {{
+  const flags = serialized_flags.split(',');
+{chr(10).join(parses)}
+  const result = ({expression});
+  return (typeof result === 'object' && result !== null) ?
+    "__json__:" + JSON.stringify(result) : result;
+}}
+"""
+    js_func = _QuickJsCallable(source, "fn")
+    used_fields = args
 
   elif callable:
-    js_func = _CustomJsObjectWrapper(js2py.eval_js(callable))
+    source = make_bridge_source("fn", f"({callable})(row)")
+    js_func = _QuickJsCallable(source, "fn")
+    used_fields = None
 
   else:
     if not path.endswith('.js'):
       raise ValueError(f'File "{path}" is not a valid .js file.')
     udf_code = FileSystems.open(path).read().decode()
-    js = js2py.EvalJs()
-    js.eval(udf_code)
-    js_func = _CustomJsObjectWrapper(getattr(js, name))
+    bridge_source = udf_code + "\n" + make_bridge_source(
+        "bridge_fn", f"{name}(row)")
+    js_func = _QuickJsCallable(bridge_source, "bridge_fn")
+    used_fields = None
 
   def js_wrapper(row):
-    row_as_dict = py_value_to_js_dict(row)
-    try:
-      js_result = js_func(row_as_dict)
-    except simplex.JsException as exn:
-      raise RuntimeError(
-          f"Error evaluating javascript expression: "
-          f"{exn.mes['message']}") from exn
-    return dicts_to_rows(_js_object_to_py_object(js_result))
+    if expression:
+      vals = [py_value_to_js_dict(getattr(row, name)) for name in used_fields]
+      js_vals = []
+      flags = []
+      for val in vals:
+        if isinstance(val, (list, dict)):
+          js_vals.append(json.dumps(val))
+          flags.append('1')
+        else:
+          js_vals.append(val)
+          flags.append('0')
+
+      flags_str = ",".join(flags)
+      try:
+        js_result = js_func(flags_str, *js_vals)
+      except Exception as exn:
+        raise RuntimeError(
+            f"Error evaluating javascript expression: {exn}") from exn
+    else:
+      row_as_dict = py_value_to_js_dict(row)
+      js_vals = []
+      flags = []
+      for name in original_fields:
+        val = row_as_dict.get(name)
+        if isinstance(val, (list, dict)):
+          js_vals.append(json.dumps(val))
+          flags.append('1')
+        else:
+          js_vals.append(val)
+          flags.append('0')
+
+      flags_str = ",".join(flags)
+      try:
+        js_result = js_func(flags_str, *js_vals)
+      except Exception as exn:
+        raise RuntimeError(
+            f"Error evaluating javascript expression: {exn}") from exn

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   There is significant code duplication in `js_wrapper` between the 
`expression` and `non-expression` cases for preparing `js_vals` and `flags`. 
This logic (iterating over fields, checking for `list`/`dict`, and 
JSON-encoding them) could be refactored into a helper function to improve 
maintainability and reduce the risk of logic drift.



##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,78 +225,106 @@ def py_value_to_js_dict(py_value):
 def _expand_javascript_mapping_func(
     original_fields, expression=None, callable=None, path=None, name=None):
 
-  # Check for installed js2py package
-  if js2py is None:
+  # Check for installed quickjs package
+  if quickjs is None:
     raise ValueError(
-        "Javascript mapping functions are not supported on"
-        " Python 3.12 or later.")
-
-  # import remaining js2py objects
-  from js2py import base
-  from js2py.constructors import jsdate
-  from js2py.internals import simplex
-
-  js_array_type = (
-      base.PyJsArray,
-      base.PyJsArrayBuffer,
-      base.PyJsInt8Array,
-      base.PyJsUint8Array,
-      base.PyJsUint8ClampedArray,
-      base.PyJsInt16Array,
-      base.PyJsUint16Array,
-      base.PyJsInt32Array,
-      base.PyJsUint32Array,
-      base.PyJsFloat32Array,
-      base.PyJsFloat64Array)
-
-  def _js_object_to_py_object(obj):
-    if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
-      return base.to_python(obj)
-    elif isinstance(obj, js_array_type):
-      return [_js_object_to_py_object(value) for value in obj.to_list()]
-    elif isinstance(obj, jsdate.PyJsDate):
-      return obj.to_utc_dt()
-    elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
-      return None
-    elif isinstance(obj, base.PyJsError):
-      raise RuntimeError(obj['message'])
-    elif isinstance(obj, base.PyJsObject):
-      return {
-          key: _js_object_to_py_object(value['value'])
-          for (key, value) in obj.own.items()
-      }
-    elif isinstance(obj, base.JsObjectWrapper):
-      return _js_object_to_py_object(obj._obj)
+        "Javascript mapping functions require the 'quickjs' package.")
 
-    return obj
+  def make_bridge_source(func_name, call_expr):
+    keys_json = json.dumps(list(original_fields))
+    return (
+        f"function {func_name}(serialized_flags, ...values) {{ "
+        f"  const keys = JSON.parse('{keys_json}'); "

Review Comment:
   ![high](https://www.gstatic.com/codereviewagent/high-priority.svg)
   
   Using `JSON.parse` with a single-quoted string literal for `keys_json` is 
fragile. If any field name in `original_fields` contains a single quote (e.g., 
`user's_name`), the resulting JavaScript will have a syntax error because the 
single quote in the field name will prematurely terminate the string literal in 
`JSON.parse('...')`. Since `keys_json` is already a valid JSON string (and thus 
a valid JavaScript array literal), you can embed it directly without 
`JSON.parse`.
   
   ```suggestion
       f"  const keys = {keys_json}; "
   ```



##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,78 +225,106 @@ def py_value_to_js_dict(py_value):
 def _expand_javascript_mapping_func(
     original_fields, expression=None, callable=None, path=None, name=None):
 
-  # Check for installed js2py package
-  if js2py is None:
+  # Check for installed quickjs package
+  if quickjs is None:
     raise ValueError(
-        "Javascript mapping functions are not supported on"
-        " Python 3.12 or later.")
-
-  # import remaining js2py objects
-  from js2py import base
-  from js2py.constructors import jsdate
-  from js2py.internals import simplex
-
-  js_array_type = (
-      base.PyJsArray,
-      base.PyJsArrayBuffer,
-      base.PyJsInt8Array,
-      base.PyJsUint8Array,
-      base.PyJsUint8ClampedArray,
-      base.PyJsInt16Array,
-      base.PyJsUint16Array,
-      base.PyJsInt32Array,
-      base.PyJsUint32Array,
-      base.PyJsFloat32Array,
-      base.PyJsFloat64Array)
-
-  def _js_object_to_py_object(obj):
-    if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
-      return base.to_python(obj)
-    elif isinstance(obj, js_array_type):
-      return [_js_object_to_py_object(value) for value in obj.to_list()]
-    elif isinstance(obj, jsdate.PyJsDate):
-      return obj.to_utc_dt()
-    elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
-      return None
-    elif isinstance(obj, base.PyJsError):
-      raise RuntimeError(obj['message'])
-    elif isinstance(obj, base.PyJsObject):
-      return {
-          key: _js_object_to_py_object(value['value'])
-          for (key, value) in obj.own.items()
-      }
-    elif isinstance(obj, base.JsObjectWrapper):
-      return _js_object_to_py_object(obj._obj)
+        "Javascript mapping functions require the 'quickjs' package.")
 
-    return obj
+  def make_bridge_source(func_name, call_expr):
+    keys_json = json.dumps(list(original_fields))
+    return (
+        f"function {func_name}(serialized_flags, ...values) {{ "
+        f"  const keys = JSON.parse('{keys_json}'); "
+        f"  const flags = serialized_flags.split(','); "
+        f"  const row = {{}}; "
+        f"  for (let i = 0; i < keys.length; i++) {{ "
+        f"    let val = values[i]; "
+        f"    if (flags[i] === '1') val = JSON.parse(val); "
+        f"    row[keys[i]] = val; "
+        f"  }} "
+        f"  const result = {call_expr}; "
+        f"  return (typeof result === 'object' && result !== null) "
+        f"? '__json__:' + JSON.stringify(result) : result; "
+        f"}}")
 
   if expression:
-    source = '\n'.join(['function(__row__) {'] + [
-        f'  {name} = __row__.{name}'
-        for name in original_fields if name in expression
-    ] + ['  return (' + expression + ')'] + ['}'])
-    js_func = _CustomJsObjectWrapper(js2py.eval_js(source))
+    args = [
+        name for name in original_fields
+        if name.isidentifier() and name in expression
+    ]
+    parses = []
+    for i, arg in enumerate(args):
+      parses.append(f"  if (flags[{i}] === '1') {arg} = JSON.parse({arg});")
+
+    source = f"""
+function fn(serialized_flags, {", ".join(args)}) {{
+  const flags = serialized_flags.split(',');
+{chr(10).join(parses)}
+  const result = ({expression});
+  return (typeof result === 'object' && result !== null) ?
+    "__json__:" + JSON.stringify(result) : result;
+}}
+"""
+    js_func = _QuickJsCallable(source, "fn")
+    used_fields = args
 
   elif callable:
-    js_func = _CustomJsObjectWrapper(js2py.eval_js(callable))
+    source = make_bridge_source("fn", f"({callable})(row)")
+    js_func = _QuickJsCallable(source, "fn")
+    used_fields = None
 
   else:
     if not path.endswith('.js'):
       raise ValueError(f'File "{path}" is not a valid .js file.')
     udf_code = FileSystems.open(path).read().decode()
-    js = js2py.EvalJs()
-    js.eval(udf_code)
-    js_func = _CustomJsObjectWrapper(getattr(js, name))
+    bridge_source = udf_code + "\n" + make_bridge_source(
+        "bridge_fn", f"{name}(row)")
+    js_func = _QuickJsCallable(bridge_source, "bridge_fn")
+    used_fields = None
 
   def js_wrapper(row):
-    row_as_dict = py_value_to_js_dict(row)
-    try:
-      js_result = js_func(row_as_dict)
-    except simplex.JsException as exn:
-      raise RuntimeError(
-          f"Error evaluating javascript expression: "
-          f"{exn.mes['message']}") from exn
-    return dicts_to_rows(_js_object_to_py_object(js_result))
+    if expression:
+      vals = [py_value_to_js_dict(getattr(row, name)) for name in used_fields]
+      js_vals = []
+      flags = []
+      for val in vals:
+        if isinstance(val, (list, dict)):
+          js_vals.append(json.dumps(val))
+          flags.append('1')
+        else:
+          js_vals.append(val)
+          flags.append('0')
+
+      flags_str = ",".join(flags)
+      try:
+        js_result = js_func(flags_str, *js_vals)
+      except Exception as exn:
+        raise RuntimeError(
+            f"Error evaluating javascript expression: {exn}") from exn
+    else:
+      row_as_dict = py_value_to_js_dict(row)
+      js_vals = []
+      flags = []
+      for name in original_fields:
+        val = row_as_dict.get(name)
+        if isinstance(val, (list, dict)):
+          js_vals.append(json.dumps(val))
+          flags.append('1')
+        else:
+          js_vals.append(val)
+          flags.append('0')
+
+      flags_str = ",".join(flags)
+      try:
+        js_result = js_func(flags_str, *js_vals)
+      except Exception as exn:
+        raise RuntimeError(
+            f"Error evaluating javascript expression: {exn}") from exn
+
+    if isinstance(js_result, str) and js_result.startswith("__json__:"):
+      js_result = json.loads(js_result[9:])

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   The switch to JSON-based bridging for complex types results in a loss of 
automatic type conversion for certain objects, such as JavaScript `Date` 
objects. In the previous `js2py` implementation, `Date` objects were converted 
to Python `datetime` objects. With the current `JSON.stringify` approach, they 
will be converted to ISO strings in Python. If this change in behavior is 
unintended, you may need to add explicit handling for date types in the bridge.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to