gemini-code-assist[bot] commented on code in PR #38473:
URL: https://github.com/apache/beam/pull/38473#discussion_r3229855155


##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -17,7 +17,9 @@
 
 """This module defines the basic MapToFields operation."""
 import itertools
+import json
 import re
+import threading

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   Define the thread-local cache at the module level to avoid redundant global 
lookups and non-idiomatic lazy initialization within the class method.
   
   ```suggestion
   import threading
   
   _THREAD_LOCAL_JS_CACHE = threading.local()
   ```



##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -178,18 +178,40 @@ def _check_mapping_arguments(
     raise ValueError(f'{transform_name} cannot specify "name" without "path"')
 
 
-# js2py's JsObjectWrapper object has a self-referencing __dict__ property
-# that cannot be pickled without implementing the __getstate__ and
-# __setstate__ methods.
-class _CustomJsObjectWrapper(JsObjectWrapper):
-  def __init__(self, js_obj):
-    super().__init__(js_obj.__dict__['_obj'])
+class _JsWrapper:
+  def __init__(self, source_code, entrypoint_name):
+    self.source_code = source_code
+    self.entrypoint_name = entrypoint_name
 
-  def __getstate__(self):
-    return self.__dict__.copy()
+  def _get_wrapper_fn(self):
+    global _THREAD_LOCAL_JS_CACHE
+    if '_THREAD_LOCAL_JS_CACHE' not in globals():
+      globals()['_THREAD_LOCAL_JS_CACHE'] = threading.local()
 
-  def __setstate__(self, state):
-    self.__dict__.update(state)
+    cache = globals()['_THREAD_LOCAL_JS_CACHE']
+    if not hasattr(cache, 'functions'):
+      cache.functions = {}

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   Simplify the cache access by using the module-level variable defined above. 
This avoids the use of `globals()` and the `global` keyword, which is cleaner 
and more idiomatic.
   
   ```suggestion
       if not hasattr(_THREAD_LOCAL_JS_CACHE, 'functions'):
         _THREAD_LOCAL_JS_CACHE.functions = {}
       cache = _THREAD_LOCAL_JS_CACHE
   ```



##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -178,18 +178,40 @@ def _check_mapping_arguments(
     raise ValueError(f'{transform_name} cannot specify "name" without "path"')
 
 
-# js2py's JsObjectWrapper object has a self-referencing __dict__ property
-# that cannot be pickled without implementing the __getstate__ and
-# __setstate__ methods.
-class _CustomJsObjectWrapper(JsObjectWrapper):
-  def __init__(self, js_obj):
-    super().__init__(js_obj.__dict__['_obj'])
+class _JsWrapper:
+  def __init__(self, source_code, entrypoint_name):
+    self.source_code = source_code
+    self.entrypoint_name = entrypoint_name
 
-  def __getstate__(self):
-    return self.__dict__.copy()
+  def _get_wrapper_fn(self):
+    global _THREAD_LOCAL_JS_CACHE
+    if '_THREAD_LOCAL_JS_CACHE' not in globals():
+      globals()['_THREAD_LOCAL_JS_CACHE'] = threading.local()
 
-  def __setstate__(self, state):
-    self.__dict__.update(state)
+    cache = globals()['_THREAD_LOCAL_JS_CACHE']
+    if not hasattr(cache, 'functions'):
+      cache.functions = {}
+
+    cache_key = (self.source_code, self.entrypoint_name)
+    if cache_key not in cache.functions:
+      import quickjs

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   The `quickjs` module is already imported at the top of the file (line 60). 
This local import is redundant.



##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,80 +232,54 @@ def py_value_to_js_dict(py_value):
 def _expand_javascript_mapping_func(
     original_fields, expression=None, callable=None, path=None, name=None):
 
-  # Check for installed js2py package
-  if js2py is None:
+  if quickjs is None:
     raise ValueError(
-        "Javascript mapping functions are not supported on"
-        " Python 3.12 or later.")
-
-  # import remaining js2py objects
-  from js2py import base
-  from js2py.constructors import jsdate
-  from js2py.internals import simplex
-
-  js_array_type = (
-      base.PyJsArray,
-      base.PyJsArrayBuffer,
-      base.PyJsInt8Array,
-      base.PyJsUint8Array,
-      base.PyJsUint8ClampedArray,
-      base.PyJsInt16Array,
-      base.PyJsUint16Array,
-      base.PyJsInt32Array,
-      base.PyJsUint32Array,
-      base.PyJsFloat32Array,
-      base.PyJsFloat64Array)
-
-  def _js_object_to_py_object(obj):
-    if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
-      return base.to_python(obj)
-    elif isinstance(obj, js_array_type):
-      return [_js_object_to_py_object(value) for value in obj.to_list()]
-    elif isinstance(obj, jsdate.PyJsDate):
-      return obj.to_utc_dt()
-    elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
-      return None
-    elif isinstance(obj, base.PyJsError):
-      raise RuntimeError(obj['message'])
-    elif isinstance(obj, base.PyJsObject):
-      return {
-          key: _js_object_to_py_object(value['value'])
-          for (key, value) in obj.own.items()
-      }
-    elif isinstance(obj, base.JsObjectWrapper):
-      return _js_object_to_py_object(obj._obj)
-
-    return obj
+        "Javascript mapping functions are not supported because the "
+        "quickjs-ng library is not installed.")
 
   if expression:
-    source = '\n'.join(['function(__row__) {'] + [
-        f'  {name} = __row__.{name}'
-        for name in original_fields if name in expression
-    ] + ['  return (' + expression + ')'] + ['}'])
-    js_func = _CustomJsObjectWrapper(js2py.eval_js(source))
+    unpacking_code = '\n'.join([
+        f'  var {name} = __row__.{name};' for name in original_fields
+        if name in expression
+    ])
+    source_code = f"""
+    const udf = function(__row__) {{
+    {unpacking_code}
+      return ({expression});
+    }};
+    function wrapper(json_str) {{
+      return udf(JSON.parse(json_str));
+    }}
+    """
+    entrypoint = 'wrapper'
 
   elif callable:
-    js_func = _CustomJsObjectWrapper(js2py.eval_js(callable))
+    match = re.search(r'function\s+([a-zA-Z0-9_]+)\s*\(', callable)

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   The current regex for finding the function name is quite restrictive. It 
won't match `async` functions or functions with different spacing. A more 
robust regex would improve compatibility with various JavaScript function 
declaration styles.
   
   ```suggestion
       match = re.search(r'(?:async\s+)?function\s+([a-zA-Z0-9_]+)', callable)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to