gemini-code-assist[bot] commented on code in PR #38130:
URL: https://github.com/apache/beam/pull/38130#discussion_r3083410525
##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,78 +224,49 @@ def py_value_to_js_dict(py_value):
def _expand_javascript_mapping_func(
original_fields, expression=None, callable=None, path=None, name=None):
- # Check for installed js2py package
- if js2py is None:
+ # Check for installed quickjs package
+ if quickjs is None:
raise ValueError(
- "Javascript mapping functions are not supported on"
- " Python 3.12 or later.")
-
- # import remaining js2py objects
- from js2py import base
- from js2py.constructors import jsdate
- from js2py.internals import simplex
-
- js_array_type = (
- base.PyJsArray,
- base.PyJsArrayBuffer,
- base.PyJsInt8Array,
- base.PyJsUint8Array,
- base.PyJsUint8ClampedArray,
- base.PyJsInt16Array,
- base.PyJsUint16Array,
- base.PyJsInt32Array,
- base.PyJsUint32Array,
- base.PyJsFloat32Array,
- base.PyJsFloat64Array)
-
- def _js_object_to_py_object(obj):
- if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
- return base.to_python(obj)
- elif isinstance(obj, js_array_type):
- return [_js_object_to_py_object(value) for value in obj.to_list()]
- elif isinstance(obj, jsdate.PyJsDate):
- return obj.to_utc_dt()
- elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
- return None
- elif isinstance(obj, base.PyJsError):
- raise RuntimeError(obj['message'])
- elif isinstance(obj, base.PyJsObject):
- return {
- key: _js_object_to_py_object(value['value'])
- for (key, value) in obj.own.items()
- }
- elif isinstance(obj, base.JsObjectWrapper):
- return _js_object_to_py_object(obj._obj)
+ "Javascript mapping functions require the 'quickjs' package.")
- return obj
+ import json
if expression:
- source = '\n'.join(['function(__row__) {'] + [
- f' {name} = __row__.{name}'
- for name in original_fields if name in expression
- ] + [' return (' + expression + ')'] + ['}'])
- js_func = _CustomJsObjectWrapper(js2py.eval_js(source))
+ source = '\n'.join(
+ ['function fn(json_row) {', ' const __row__ = JSON.parse(json_row);']
+
+ [
+ f' const {name} = __row__.{name};'
+ for name in original_fields if name in expression
+ ] + [' return JSON.stringify(' + expression + ');'] + ['}'])
+ js_func = _QuickJsCallable(source, "fn")
elif callable:
- js_func = _CustomJsObjectWrapper(js2py.eval_js(callable))
+ # Wrap the callable in a named function to use quickjs.Function
+ source = (
+ f"function fn(json_row) {{ "
+ f"const row = JSON.parse(json_row); "
+ f"return JSON.stringify(({callable})(row)); }}")
+ js_func = _QuickJsCallable(source, "fn")
else:
if not path.endswith('.js'):
raise ValueError(f'File "{path}" is not a valid .js file.')
udf_code = FileSystems.open(path).read().decode()
- js = js2py.EvalJs()
- js.eval(udf_code)
- js_func = _CustomJsObjectWrapper(getattr(js, name))
+ bridge_source = (
+ udf_code + f"\nfunction bridge_fn(json_row) {{ "
+ f"return JSON.stringify({name}(JSON.parse(json_row))); }}")
+ js_func = _QuickJsCallable(bridge_source, "bridge_fn")
Review Comment:

Manual JSON serialization in the bridge function is unnecessary. Passing the
object directly is more efficient.
```python
bridge_source = (
udf_code + f"\nfunction bridge_fn(row) {{ "
f"return {name}(row); }}")
js_func = _QuickJsCallable(bridge_source, "bridge_fn")
```
##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,78 +224,49 @@ def py_value_to_js_dict(py_value):
def _expand_javascript_mapping_func(
original_fields, expression=None, callable=None, path=None, name=None):
- # Check for installed js2py package
- if js2py is None:
+ # Check for installed quickjs package
+ if quickjs is None:
raise ValueError(
- "Javascript mapping functions are not supported on"
- " Python 3.12 or later.")
-
- # import remaining js2py objects
- from js2py import base
- from js2py.constructors import jsdate
- from js2py.internals import simplex
-
- js_array_type = (
- base.PyJsArray,
- base.PyJsArrayBuffer,
- base.PyJsInt8Array,
- base.PyJsUint8Array,
- base.PyJsUint8ClampedArray,
- base.PyJsInt16Array,
- base.PyJsUint16Array,
- base.PyJsInt32Array,
- base.PyJsUint32Array,
- base.PyJsFloat32Array,
- base.PyJsFloat64Array)
-
- def _js_object_to_py_object(obj):
- if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
- return base.to_python(obj)
- elif isinstance(obj, js_array_type):
- return [_js_object_to_py_object(value) for value in obj.to_list()]
- elif isinstance(obj, jsdate.PyJsDate):
- return obj.to_utc_dt()
- elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
- return None
- elif isinstance(obj, base.PyJsError):
- raise RuntimeError(obj['message'])
- elif isinstance(obj, base.PyJsObject):
- return {
- key: _js_object_to_py_object(value['value'])
- for (key, value) in obj.own.items()
- }
- elif isinstance(obj, base.JsObjectWrapper):
- return _js_object_to_py_object(obj._obj)
+ "Javascript mapping functions require the 'quickjs' package.")
- return obj
+ import json
if expression:
- source = '\n'.join(['function(__row__) {'] + [
- f' {name} = __row__.{name}'
- for name in original_fields if name in expression
- ] + [' return (' + expression + ')'] + ['}'])
- js_func = _CustomJsObjectWrapper(js2py.eval_js(source))
+ source = '\n'.join(
+ ['function fn(json_row) {', ' const __row__ = JSON.parse(json_row);']
+
+ [
+ f' const {name} = __row__.{name};'
+ for name in original_fields if name in expression
+ ] + [' return JSON.stringify(' + expression + ');'] + ['}'])
+ js_func = _QuickJsCallable(source, "fn")
elif callable:
- js_func = _CustomJsObjectWrapper(js2py.eval_js(callable))
+ # Wrap the callable in a named function to use quickjs.Function
+ source = (
+ f"function fn(json_row) {{ "
+ f"const row = JSON.parse(json_row); "
+ f"return JSON.stringify(({callable})(row)); }}")
+ js_func = _QuickJsCallable(source, "fn")
else:
if not path.endswith('.js'):
raise ValueError(f'File "{path}" is not a valid .js file.')
udf_code = FileSystems.open(path).read().decode()
- js = js2py.EvalJs()
- js.eval(udf_code)
- js_func = _CustomJsObjectWrapper(getattr(js, name))
+ bridge_source = (
+ udf_code + f"\nfunction bridge_fn(json_row) {{ "
+ f"return JSON.stringify({name}(JSON.parse(json_row))); }}")
+ js_func = _QuickJsCallable(bridge_source, "bridge_fn")
def js_wrapper(row):
row_as_dict = py_value_to_js_dict(row)
+ row_json = json.dumps(row_as_dict)
try:
- js_result = js_func(row_as_dict)
- except simplex.JsException as exn:
+ js_result_json = js_func(row_json)
+ js_result = json.loads(js_result_json)
Review Comment:

By removing the manual JSON serialization in the JavaScript source, you can
pass the dictionary directly to the `js_func` and receive a Python object back,
avoiding the overhead of `json.dumps` and `json.loads`.
```suggestion
try:
js_result = js_func(row_as_dict)
except Exception as exn:
```
##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,78 +224,49 @@ def py_value_to_js_dict(py_value):
def _expand_javascript_mapping_func(
original_fields, expression=None, callable=None, path=None, name=None):
- # Check for installed js2py package
- if js2py is None:
+ # Check for installed quickjs package
+ if quickjs is None:
raise ValueError(
- "Javascript mapping functions are not supported on"
- " Python 3.12 or later.")
-
- # import remaining js2py objects
- from js2py import base
- from js2py.constructors import jsdate
- from js2py.internals import simplex
-
- js_array_type = (
- base.PyJsArray,
- base.PyJsArrayBuffer,
- base.PyJsInt8Array,
- base.PyJsUint8Array,
- base.PyJsUint8ClampedArray,
- base.PyJsInt16Array,
- base.PyJsUint16Array,
- base.PyJsInt32Array,
- base.PyJsUint32Array,
- base.PyJsFloat32Array,
- base.PyJsFloat64Array)
-
- def _js_object_to_py_object(obj):
- if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
- return base.to_python(obj)
- elif isinstance(obj, js_array_type):
- return [_js_object_to_py_object(value) for value in obj.to_list()]
- elif isinstance(obj, jsdate.PyJsDate):
- return obj.to_utc_dt()
- elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
- return None
- elif isinstance(obj, base.PyJsError):
- raise RuntimeError(obj['message'])
- elif isinstance(obj, base.PyJsObject):
- return {
- key: _js_object_to_py_object(value['value'])
- for (key, value) in obj.own.items()
- }
- elif isinstance(obj, base.JsObjectWrapper):
- return _js_object_to_py_object(obj._obj)
+ "Javascript mapping functions require the 'quickjs' package.")
- return obj
+ import json
if expression:
- source = '\n'.join(['function(__row__) {'] + [
- f' {name} = __row__.{name}'
- for name in original_fields if name in expression
- ] + [' return (' + expression + ')'] + ['}'])
- js_func = _CustomJsObjectWrapper(js2py.eval_js(source))
+ source = '\n'.join(
+ ['function fn(json_row) {', ' const __row__ = JSON.parse(json_row);']
+
+ [
+ f' const {name} = __row__.{name};'
+ for name in original_fields if name in expression
+ ] + [' return JSON.stringify(' + expression + ');'] + ['}'])
+ js_func = _QuickJsCallable(source, "fn")
Review Comment:

The use of `JSON.parse` and `JSON.stringify` inside the JavaScript code,
combined with `json.dumps` and `json.loads` in Python, introduces significant
overhead for every row processed. The `quickjs` library supports direct
conversion between Python dictionaries/lists and JavaScript objects. Removing
this manual serialization will improve performance and robustness (e.g.,
handling of `NaN` or `Infinity` which are not standard JSON). Additionally,
using bracket notation for field access is safer for field names that might
contain special characters.
```python
if expression:
source = '\n'.join(
['function fn(__row__) {'] +
[
f' const {name} = __row__["{name}"];'
for name in original_fields if name.isidentifier() and name in
expression
] + [' return (' + expression + ');'] + ['}'])
js_func = _QuickJsCallable(source, "fn")
```
##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,78 +224,49 @@ def py_value_to_js_dict(py_value):
def _expand_javascript_mapping_func(
original_fields, expression=None, callable=None, path=None, name=None):
- # Check for installed js2py package
- if js2py is None:
+ # Check for installed quickjs package
+ if quickjs is None:
raise ValueError(
- "Javascript mapping functions are not supported on"
- " Python 3.12 or later.")
-
- # import remaining js2py objects
- from js2py import base
- from js2py.constructors import jsdate
- from js2py.internals import simplex
-
- js_array_type = (
- base.PyJsArray,
- base.PyJsArrayBuffer,
- base.PyJsInt8Array,
- base.PyJsUint8Array,
- base.PyJsUint8ClampedArray,
- base.PyJsInt16Array,
- base.PyJsUint16Array,
- base.PyJsInt32Array,
- base.PyJsUint32Array,
- base.PyJsFloat32Array,
- base.PyJsFloat64Array)
-
- def _js_object_to_py_object(obj):
- if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
- return base.to_python(obj)
- elif isinstance(obj, js_array_type):
- return [_js_object_to_py_object(value) for value in obj.to_list()]
- elif isinstance(obj, jsdate.PyJsDate):
- return obj.to_utc_dt()
- elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
- return None
- elif isinstance(obj, base.PyJsError):
- raise RuntimeError(obj['message'])
- elif isinstance(obj, base.PyJsObject):
- return {
- key: _js_object_to_py_object(value['value'])
- for (key, value) in obj.own.items()
- }
- elif isinstance(obj, base.JsObjectWrapper):
- return _js_object_to_py_object(obj._obj)
+ "Javascript mapping functions require the 'quickjs' package.")
- return obj
+ import json
if expression:
- source = '\n'.join(['function(__row__) {'] + [
- f' {name} = __row__.{name}'
- for name in original_fields if name in expression
- ] + [' return (' + expression + ')'] + ['}'])
- js_func = _CustomJsObjectWrapper(js2py.eval_js(source))
+ source = '\n'.join(
+ ['function fn(json_row) {', ' const __row__ = JSON.parse(json_row);']
+
+ [
+ f' const {name} = __row__.{name};'
+ for name in original_fields if name in expression
+ ] + [' return JSON.stringify(' + expression + ');'] + ['}'])
+ js_func = _QuickJsCallable(source, "fn")
elif callable:
- js_func = _CustomJsObjectWrapper(js2py.eval_js(callable))
+ # Wrap the callable in a named function to use quickjs.Function
+ source = (
+ f"function fn(json_row) {{ "
+ f"const row = JSON.parse(json_row); "
+ f"return JSON.stringify(({callable})(row)); }}")
+ js_func = _QuickJsCallable(source, "fn")
Review Comment:

Manual JSON serialization here is unnecessary and impacts performance.
`quickjs` can handle the Python dictionary directly.
```suggestion
source = (
f"function fn(row) {{ "
f"return ({callable})(row); }}")
js_func = _QuickJsCallable(source, "fn")
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]