gemini-code-assist[bot] commented on code in PR #38473:
URL: https://github.com/apache/beam/pull/38473#discussion_r3259419558
##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -199,6 +212,12 @@ def py_value_to_js_dict(py_value):
py_value = py_value._asdict()
if isinstance(py_value, dict):
return {key: py_value_to_js_dict(value) for key, value in py_value.items()}
+ elif isinstance(py_value, bytes):
+ return py_value.decode('utf-8', errors='replace')
Review Comment:

Decoding `bytes` using `utf-8` with `errors='replace'` can lead to silent
data corruption if the input contains arbitrary binary data. While this
facilitates treating bytes as strings in JavaScript, it's destructive for
non-textual data. Consider using `errors='strict'` or providing a way for users
to handle binary data explicitly (e.g., as `Uint8Array` in JS).
##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -210,80 +229,41 @@ def py_value_to_js_dict(py_value):
def _expand_javascript_mapping_func(
original_fields, expression=None, callable=None, path=None, name=None):
- # Check for installed js2py package
- if js2py is None:
+ if quickjs is None:
raise ValueError(
- "Javascript mapping functions are not supported on"
- " Python 3.12 or later.")
-
- # import remaining js2py objects
- from js2py import base
- from js2py.constructors import jsdate
- from js2py.internals import simplex
-
- js_array_type = (
- base.PyJsArray,
- base.PyJsArrayBuffer,
- base.PyJsInt8Array,
- base.PyJsUint8Array,
- base.PyJsUint8ClampedArray,
- base.PyJsInt16Array,
- base.PyJsUint16Array,
- base.PyJsInt32Array,
- base.PyJsUint32Array,
- base.PyJsFloat32Array,
- base.PyJsFloat64Array)
-
- def _js_object_to_py_object(obj):
- if isinstance(obj, (base.PyJsNumber, base.PyJsString, base.PyJsBoolean)):
- return base.to_python(obj)
- elif isinstance(obj, js_array_type):
- return [_js_object_to_py_object(value) for value in obj.to_list()]
- elif isinstance(obj, jsdate.PyJsDate):
- return obj.to_utc_dt()
- elif isinstance(obj, (base.PyJsNull, base.PyJsUndefined)):
- return None
- elif isinstance(obj, base.PyJsError):
- raise RuntimeError(obj['message'])
- elif isinstance(obj, base.PyJsObject):
- return {
- key: _js_object_to_py_object(value['value'])
- for (key, value) in obj.own.items()
- }
- elif isinstance(obj, base.JsObjectWrapper):
- return _js_object_to_py_object(obj._obj)
-
- return obj
+ "Javascript mapping functions are not supported because the "
+ "quickjs-ng library is not installed.")
if expression:
- source = '\n'.join(['function(__row__) {'] + [
- f' {name} = __row__.{name}'
- for name in original_fields if name in expression
- ] + [' return (' + expression + ')'] + ['}'])
- js_func = _CustomJsObjectWrapper(js2py.eval_js(source))
+ unpacking_code = '\n'.join([
+ f" var {name} = __row__['{name}'];" for name in original_fields
+ if name in expression and name.isidentifier()
+ ])
+ source_code = f"""
+ function udf(__row__) {{
+ {unpacking_code}
+ return ({expression});
+ }}
+ """
+ entrypoint = 'udf'
elif callable:
- js_func = _CustomJsObjectWrapper(js2py.eval_js(callable))
+ match = re.search(r'(?:async\s+)?function\s+([a-zA-Z0-9_]+)', callable)
Review Comment:

The regular expression used to extract the function name is quite
restrictive and will fail for arrow functions (e.g., `(x) => x.foo`) or
anonymous function expressions assigned to variables. A more robust approach
would be to wrap the `callable` string in a way that makes it a valid
expression for `quickjs.Function` regardless of its format.
```python
source_code = f"const __udf__ = ({callable});"
entrypoint = '__udf__'
```
##########
sdks/python/apache_beam/yaml/yaml_mapping.py:
##########
@@ -199,6 +212,12 @@ def py_value_to_js_dict(py_value):
py_value = py_value._asdict()
if isinstance(py_value, dict):
return {key: py_value_to_js_dict(value) for key, value in py_value.items()}
+ elif isinstance(py_value, bytes):
+ return py_value.decode('utf-8', errors='replace')
+ elif isinstance(py_value, (datetime.datetime, datetime.date, datetime.time)):
+ return py_value.isoformat()
+ elif isinstance(py_value, Decimal):
+ return float(py_value)
Review Comment:

Converting `Decimal` to `float` can result in a loss of precision, which is
often the reason for using `Decimal` in the first place. Since JavaScript
numbers are 64-bit floats, this might be unavoidable for standard numeric
operations, but it should be noted that high-precision values may be altered.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]