gemini-code-assist[bot] commented on code in PR #38769:
URL: https://github.com/apache/beam/pull/38769#discussion_r3341187524


##########
sdks/python/apache_beam/ml/transforms/base_test.py:
##########
@@ -841,6 +843,46 @@ def test_save_and_load_run_inference(self):
         self.assertListEqual(
             get_keys(model_handler), get_keys(loaded_model_handler))
 
+  @parameterized.expand([
+      # Pipelines pinned to a version older than 2.75.0 keep the pre-2.75.0
+      # jsonpickle behavior (safe=False, which permits eval-based decoding).
+      param(update_compatibility_version='2.74.0', expected_safe=False),
+      # The breaking-change version itself and newer decode securely.
+      param(update_compatibility_version='2.75.0', expected_safe=True),
+      # Pipelines that do not set the option (the common case) decode securely.
+      param(update_compatibility_version=None, expected_safe=True),
+  ])
+  def test_load_attributes_safe_flag_follows_compat_version(
+      self, update_compatibility_version, expected_safe):
+    data = [{'x': 'Hello world'}, {'x': 'Apache Beam'}]
+    with beam.Pipeline() as p:
+      _ = (
+          p
+          | beam.Create(data)
+          | base.MLTransform(
+              write_artifact_location=self.artifact_location).with_transform(
+                  FakeEmbeddingsManager(columns=['x'])))
+
+    # FakeEmbeddingsManager reverses the values of the embedded columns.
+    expected_data = [{'x': d['x'][::-1]} for d in data]
+
+    options = PipelineOptions(
+        update_compatibility_version=update_compatibility_version)
+    with mock.patch.object(base.jsonpickle,
+                           'decode',
+                           wraps=base.jsonpickle.decode) as mock_decode:
+      with beam.Pipeline(options=options) as p:
+        result = (
+            p
+            | beam.Create(data)
+            | base.MLTransform(read_artifact_location=self.artifact_location))
+        assert_that(result, equal_to(expected_data))
+
+    safe_flags = [
+        call.kwargs.get('safe') for call in mock_decode.call_args_list
+    ]
+    self.assertEqual(safe_flags, [expected_safe])

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   To prevent test failures in environments running `jsonpickle < 4.0.0`, we
   should assert the `safe` flag only if the installed version of `jsonpickle`
   supports that parameter.
   
   ```python
       import inspect
       if 'safe' in inspect.signature(base.jsonpickle.decode).parameters:
         safe_flags = [
             call.kwargs.get('safe') for call in mock_decode.call_args_list
         ]
         self.assertEqual(safe_flags, [expected_safe])
       else:
         self.assertTrue(mock_decode.called)
   ```



##########
sdks/python/apache_beam/ml/transforms/base.py:
##########
@@ -591,7 +592,18 @@ def save_attributes(
   def load_attributes(artifact_location):
     with FileSystems.open(os.path.join(artifact_location, _ATTRIBUTE_FILE_NAME),
                           'rb') as f:
-      return jsonpickle.decode(f.read())
+      # load_attributes runs eagerly during MLTransform.expand() at pipeline
+      # construction time, so the pipeline's options are available via the
+      # construction-time context.
+      pipeline_options = get_pipeline_options()
+      safe = True
+      if (pipeline_options is not None and
+          pipeline_options.is_compat_version_prior_to("2.75.0")):
+        # Keep the pre-2.75.0 jsonpickle behavior (safe=False permits
+        # eval-based decoding) for backwards compatibility with already-staged
+        # artifacts.
+        safe = False
+      return jsonpickle.decode(f.read(), safe=safe)

Review Comment:
   ![high](https://www.gstatic.com/codereviewagent/high-priority.svg)
   
   Since `jsonpickle` versions `< 4.0.0` are still supported (as specified in
   `setup.py` with `jsonpickle>=3.0.0,<5.0.0`), calling `jsonpickle.decode` with
   the `safe` keyword argument will raise a `TypeError` in environments running
   older versions of `jsonpickle`. We should pass the `safe` argument only if
   the installed version of `jsonpickle` supports it.
   
   ```suggestion
         pipeline_options = get_pipeline_options()
         safe = True
         if (pipeline_options is not None and
             pipeline_options.is_compat_version_prior_to("2.75.0")):
           # Keep the pre-2.75.0 jsonpickle behavior (safe=False permits
           # eval-based decoding) for backwards compatibility with already-staged
           # artifacts.
           safe = False
         import inspect
         kwargs = {}
         if 'safe' in inspect.signature(jsonpickle.decode).parameters:
           kwargs['safe'] = safe
         return jsonpickle.decode(f.read(), **kwargs)
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to