gemini-code-assist[bot] commented on code in PR #38429:
URL: https://github.com/apache/beam/pull/38429#discussion_r3214662452


##########
sdks/python/apache_beam/runners/common.py:
##########
@@ -1667,6 +1676,13 @@ def handle_process_outputs(
     if results is None:
       results = []
 
+    if self._check_user_dofn_output and isinstance(results, (str, bytes, 
dict)):
+      object_type = type(results).__name__
+      raise TypeError(
+          'Returning a %s from a ParDo or FlatMap is discouraged. '
+          'Please use list("%s") if you really want this behavior.' %
+          (object_type, results))

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   The error message suggestion uses hardcoded double quotes around the `%s` 
placeholder for `results`. This produces incorrect Python code in the 
suggestion for `dict` and `bytes` types, or if a string contains quotes. For 
example, for a dictionary, it would suggest `list("{'a': 1}")` (which iterates 
over characters of the string representation) instead of `list({'a': 1})` 
(which iterates over keys). 
   
   Using `%r` (repr) without the extra quotes is more robust as it correctly 
represents the value and handles escaping for all supported types.
   
   ```suggestion
         raise TypeError(
             'Returning a %s from a ParDo or FlatMap is discouraged. '
             'Please use list(%r) if you really want this behavior.' %
             (object_type, results))
   ```



##########
sdks/python/apache_beam/runners/common_test.py:
##########
@@ -154,6 +154,43 @@ def process(self, element, mykey=DoFn.KeyParam):
       test_stream = (TestStream().advance_watermark_to(10).add_elements([1, 
2]))
       (p | test_stream | beam.ParDo(DoFnProcessWithKeyparam()))
 
+  def test_dofn_returning_str_raises_clear_error(self):
+    """Regression test for https://github.com/apache/beam/issues/18712.
+
+    A DoFn returning a str instead of an iterable wrapping one used to
+    silently iterate per-character. It should now raise a clear TypeError.
+    """
+    class BadDoFn(DoFn):
+      def process(self, element):
+        return 'hello'
+
+    with self.assertRaisesRegex(
+        Exception, 'Returning a str from a ParDo or FlatMap is discouraged'):
+      with TestPipeline() as p:
+        _ = p | beam.Create([0]) | beam.ParDo(BadDoFn())
+
+  def test_dofn_returning_bytes_raises_clear_error(self):
+    """Regression test for https://github.com/apache/beam/issues/18712.""";
+    class BadDoFn(DoFn):
+      def process(self, element):
+        return b'hello'
+
+    with self.assertRaisesRegex(
+        Exception, 'Returning a bytes from a ParDo or FlatMap is discouraged'):
+      with TestPipeline() as p:
+        _ = p | beam.Create([0]) | beam.ParDo(BadDoFn())
+
+  def test_dofn_returning_dict_raises_clear_error(self):
+    """Regression test for https://github.com/apache/beam/issues/18712.""";
+    class BadDoFn(DoFn):
+      def process(self, element):
+        return {'k': 'v'}
+
+    with self.assertRaisesRegex(
+        Exception, 'Returning a dict from a ParDo or FlatMap is discouraged'):

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   It is recommended to assert the specific exception type (`TypeError`) 
instead of the base `Exception`.
   
   ```suggestion
       with self.assertRaisesRegex(
           TypeError, 'Returning a dict from a ParDo or FlatMap is 
discouraged'):
   ```



##########
sdks/python/apache_beam/runners/common_test.py:
##########
@@ -154,6 +154,43 @@ def process(self, element, mykey=DoFn.KeyParam):
       test_stream = (TestStream().advance_watermark_to(10).add_elements([1, 
2]))
       (p | test_stream | beam.ParDo(DoFnProcessWithKeyparam()))
 
+  def test_dofn_returning_str_raises_clear_error(self):
+    """Regression test for https://github.com/apache/beam/issues/18712.
+
+    A DoFn returning a str instead of an iterable wrapping one used to
+    silently iterate per-character. It should now raise a clear TypeError.
+    """
+    class BadDoFn(DoFn):
+      def process(self, element):
+        return 'hello'
+
+    with self.assertRaisesRegex(
+        Exception, 'Returning a str from a ParDo or FlatMap is discouraged'):

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   It is recommended to assert the specific exception type (`TypeError`) 
instead of the base `Exception` to ensure the test is verifying the expected 
error condition.
   
   ```suggestion
       with self.assertRaisesRegex(
           TypeError, 'Returning a str from a ParDo or FlatMap is discouraged'):
   ```



##########
sdks/python/apache_beam/runners/common_test.py:
##########
@@ -154,6 +154,43 @@ def process(self, element, mykey=DoFn.KeyParam):
       test_stream = (TestStream().advance_watermark_to(10).add_elements([1, 
2]))
       (p | test_stream | beam.ParDo(DoFnProcessWithKeyparam()))
 
+  def test_dofn_returning_str_raises_clear_error(self):
+    """Regression test for https://github.com/apache/beam/issues/18712.
+
+    A DoFn returning a str instead of an iterable wrapping one used to
+    silently iterate per-character. It should now raise a clear TypeError.
+    """
+    class BadDoFn(DoFn):
+      def process(self, element):
+        return 'hello'
+
+    with self.assertRaisesRegex(
+        Exception, 'Returning a str from a ParDo or FlatMap is discouraged'):
+      with TestPipeline() as p:
+        _ = p | beam.Create([0]) | beam.ParDo(BadDoFn())
+
+  def test_dofn_returning_bytes_raises_clear_error(self):
+    """Regression test for https://github.com/apache/beam/issues/18712.""";
+    class BadDoFn(DoFn):
+      def process(self, element):
+        return b'hello'
+
+    with self.assertRaisesRegex(
+        Exception, 'Returning a bytes from a ParDo or FlatMap is discouraged'):

Review Comment:
   ![medium](https://www.gstatic.com/codereviewagent/medium-priority.svg)
   
   It is recommended to assert the specific exception type (`TypeError`) 
instead of the base `Exception`.
   
   ```suggestion
       with self.assertRaisesRegex(
           TypeError, 'Returning a bytes from a ParDo or FlatMap is 
discouraged'):
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to