This is an automated email from the ASF dual-hosted git repository.

derrickaw pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 30e98f7fd52 yaml_transform_test - add some debug logs and increase row count (#38367)
30e98f7fd52 is described below

commit 30e98f7fd520f99d29c3f978c0802d19f5bdf5f7
Author: Derrick Williams <[email protected]>
AuthorDate: Fri May 8 09:59:07 2026 -0400

    yaml_transform_test - add some debug logs and increase row count (#38367)
    
    * add some debug logs and increase row count
    
    * Update sdks/python/apache_beam/yaml/yaml_transform_test.py
    
    Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
    
    ---------
    
    Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---
 .../python/apache_beam/yaml/yaml_transform_test.py | 24 +++++++++-------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/sdks/python/apache_beam/yaml/yaml_transform_test.py b/sdks/python/apache_beam/yaml/yaml_transform_test.py
index a4da97f7f50..bbb60b185c0 100644
--- a/sdks/python/apache_beam/yaml/yaml_transform_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_transform_test.py
@@ -253,17 +253,8 @@ class YamlTransformE2ETest(unittest.TestCase):
       raise unittest.SkipTest('Pandas not available.')
 
     with tempfile.TemporaryDirectory() as tmpdir:
-      data = pd.DataFrame([
-          {
-              'label': '11a', 'rank': 0
-          },
-          {
-              'label': '37a', 'rank': 1
-          },
-          {
-              'label': '389a', 'rank': 2
-          },
-      ])
+      data = pd.DataFrame([{'label': f'{i}a', 'rank': i} for i in range(1024)])
+
       input = os.path.join(tmpdir, 'input.csv')
       output = os.path.join(tmpdir, 'output.json')
       data.to_csv(input, index=False)
@@ -286,9 +277,14 @@ class YamlTransformE2ETest(unittest.TestCase):
                     num_shards: 1
               - type: LogForTesting
             ''' % (repr(input), repr(output)))
-      all_output = list(glob.glob(output + "*"))
-      self.assertEqual(len(all_output), 1)
-      output_shard = list(glob.glob(output + "*"))[0]
+      all_output = list(glob.glob(output + "-*"))
+      file_and_size = {f: os.path.getsize(f) for f in all_output}
+      self.assertEqual(
+          len(all_output),
+          1,
+          msg=f"Expected 1 shard file, but found {len(all_output)}. "
+          f"Files & sizes (bytes): {file_and_size}")
+      output_shard = all_output[0]
       result = pd.read_json(
           output_shard, orient='records',
           lines=True).sort_values('rank').reindex()

Reply via email to