This is an automated email from the ASF dual-hosted git repository.
jrmccluskey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 3c3e2024d98 add some debug checks in test case and remove an extra line (#37237)
3c3e2024d98 is described below
commit 3c3e2024d98bf25b82d0b69f0765f4d444b25864
Author: Derrick Williams <[email protected]>
AuthorDate: Wed Jan 7 10:20:19 2026 -0500
add some debug checks in test case and remove an extra line (#37237)
---
sdks/python/apache_beam/dataframe/io.py | 1 -
sdks/python/apache_beam/yaml/yaml_transform_test.py | 12 ++++++++++--
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/sdks/python/apache_beam/dataframe/io.py b/sdks/python/apache_beam/dataframe/io.py
index d16191bc4cc..02423f517ee 100644
--- a/sdks/python/apache_beam/dataframe/io.py
+++ b/sdks/python/apache_beam/dataframe/io.py
@@ -793,7 +793,6 @@ class ReadViaPandas(beam.PTransform):
if format == 'csv':
kwargs['filename_column'] = filename_column
self._reader = globals()['read_%s' % format](*args, **kwargs)
- self._reader = globals()['read_%s' % format](*args, **kwargs)
self._include_indexes = include_indexes
self._objects_as_strings = objects_as_strings
self._filename_column = filename_column
diff --git a/sdks/python/apache_beam/yaml/yaml_transform_test.py b/sdks/python/apache_beam/yaml/yaml_transform_test.py
index 89e4dc8b951..5cf2fa00f15 100644
--- a/sdks/python/apache_beam/yaml/yaml_transform_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_transform_test.py
@@ -34,6 +34,8 @@ try:
except ImportError:
jsonschema = None
+_LOGGER = logging.getLogger(__name__)
+
class CreateTimestamped(beam.PTransform):
_yaml_requires_inputs = False
@@ -244,6 +246,10 @@ class YamlTransformE2ETest(unittest.TestCase):
input = os.path.join(tmpdir, 'input.csv')
output = os.path.join(tmpdir, 'output.json')
data.to_csv(input, index=False)
+ with open(input, 'r') as f:
+ lines = f.readlines()
+ _LOGGER.debug("input.csv has these {lines} lines.")
+ self.assertEqual(len(lines), len(data) + 1) # +1 for header
with beam.Pipeline() as p:
result = p | YamlTransform(
@@ -256,9 +262,11 @@ class YamlTransformE2ETest(unittest.TestCase):
- type: WriteToJson
config:
path: %s
- num_shards: 1
+ num_shards: 1
+ - type: LogForTesting
''' % (repr(input), repr(output)))
-
+ all_output = list(glob.glob(output + "*"))
+ self.assertEqual(len(all_output), 1)
output_shard = list(glob.glob(output + "*"))[0]
result = pd.read_json(
output_shard, orient='records',