[
https://issues.apache.org/jira/browse/BEAM-5626?focusedWorklogId=152439&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-152439
]
ASF GitHub Bot logged work on BEAM-5626:
----------------------------------------
Author: ASF GitHub Bot
Created on: 08/Oct/18 22:11
Start Date: 08/Oct/18 22:11
Worklog Time Spent: 10m
Work Description: charlesccychen closed pull request #6587: [BEAM-5626]
Fix hadoop filesystem test for py3.
URL: https://github.com/apache/beam/pull/6587
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/sdks/python/apache_beam/io/hadoopfilesystem_test.py
b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
index a943a12bb4d..8421c43e629 100644
--- a/sdks/python/apache_beam/io/hadoopfilesystem_test.py
+++ b/sdks/python/apache_beam/io/hadoopfilesystem_test.py
@@ -22,6 +22,7 @@
import io
import logging
import posixpath
+import sys
import unittest
from builtins import object
@@ -153,7 +154,7 @@ def delete(self, path, recursive=True):
_ = self.status(path)
- for filepath in self.files.keys(): # pylint: disable=consider-iterating-dictionary
+ for filepath in list(self.files):
if filepath.startswith(path):
del self.files[filepath]
@@ -197,6 +198,12 @@ def checksum(self, path):
class HadoopFileSystemTest(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ # Method has been renamed in Python 3
+ if sys.version_info[0] < 3:
+ cls.assertCountEqual = cls.assertItemsEqual
+
def setUp(self):
self._fake_hdfs = FakeHdfs()
hdfs.hdfs.InsecureClient = (
@@ -258,7 +265,7 @@ def test_match_file(self):
returned_files = [f.path
for match_result in result
for f in match_result.metadata_list]
- self.assertItemsEqual(expected_files, returned_files)
+ self.assertCountEqual(expected_files, returned_files)
def test_match_file_with_limits(self):
expected_files = [self.fs.join(self.tmpdir, filename)
@@ -296,7 +303,7 @@ def test_match_directory(self):
# structure, so listing without a '/' will return no results.
result = self.fs.match([self.tmpdir + '/'])[0]
files = [f.path for f in result.metadata_list]
- self.assertItemsEqual(files, expected_files)
+ self.assertCountEqual(files, expected_files)
def test_match_directory_trailing_slash(self):
expected_files = [self.fs.join(self.tmpdir, filename)
@@ -304,7 +311,7 @@ def test_match_directory_trailing_slash(self):
result = self.fs.match([self.tmpdir + '/'])[0]
files = [f.path for f in result.metadata_list]
- self.assertItemsEqual(files, expected_files)
+ self.assertCountEqual(files, expected_files)
def test_create_success(self):
url = self.fs.join(self.tmpdir, 'new_file')
@@ -322,7 +329,7 @@ def test_create_write_read_compressed(self):
path = self.fs._parse_url(url)
expected_file = FakeFile(path, 'wb')
self.assertEqual(self._fake_hdfs.files[path], expected_file)
- data = 'abc' * 10
+ data = b'abc' * 10
handle.write(data)
# Compressed data != original data
self.assertNotEquals(data, self._fake_hdfs.files[path].getvalue())
@@ -336,7 +343,7 @@ def test_create_write_read_compressed(self):
def test_open(self):
url = self.fs.join(self.tmpdir, 'old_file1')
handle = self.fs.open(url)
- expected_data = ''
+ expected_data = b''
data = handle.read()
self.assertEqual(data, expected_data)
@@ -356,7 +363,7 @@ def test_copy_file(self):
url2 = self.fs.join(self.tmpdir, 'new_file2')
url3 = self.fs.join(self.tmpdir, 'new_file3')
with self.fs.create(url1) as f1:
- f1.write('Hello')
+ f1.write(b'Hello')
self.fs.copy([url1, url1], [url2, url3])
self.assertTrue(self._cmpfiles(url1, url2))
self.assertTrue(self._cmpfiles(url1, url3))
@@ -365,9 +372,9 @@ def test_copy_file_overwrite_error(self):
url1 = self.fs.join(self.tmpdir, 'new_file1')
url2 = self.fs.join(self.tmpdir, 'new_file2')
with self.fs.create(url1) as f1:
- f1.write('Hello')
+ f1.write(b'Hello')
with self.fs.create(url2) as f2:
- f2.write('nope')
+ f2.write(b'nope')
with self.assertRaisesRegexp(
BeamIOError, r'already exists.*%s' % posixpath.basename(url2)):
self.fs.copy([url1], [url2])
@@ -378,7 +385,7 @@ def test_copy_file_error(self):
url3 = self.fs.join(self.tmpdir, 'new_file3')
url4 = self.fs.join(self.tmpdir, 'new_file4')
with self.fs.create(url3) as f:
- f.write('Hello')
+ f.write(b'Hello')
with self.assertRaisesRegexp(
BeamIOError, r'^Copy operation failed .*%s.*%s.* not found' % (
url1, url2)):
@@ -397,7 +404,7 @@ def test_copy_directory(self):
url1 = self.fs.join(url_t1_inner, 'f1')
url2 = self.fs.join(url_t2_inner, 'f1')
with self.fs.create(url1) as f:
- f.write('Hello')
+ f.write(b'Hello')
self.fs.copy([url_t1], [url_t2])
self.assertTrue(self._cmpfiles(url1, url2))
@@ -419,9 +426,9 @@ def test_copy_directory_overwrite_error(self):
url3_inner = self.fs.join(url_t2_inner, 'f3')
for url in [url1, url1_inner, url3_inner]:
with self.fs.create(url) as f:
- f.write('Hello')
+ f.write(b'Hello')
with self.fs.create(url2) as f:
- f.write('nope')
+ f.write(b'nope')
with self.assertRaisesRegexp(BeamIOError, r'already exists'):
self.fs.copy([url_t1], [url_t2])
@@ -430,7 +437,7 @@ def test_rename_file(self):
url1 = self.fs.join(self.tmpdir, 'f1')
url2 = self.fs.join(self.tmpdir, 'f2')
with self.fs.create(url1) as f:
- f.write('Hello')
+ f.write(b'Hello')
self.fs.rename([url1], [url2])
self.assertFalse(self.fs.exists(url1))
@@ -442,7 +449,7 @@ def test_rename_file_error(self):
url3 = self.fs.join(self.tmpdir, 'f3')
url4 = self.fs.join(self.tmpdir, 'f4')
with self.fs.create(url3) as f:
- f.write('Hello')
+ f.write(b'Hello')
with self.assertRaisesRegexp(
BeamIOError, r'^Rename operation failed .*%s.*%s' % (url1, url2)):
@@ -457,7 +464,7 @@ def test_rename_directory(self):
url1 = self.fs.join(url_t1, 'f1')
url2 = self.fs.join(url_t2, 'f1')
with self.fs.create(url1) as f:
- f.write('Hello')
+ f.write(b'Hello')
self.fs.rename([url_t1], [url_t2])
self.assertFalse(self.fs.exists(url_t1))
@@ -474,13 +481,13 @@ def test_exists(self):
def test_size(self):
url = self.fs.join(self.tmpdir, 'f1')
with self.fs.create(url) as f:
- f.write('Hello')
+ f.write(b'Hello')
self.assertEqual(5, self.fs.size(url))
def test_checksum(self):
url = self.fs.join(self.tmpdir, 'f1')
with self.fs.create(url) as f:
- f.write('Hello')
+ f.write(b'Hello')
self.assertEqual('fake_algo-5-checksum_byte_sequence',
self.fs.checksum(url))
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 152439)
Time Spent: 4h (was: 3h 50m)
> Several IO tests fail in Python 3 with RuntimeError('dictionary changed size
> during iteration',)}
> -------------------------------------------------------------------------------------------------
>
> Key: BEAM-5626
> URL: https://issues.apache.org/jira/browse/BEAM-5626
> Project: Beam
> Issue Type: Sub-task
> Components: sdk-py-core
> Reporter: Valentyn Tymofieiev
> Assignee: Ruoyun Huang
> Priority: Major
> Time Spent: 4h
> Remaining Estimate: 0h
>
> ERROR: test_delete_dir
> (apache_beam.io.hadoopfilesystem_test.HadoopFileSystemTest)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File
> "/usr/local/google/home/valentyn/projects/beam/clean_head/beam/sdks/python/apache_beam/io/hadoopfilesystem_test.py",
> line 506, in test_delete_dir
> self.fs.delete([url_t1])
> File
> "/usr/local/google/home/valentyn/projects/beam/clean_head/beam/sdks/python/apache_beam/io/hadoopfilesystem.py",
> line 370, in delete
> raise BeamIOError("Delete operation failed", exceptions)
> apache_beam.io.filesystem.BeamIOError: Delete operation failed with
> exceptions {'hdfs://test_dir/new_dir1': RuntimeError('dictionary changed size
> during iteration', )}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)