XZise has uploaded a new change for review.
https://gerrit.wikimedia.org/r/209989
Change subject: [FEAT] open_compressed: Read magic number
......................................................................
[FEAT] open_compressed: Read magic number
Instead of relying on the filename extension, read the file's magic number
(up to the first 8 bytes) and choose the decompression strategy based on that.
Change-Id: I5a8a66877e779eac5ea7de2b497f87cd75feb3a1
---
M pywikibot/tools/__init__.py
M tests/tools_tests.py
2 files changed, 24 insertions(+), 7 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/89/209989/1
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 41714aa..09e5bc3 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -702,7 +702,7 @@
setattr(self._wrapped, name, value)
-def open_compressed(filename):
+def open_compressed(filename, use_extension=False):
"""
Open a file and uncompress it if needed.
@@ -731,11 +731,27 @@
else:
return wrapped
- if filename.endswith('.bz2'):
- return wrap(bz2.BZ2File(filename))
- elif filename.endswith('.gz'):
- return wrap(gzip.open(filename))
- elif filename.endswith('.7z'):
+ if use_extension:
+ # rfind returns -1 without a '.', so the slice is only the last character;
+ # otherwise it starts at the last period. [1:] then drops that first char.
+ extension = filename[filename.rfind('.'):][1:]
+ else:
+ with open(filename, 'rb') as f:
+ magic_number = f.read(8)
+ if magic_number.startswith(b'BZh'):
+ extension = 'bz2'
+ elif magic_number.startswith(b'\x1F\x8B\x08'):
+ extension = 'gz'
+ elif magic_number.startswith(b"7z\xBC\xAF'\x1C"):
+ extension = '7z'
+ else:
+ extension = ''
+
+ if extension == 'bz2':
+ return ContextManagerWrapper.create(bz2.BZ2File(filename))
+ elif extension == 'gz':
+ return ContextManagerWrapper.create(gzip.open(filename))
+ elif extension == '7z':
try:
process = subprocess.Popen(['7za', 'e', '-bd', '-so', filename],
stdout=subprocess.PIPE,
diff --git a/tests/tools_tests.py b/tests/tools_tests.py
index c143a70..b91cd2f 100644
--- a/tests/tools_tests.py
+++ b/tests/tools_tests.py
@@ -86,6 +86,7 @@
self.assertEqual(self._get_content(self.base_file),
self.original_content)
self.assertEqual(self._get_content(self.base_file + '.bz2'),
self.original_content)
self.assertEqual(self._get_content(self.base_file + '.gz'),
self.original_content)
+ self.assertEqual(self._get_content(self.base_file + '.bz2', True),
self.original_content)
def test_open_compressed_7z(self):
"""Test open_compressed with 7za if installed."""
@@ -94,7 +95,7 @@
except OSError:
raise unittest.SkipTest('7za not installed')
self.assertEqual(self._get_content(self.base_file + '.7z'),
self.original_content)
- self.assertRaises(OSError, self._get_content, self.base_file +
'_invalid.7z')
+ self.assertRaises(OSError, self._get_content, self.base_file +
'_invalid.7z', True)
if __name__ == '__main__':
--
To view, visit https://gerrit.wikimedia.org/r/209989
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I5a8a66877e779eac5ea7de2b497f87cd75feb3a1
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits