XZise has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/209989

Change subject: [FEAT] open_compressed: Read magic number
......................................................................

[FEAT] open_compressed: Read magic number

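Instead of relying on the filename extension, open_compressed now reads
the file's magic number (up to the first 8 bytes) and chooses the
decompression strategy based on it. The extension-based detection is
still available by passing use_extension=True.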

Change-Id: I5a8a66877e779eac5ea7de2b497f87cd75feb3a1
---
M pywikibot/tools/__init__.py
M tests/tools_tests.py
2 files changed, 24 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/89/209989/1

diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 41714aa..09e5bc3 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -702,7 +702,7 @@
         setattr(self._wrapped, name, value)
 
 
-def open_compressed(filename):
+def open_compressed(filename, use_extension=False):
     """
     Open a file and uncompress it if needed.
 
@@ -731,11 +731,27 @@
         else:
             return wrapped
 
-    if filename.endswith('.bz2'):
-        return wrap(bz2.BZ2File(filename))
-    elif filename.endswith('.gz'):
-        return wrap(gzip.open(filename))
-    elif filename.endswith('.7z'):
+    if use_extension:
+        # if '.' not in filename, it'll be 1 character long but otherwise
+        # contain the period
+        extension = filename[filename.rfind('.'):][1:]
+    else:
+        with open(filename, 'rb') as f:
+            magic_number = f.read(8)
+        if magic_number.startswith(b'BZh'):
+            extension = 'bz2'
+        elif magic_number.startswith(b'\x1F\x8B\x08'):
+            extension = 'gz'
+        elif magic_number.startswith(b"7z\xBC\xAF'\x1C"):
+            extension = '7z'
+        else:
+            extension = ''
+
+    if extension == 'bz2':
+        return ContextManagerWrapper.create(bz2.BZ2File(filename))
+    elif extension == 'gz':
+        return ContextManagerWrapper.create(gzip.open(filename))
+    elif extension == '7z':
         try:
             process = subprocess.Popen(['7za', 'e', '-bd', '-so', filename],
                                        stdout=subprocess.PIPE,
diff --git a/tests/tools_tests.py b/tests/tools_tests.py
index c143a70..b91cd2f 100644
--- a/tests/tools_tests.py
+++ b/tests/tools_tests.py
@@ -86,6 +86,7 @@
         self.assertEqual(self._get_content(self.base_file), 
self.original_content)
         self.assertEqual(self._get_content(self.base_file + '.bz2'), 
self.original_content)
         self.assertEqual(self._get_content(self.base_file + '.gz'), 
self.original_content)
+        self.assertEqual(self._get_content(self.base_file + '.bz2', True), 
self.original_content)
 
     def test_open_compressed_7z(self):
         """Test open_compressed with 7za if installed."""
@@ -94,7 +95,7 @@
         except OSError:
             raise unittest.SkipTest('7za not installed')
         self.assertEqual(self._get_content(self.base_file + '.7z'), 
self.original_content)
-        self.assertRaises(OSError, self._get_content, self.base_file + 
'_invalid.7z')
+        self.assertRaises(OSError, self._get_content, self.base_file + 
'_invalid.7z', True)
 
 
 if __name__ == '__main__':

-- 
To view, visit https://gerrit.wikimedia.org/r/209989
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5a8a66877e779eac5ea7de2b497f87cd75feb3a1
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to