Mpaa has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/339317 )

Change subject: tools: make general function to compute file sha
......................................................................

tools: make general function to compute file sha

It can be reused in several places:
- site.upload()
- Filepage.download() [if/when it is merged]

Change-Id: I756c4d127274f7f6031920127850f30de3964597
---
M pywikibot/site.py
M pywikibot/tools/__init__.py
M tests/tools_tests.py
3 files changed, 89 insertions(+), 10 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/17/339317/1

diff --git a/pywikibot/site.py b/pywikibot/site.py
index bdf1fb4..52e6927 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -18,7 +18,6 @@
 import copy
 import datetime
 import functools
-import hashlib
 import heapq
 import itertools
 import json
@@ -68,6 +67,7 @@
 from pywikibot.family import WikimediaFamily
 from pywikibot.throttle import Throttle
 from pywikibot.tools import (
+    compute_file_hash,
     itergroup, UnicodeMixin, ComparableMixin, SelfCallMixin, SelfCallString,
     deprecated, deprecate_arg, deprecated_args, remove_last_args,
     redirect_func, issue_deprecation_warning,
@@ -6027,15 +6027,7 @@
                 # The SHA1 was also requested so calculate and compare it
                 assert 'sha1' in stash_info, \
                     'sha1 not in stash info: {0}'.format(stash_info)
-                sha1 = hashlib.sha1()
-                bytes_to_read = offset
-                with open(source_filename, 'rb') as f:
-                    while bytes_to_read > 0:
-                        read_bytes = f.read(min(bytes_to_read, 1 << 20))
-                        assert read_bytes  # make sure we actually read bytes
-                        bytes_to_read -= len(read_bytes)
-                        sha1.update(read_bytes)
-                sha1 = sha1.hexdigest()
+                sha1 = compute_file_hash(source_filename, bytes_to_read=offset)
                 if sha1 != stash_info['sha1']:
                     raise ValueError(
                         'The SHA1 of {0} bytes of the stashed "{1}" is {2} '
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 526fd93..261f5ac 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -10,6 +10,7 @@
 
 import collections
 import gzip
+import hashlib
 import inspect
 import itertools
 import os
@@ -1714,3 +1715,41 @@
         # re-read and check changes
         if os.stat(filename).st_mode != st_mode:
             warn(warn_str.format(filename, st_mode - stat.S_IFREG, mode))
+
+
+def compute_file_hash(filename, sha='sha1', bytes_to_read=None):
+    """Compute file hash.
+
+    Result is expressed as hexdigest().
+
+    @param filename: filename path
+    @type filename: basestring
+
+    @param sha: hashing function among the following in hashlib:
+        md5(), sha1(), sha224(), sha256(), sha384(), and sha512()
+        function name shall be passed as string, e.g. 'sha1'.
+    @type sha: basestring
+
+    @param bytes_to_read: only the first bytes_to_read will be considered;
+        if file size is smaller, the whole file will be considered.
+    @type bytes_to_read: None or int
+
+    """
+    size = os.path.getsize(filename)
+    if bytes_to_read is None:
+        bytes_to_read = size
+    else:
+        bytes_to_read = min(bytes_to_read, size)
+    step = 1 << 20
+
+    shas = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512']
+    assert sha in shas
+    sha = getattr(hashlib, sha)()  # sha instance
+
+    with open(filename, 'rb') as f:
+        while bytes_to_read > 0:
+            read_bytes = f.read(min(bytes_to_read, step))
+            assert read_bytes  # make sure we actually read bytes
+            bytes_to_read -= len(read_bytes)
+            sha.update(read_bytes)
+    return sha.hexdigest()
diff --git a/tests/tools_tests.py b/tests/tools_tests.py
index 6e97772..c450966 100644
--- a/tests/tools_tests.py
+++ b/tests/tools_tests.py
@@ -754,6 +754,54 @@
         self.chmod.assert_called_once_with(self.file, 0o600)
 
 
+class TestFileShaCalculator(TestCase):
+
+    """Test calculator of sha of a file."""
+
+    net = False
+
+    filename = join_xml_data_path('article-pear-0.10.xml')
+
+    def setUp(self):
+        """Setup tests."""
+        super(TestFileShaCalculator, self).setUp()
+
+    def test_md5_complete_calculation(self):
+        """Test md5 of complete file."""
+        res = tools.compute_file_hash(self.filename, sha='md5')
+        self.assertEqual(res, '5d7265e290e6733e1e2020630262a6f3')
+
+    def test_md5_partial_calculation(self):
+        """Test md5 of partial file (1024 bytes)."""
+        res = tools.compute_file_hash(self.filename, sha='md5',
+                                      bytes_to_read=1024)
+        self.assertEqual(res, 'edf6e1accead082b6b831a0a600704bc')
+
+    def test_sha1_complete_calculation(self):
+        """Test sha1 of complete file."""
+        res = tools.compute_file_hash(self.filename, sha='sha1')
+        self.assertEqual(res, '1c12696e1119493a625aa818a35c41916ce32d0c')
+
+    def test_sha1_partial_calculation(self):
+        """Test sha1 of partial file (1024 bytes)."""
+        res = tools.compute_file_hash(self.filename, sha='sha1',
+                                      bytes_to_read=1024)
+        self.assertEqual(res, 'e56fa7bd5cfdf6bb7e2d8649dd9216c03e7271e6')
+
+    def test_sha224_complete_calculation(self):
+        """Test sha224 of complete file."""
+        res = tools.compute_file_hash(self.filename, sha='sha224')
+        self.assertEqual(
+            res, '3d350d9d9eca074bd299cb5ffe1b325a9f589b2bcd7ba1c033ab4d33')
+
+    def test_sha224_partial_calculation(self):
+        """Test sha224 of partial file (1024 bytes)."""
+        res = tools.compute_file_hash(self.filename, sha='sha224',
+                                      bytes_to_read=1024)
+        self.assertEqual(
+            res, 'affa8cb79656a9b6244a079f8af91c9271e382aa9d5aa412b599e169')
+
+
 class Foo(object):
 
     """Test class to verify classproperty decorator."""

-- 
To view, visit https://gerrit.wikimedia.org/r/339317
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I756c4d127274f7f6031920127850f30de3964597
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Mpaa <mpaa.w...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to