[MediaWiki-commits] [Gerrit] pywikibot/core[master]: Made download_dump.py download process atomic

2017-12-28 Thread Ryan10145 (Code Review)
Ryan10145 has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/400616 )

Change subject: Made download_dump.py download process atomic
..

Made download_dump.py download process atomic

Bug: T183675
Change-Id: I142629bb89ffc1c810adcf8f1417ecc824594e41
---
M scripts/maintenance/download_dump.py
1 file changed, 34 insertions(+), 9 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/16/400616/1

diff --git a/scripts/maintenance/download_dump.py b/scripts/maintenance/download_dump.py
index d3a5123..ee6ee23 100644
--- a/scripts/maintenance/download_dump.py
+++ b/scripts/maintenance/download_dump.py
@@ -18,10 +18,26 @@
 #
 from __future__ import absolute_import, division, unicode_literals
 
+from datetime import datetime
+
 import os.path
 import sys
 
-from os import remove, symlink
+from os import remove, symlink, fsync
+
+try:
+from os import replace
+except ImportError:   # py2
+if sys.platform == 'win32':
+import os
+def replace(src, dst):
+try:
+os.rename(src, dst)
+except OSError:
+os.remove(dst)
+os.rename(src, dst)
+else:
+from os import rename as replace
 
 import pywikibot
 
@@ -63,7 +79,12 @@
 
 download_filename = self.getOption('wikiname') + \
 '-latest-' + self.getOption('filename')
-file_storepath = os.path.join(
+store_filename = download_filename + '-' + \
+str(datetime.now().strftime('%Y-%m-%d-%H-%M-%S')) + '.part'
+
+file_temp_storepath = os.path.join(
+self.getOption('storepath'), store_filename)
+file_final_storepath = os.path.join(
 self.getOption('storepath'), download_filename)
 
 # https://wikitech.wikimedia.org/wiki/Help:Toolforge#Dumps
@@ -71,28 +92,32 @@
 self.getOption('wikiname'), self.getOption('filename'))
 if toolforge_dump_filepath:
 pywikibot.output('Symlinking file from ' + toolforge_dump_filepath)
-if os.path.exists(file_storepath):
-remove(file_storepath)
+if os.path.exists(file_temp_storepath):
+remove(file_temp_storepath)
 
-symlink(toolforge_dump_filepath, file_storepath)
+symlink(toolforge_dump_filepath, file_temp_storepath)
 else:
 url = 'https://dumps.wikimedia.org/' + \
-os.path.join(self.getOption('wikiname'),
- 'latest', download_filename)
+self.getOption('wikiname') + '/latest/' + download_filename
 pywikibot.output('Downloading file from ' + url)
 response = fetch(url, stream=True)
 if response.status == 200:
 try:
-with open(file_storepath, 'wb') as result_file:
+with open(file_temp_storepath, 'wb') as result_file:
 for chunk in response.data.iter_content(100 * 1024):
 result_file.write(chunk)
+
+result_file.flush()
+fsync(result_file.fileno())
 except IOError:
 pywikibot.exception()
 return False
 else:
 return
 
-pywikibot.output('Done! File stored as ' + file_storepath)
+replace(file_temp_storepath, file_final_storepath)
+
+pywikibot.output('Done! File stored as ' + file_final_storepath)
 return
 
 

-- 
To view, visit https://gerrit.wikimedia.org/r/400616
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I142629bb89ffc1c810adcf8f1417ecc824594e41
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ryan10145 

___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits


[MediaWiki-commits] [Gerrit] pywikibot/core[master]: Made download_dump.py download process atomic

2017-12-30 Thread jenkins-bot (Code Review)
jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/400616 )

Change subject: Made download_dump.py download process atomic
..


Made download_dump.py download process atomic

Bug: T183675
Change-Id: I142629bb89ffc1c810adcf8f1417ecc824594e41
---
M scripts/maintenance/download_dump.py
1 file changed, 66 insertions(+), 22 deletions(-)

Approvals:
  Zhuyifei1999: Looks good to me, but someone else must approve
  Framawiki: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/maintenance/download_dump.py b/scripts/maintenance/download_dump.py
index d3a5123..c55940b 100644
--- a/scripts/maintenance/download_dump.py
+++ b/scripts/maintenance/download_dump.py
@@ -18,10 +18,27 @@
 #
 from __future__ import absolute_import, division, unicode_literals
 
+import binascii
+
 import os.path
 import sys
 
-from os import remove, symlink
+from os import remove, symlink, urandom
+
+try:
+from os import replace
+except ImportError:   # py2
+if sys.platform == 'win32':
+import os
+
+def replace(src, dst):
+try:
+os.rename(src, dst)
+except OSError:
+remove(dst)
+os.rename(src, dst)
+else:
+from os import rename as replace
 
 import pywikibot
 
@@ -63,36 +80,63 @@
 
 download_filename = self.getOption('wikiname') + \
 '-latest-' + self.getOption('filename')
-file_storepath = os.path.join(
+temp_filename = download_filename + '-' + \
+binascii.b2a_hex(urandom(8)).decode('ascii') + '.part'
+
+file_final_storepath = os.path.join(
 self.getOption('storepath'), download_filename)
+file_current_storepath = os.path.join(
+self.getOption('storepath'), temp_filename)
 
 # https://wikitech.wikimedia.org/wiki/Help:Toolforge#Dumps
 toolforge_dump_filepath = self.get_dump_name(
 self.getOption('wikiname'), self.getOption('filename'))
-if toolforge_dump_filepath:
-pywikibot.output('Symlinking file from ' + toolforge_dump_filepath)
-if os.path.exists(file_storepath):
-remove(file_storepath)
 
-symlink(toolforge_dump_filepath, file_storepath)
-else:
-url = 'https://dumps.wikimedia.org/' + \
-os.path.join(self.getOption('wikiname'),
- 'latest', download_filename)
-pywikibot.output('Downloading file from ' + url)
-response = fetch(url, stream=True)
-if response.status == 200:
+# First iteration for atomic download with temporary file
+# Second iteration for fallback non-atomic download
+for non_atomic in range(2):
+try:
+if toolforge_dump_filepath:
+pywikibot.output('Symlinking file from ' +
+ toolforge_dump_filepath)
+if non_atomic:
+if os.path.exists(file_final_storepath):
+remove(file_final_storepath)
+symlink(toolforge_dump_filepath, file_current_storepath)
+else:
+url = 'https://dumps.wikimedia.org/{0}/latest/{1}'.format(
+self.getOption('wikiname'), download_filename)
+pywikibot.output('Downloading file from ' + url)
+response = fetch(url, stream=True)
+if response.status == 200:
+with open(file_current_storepath, 'wb') as result_file:
+for data in response.data.iter_content(100 * 1024):
+result_file.write(data)
+else:
+return
+# Rename the temporary file to the target file
+# if the download completes successfully
+if not non_atomic:
+replace(file_current_storepath, file_final_storepath)
+break
+except (OSError, IOError):
+pywikibot.exception()
+
 try:
-with open(file_storepath, 'wb') as result_file:
-for chunk in response.data.iter_content(100 * 1024):
-result_file.write(chunk)
-except IOError:
+remove(file_current_storepath)
+except (OSError, IOError):
 pywikibot.exception()
-return False
-else:
-return
 
-pywikibot.output('Done! File stored as ' + file_storepath)
+# If the atomic download fails, try without a temporary file
+# If the non-atomic download also fails, exit the script
+if not non_atomic:
+