http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97245
Revision: 97245
Author:   ariel
Date:     2011-09-16 07:40:24 +0000 (Fri, 16 Sep 2011)

Log Message:
-----------
redo checkpoint file for history 7z step

Modified Paths:
--------------
    branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===================================================================
--- branches/ariel/xmldumps-backup/worker.py	2011-09-16 06:51:27 UTC (rev 97244)
+++ branches/ariel/xmldumps-backup/worker.py	2011-09-16 07:40:24 UTC (rev 97245)
@@ -609,7 +609,7 @@
 				  "metahistory7zdump",
 				  "All pages with complete edit history (.7z)",
 				  "These dumps can be *very* large, uncompressing up to 100 times the archive download size. " +
-				  "Suitable for archival and statistical use, most mirror sites won't want or need this.", self.findItemByName('metahistorybz2dump'), self.wiki, self._getChunkToDo("metahistory7zdump"), self.chunkInfo.getPagesPerChunkHistory(), self.checkpointFile))
+				  "Suitable for archival and statistical use, most mirror sites won't want or need this.", self.findItemByName('metahistorybz2dump'), self.wiki, self._getChunkToDo("metahistory7zdump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile))
 		if (self.chunkInfo.chunksEnabled() and self.chunkInfo.recombineHistory()):
 			self.dumpItems.append(
 				RecombineXmlRecompressDump("metahistory7zdumprecombine",
@@ -1814,9 +1814,7 @@
 			# of that very file. meh. how likely is it that we
 			# have one? these files are time based and the start/end pageids
 			# are going to fluctuate. whatever
-			cf = DumpFilename(self.wiki)
-			cf.newFromFilename(item.checkpointFile)
-			checkpoint = cf.checkpoint
+			checkpoint = item.checkpointFile.checkpoint
 		for d in dumpNames:
 			self.symLinks.removeSymLinksFromOldRuns(self.wiki.date, d, chunk, checkpoint )
@@ -3135,7 +3133,7 @@
 class XmlRecompressDump(Dump):
 	"""Take a .bz2 and recompress it as 7-Zip."""

-	def __init__(self, subset, name, desc, detail, itemForRecompression, wiki, chunkToDo, chunks = False, checkpoints = False):
+	def __init__(self, subset, name, desc, detail, itemForRecompression, wiki, chunkToDo, chunks = False, checkpoints = False, checkpointFile = None):
 		self._subset = subset
 		self._detail = detail
 		self._chunks = chunks
@@ -3146,6 +3144,7 @@
 		self.itemForRecompression = itemForRecompression
 		if checkpoints:
 			self._checkpointsEnabled = True
+		self.checkpointFile = checkpointFile
 		Dump.__init__(self, name, desc)

 	def getDumpName(self):
@@ -3182,7 +3181,11 @@
 		commands = []
 		# Remove prior 7zip attempts; 7zip will try to append to an existing archive
 		self.cleanupOldFiles(runner.dumpDir)
-		if self._chunksEnabled and not self._chunkToDo:
+		if self.checkpointFile:
+			outputFile = DumpFilename(self.wiki, None, self.checkpointFile.dumpName, self.checkpointFile.fileType, self.fileExt, self.checkpointFile.chunk, self.checkpointFile.checkpoint)
+			series = self.buildCommand(runner, [ outputFile ])
+			commands.append(series)
+		elif self._chunksEnabled and not self._chunkToDo:
 			# must set up each parallel job separately, they may have checkpoint files that
 			# need to be processed in series, it's a special case
 			for i in range(1, len(self._chunks)+1):
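For readers following the change: before this revision the 7z step received the checkpoint file as a bare filename string and re-parsed it (the DumpFilename.newFromFilename dance removed in the second hunk); now the parsed object is passed down through XmlRecompressDump, so run() can branch on it and build the 7z output name directly from its attributes. Below is a minimal, self-contained Python sketch of that flow. FakeDumpFilename and outputFileForCheckpoint are illustrative stand-ins, not worker.py code; only the attribute names (dumpName, fileType, fileExt, chunk, checkpoint) and the argument order are taken from the diff, and the sample filename pieces are made up.

    # Stand-in for worker.py's DumpFilename: holds the parsed pieces of a
    # dump output filename (illustrative only, not the real class).
    class FakeDumpFilename(object):
        def __init__(self, wiki, date=None, dumpName=None, fileType=None,
                     fileExt=None, chunk=None, checkpoint=None):
            self.wiki = wiki
            self.date = date
            self.dumpName = dumpName      # e.g. "pages-meta-history"
            self.fileType = fileType      # e.g. "xml"
            self.fileExt = fileExt        # e.g. "bz2" or "7z"
            self.chunk = chunk            # parallel-job number, if any
            self.checkpoint = checkpoint  # page-id range marker (assumed format)

    # Mirrors the new branch in XmlRecompressDump.run(): reuse the parsed
    # attributes of the bz2 checkpoint file, swapping only the extension.
    def outputFileForCheckpoint(wiki, checkpointFile, fileExt="7z"):
        return FakeDumpFilename(wiki, None, checkpointFile.dumpName,
                                checkpointFile.fileType, fileExt,
                                checkpointFile.chunk,
                                checkpointFile.checkpoint)

    # Usage with made-up values:
    cpFile = FakeDumpFilename("enwiki", dumpName="pages-meta-history",
                              fileType="xml", fileExt="bz2", chunk=1,
                              checkpoint="p000000010p000000100")
    out = outputFileForCheckpoint("enwiki", cpFile)
    print("%s.%s checkpoint=%s" % (out.dumpName, out.fileExt, out.checkpoint))

The design point the diff makes is that passing the already-parsed object through both avoids re-parsing the filename and gives run() an explicit "redo exactly this one checkpoint file" path, checked ahead of the chunked and single-file cases.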