http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92610

Revision: 92610
Author:   ariel
Date:     2011-07-20 07:23:56 +0000 (Wed, 20 Jul 2011)
Log Message:
-----------
add a few more enabled flags, get rid of a few more checks for dryrun etc

Modified Paths:
--------------
    branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===================================================================
--- branches/ariel/xmldumps-backup/worker.py    2011-07-20 04:28:48 UTC (rev 
92609)
+++ branches/ariel/xmldumps-backup/worker.py    2011-07-20 07:23:56 UTC (rev 
92610)
@@ -1021,7 +1021,6 @@
                return os.path.join(self.wiki.publicDir(), self.date);
 
 class Runner(object):
-
        def __init__(self, wiki, date=None, prefetch=True, spawn=True, 
job=None, restart=False, notice="", dryrun = False, loggingEnabled=False, 
chunkToDo = False):
                self.wiki = wiki
                self.dbName = wiki.dbName
@@ -1029,17 +1028,22 @@
                self.spawn = spawn
                self.chunkInfo = Chunk(wiki, self.dbName, self.logAndPrint)
                self.restart = restart
-               self.loggingEnabled = loggingEnabled
                self.htmlNoticeFile = None
                self.log = None
                self.dryrun = dryrun
                self._chunkToDo = chunkToDo
+
+               self._loggingEnabled = loggingEnabled
                self._statusEnabled = True
                self._checksummerEnabled = True
                self._runInfoFileEnabled = True
                self._symLinksEnabled = True
                self._feedsEnabled = True
                self._noticeFileEnabled = True
+               self._makeDirEnabled = True
+               self._cleanOldDumpsEnabled = True
+               self._cleanupOldFilesEnabled = False
+               self._checkForTruncatedFilesEnabled = True
 
                if self.dryrun or self._chunkToDo:
                        self._statusEnabled = False
@@ -1048,8 +1052,13 @@
                        self._symLinksEnabled = False
                        self._feedsEnabled = False
                        self._noticeFileEnabled = False
+                       self._makeDirEnabled = False
+                       self._cleanOldDumpsEnabled = False
+                       self._cleanupOldFilesEnabled = False
+
                if self.dryrun:
-                       self.loggingEnabled = False
+                       self._loggingEnabled = False
+                       self._checkForTruncatedFilesEnabled = False
 
                if date:
                        # Override, continuing a past dump?
@@ -1065,7 +1074,7 @@
                self.lastFailed = False
 
                # these must come after the dumpdir setup so we know which 
directory we are in 
-               if (loggingEnabled):
+               if (self._loggingEnabled and self._makeDirEnabled):
                        self.logFileName = 
self.dumpDir.publicPath(self.wiki.config.logFile)
                        self.makeDir(join(self.wiki.publicDir(), self.date))
                        self.log = Logger(self.logFileName)
@@ -1088,7 +1097,7 @@
                        done = log.doJobOnLogQueue()
                
        def logAndPrint(self, message):
-               if hasattr(self,'log') and self.log and not self.dryrun:
+               if hasattr(self,'log') and self.log and self._loggingEnabled:
                        self.log.addToLogQueue("%s\n" % message)
                print message
 
@@ -1098,9 +1107,8 @@
                else:
                        return ""
 
-       def remove(self, filename):
-               if not self.dryrun:
-                       os.remove(filename)
+       def removeFile(self, filename):
+               os.remove(filename)
 
        # returns 0 on success, 1 on error
        def saveTable(self, table, outfile):
@@ -1224,9 +1232,8 @@
                                # mark all the following jobs to run as well 
                                self.dumpItemList.markFollowingJobsToRun()
 
-               if not self.dryrun:
-                       self.makeDir(join(self.wiki.publicDir(), self.date))
-                       self.makeDir(join(self.wiki.privateDir(), self.date))
+               self.makeDir(join(self.wiki.publicDir(), self.date))
+               self.makeDir(join(self.wiki.privateDir(), self.date))
 
                if (self.restart):
                        self.logAndPrint("Preparing for restart from job %s of 
%s" % (self.jobRequested, self.dbName))
@@ -1250,12 +1257,12 @@
                                        except Exception, ex:
                                                self.debug("*** exception! " + 
str(ex))
                                                item.setStatus("failed")
-                                       if item.status() == "failed" and not 
self.dryrun and not self._chunkToDo:
+                                       if item.status() == "failed":
                                                self.runHandleFailure()
                                        else:
                                                self.lastFailed = False
                                # this ensures that, previous run or new one, 
the old or new md5sums go to the file
-                               if item.status() == "done" and not self.dryrun 
and not self._chunkToDo:
+                               if item.status() == "done":
                                        self.runUpdateItemFileInfo(item)
 
                        if (self.dumpItemList.allPossibleJobsDone()):
@@ -1263,10 +1270,9 @@
                        else:
                                self.status.updateStatusFiles("partialdone")
                        
self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo())
-                       if not self.dryrun and not self._chunkToDo:
-                               # if any job succeeds we might as well make the 
sym link
-                               if (self.status.failCount < 1):
-                                       self.completeDump()
+                       # if any job succeeds we might as well make the sym link
+                       if (self.status.failCount < 1):
+                               self.completeDump()
                                                                                
        
                        if (self.restart):
                                self.showRunnerState("Completed run restarting 
from job %s for %s" % (self.jobRequested, self.dbName))
@@ -1285,40 +1291,38 @@
                                except Exception, ex:
                                        self.debug("*** exception! " + str(ex))
                                        item.setStatus("failed")
-                               if item.status() == "failed" and not 
self.dryrun and not self._chunkToDo:
+                               if item.status() == "failed":
                                        self.runHandleFailure()
                                else:
-                                       if not self.dryrun and not 
self._chunkToDo:
-                                               self.runUpdateItemFileInfo(item)
-                                               
self.checksums.cpMd5TmpFileToPermFile()
+                                       self.runUpdateItemFileInfo(item)
+                                       self.checksums.cpMd5TmpFileToPermFile()
                                        self.lastFailed = False
 
                        self.status.updateStatusFiles("done")
-                       if not self.dryrun and not self._chunkToDo:
-                               
self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo())
-                               if self.status.failCount < 1:
-                                       self.completeDump()
+                       
self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo())
+                       if self.status.failCount < 1:
+                               self.completeDump()
                                                                                
        
                        self.showRunnerStateComplete()
 
        def cleanOldDumps(self):
-               old = self.wiki.dumpDirs()
-               if old:
-                       if old[-1] == self.date:
-                               # If we're re-running today's (or jobs from a 
given day's) dump, don't count it as one
-                               # of the old dumps to keep... or delete it 
halfway through!
-                               old = old[:-1]
-                       if self.wiki.config.keep > 0:
-                               # Keep the last few
-                               old = old[:-(self.wiki.config.keep)]
-               if old:
-                       for dump in old:
-                               self.showRunnerState("Purging old dump %s for 
%s" % (dump, self.dbName))
-                               if not self.dryrun and not self._chunkToDo:
+               if self._cleanOldDumpsEnabled:
+                       old = self.wiki.dumpDirs()
+                       if old:
+                               if old[-1] == self.date:
+                                       # If we're re-running today's (or jobs 
from a given day's) dump, don't count it as one
+                                       # of the old dumps to keep... or delete 
it halfway through!
+                                       old = old[:-1]
+                               if self.wiki.config.keep > 0:
+                                       # Keep the last few
+                                       old = old[:-(self.wiki.config.keep)]
+                       if old:
+                               for dump in old:
+                                       self.showRunnerState("Purging old dump 
%s for %s" % (dump, self.dbName))
                                        base = 
os.path.join(self.wiki.publicDir(), dump)
                                        shutil.rmtree("%s" % base)
-               else:
-                       self.showRunnerState("No old dumps to purge.")
+                       else:
+                               self.showRunnerState("No old dumps to purge.")
 
        def showRunnerState(self, message):
                self.debug(message)
@@ -1335,11 +1339,12 @@
                
self.symLinks.saveSymlink(self.checksums.getChecksumFileNameBasename())
 
        def makeDir(self, dir):
-               if exists(dir):
-                       self.debug("Checkdir dir %s ..." % dir)
-               else:
-                       self.debug("Creating %s ..." % dir)
-                       os.makedirs(dir)
+               if self._makeDirEnabled:
+                       if exists(dir):
+                               self.debug("Checkdir dir %s ..." % dir)
+                       else:
+                               self.debug("Creating %s ..." % dir)
+                               os.makedirs(dir)
 
 class SymLinks(object):
        def __init__(self, wiki, dumpDir, date, logfn, debugfn, enabled):
@@ -1351,11 +1356,12 @@
                self.debugfn = debugfn
 
        def makeDir(self, dir):
-               if exists(dir):
-                       self.debugfn("Checkdir dir %s ..." % dir)
-               else:
-                       self.debugfn("Creating %s ..." % dir)
-                       os.makedirs(dir)
+               if (self._enabled):
+                       if exists(dir):
+                               self.debugfn("Checkdir dir %s ..." % dir)
+                       else:
+                               self.debugfn("Creating %s ..." % dir)
+                               os.makedirs(dir)
 
        def saveSymlink(self, file):
                if (self._enabled):
@@ -1376,7 +1382,7 @@
                                        # no file or it's older than ours... 
*then* remove the link
                                        if not exists(os.path.realpath(link)) 
or dateinterval > 0:
                                                self.debug("Removing old 
symlink %s" % link)
-                                               os.remove(link)
+                                               runner.removeFile(link)
                                else:
                                        self.logfn("What the hell dude, %s is 
not a symlink" % link)
                                        raise BackupError("What the hell dude, 
%s is not a symlink" % link)
@@ -1395,30 +1401,31 @@
                self._enabled = enabled
 
        def makeDir(self, dir):
-               if exists(dir):
-                       self.debugfn("Checkdir dir %s ..." % dir)
-               else:
-                       self.debugfn("Creating %s ..." % dir)
-                       os.makedirs(dir)
+               if (self._enabled):
+                       if exists(dir):
+                               self.debugfn("Checkdir dir %s ..." % dir)
+                       else:
+                               self.debugfn("Creating %s ..." % dir)
+                               os.makedirs(dir)
 
        def saveFeed(self, file):
-               self.makeDir(join(self.wiki.publicDir(), 'latest'))
-               filePath = self.dumpDir.webPath(file)
-               fileName = os.path.basename(filePath)
-               webPath = os.path.dirname(filePath)
-               rssText = self.wiki.config.readTemplate("feed.xml") % {
-                       "chantitle": file,
-                       "chanlink": webPath,
-                       "chandesc": "Wikimedia dump updates for %s" % 
self.dbName,
-                       "title": webPath,
-                       "link": webPath,
-                       "description": xmlEscape("<a href=\"%s\">%s</a>" % 
(filePath, fileName)),
-                       "date": time.strftime("%a, %d %b %Y %H:%M:%S GMT", 
time.gmtime())}
-               directory = self.dumpDir.latestDir()
-               rssPath = self.dumpDir.latestPath(file + "-rss.xml")
-               FileUtils.writeFile(directory, rssPath, rssText, 
self.wiki.config.fileperms)
+               if (self._enabled):
+                       self.makeDir(join(self.wiki.publicDir(), 'latest'))
+                       filePath = self.dumpDir.webPath(file)
+                       fileName = os.path.basename(filePath)
+                       webPath = os.path.dirname(filePath)
+                       rssText = self.wiki.config.readTemplate("feed.xml") % {
+                               "chantitle": file,
+                               "chanlink": webPath,
+                               "chandesc": "Wikimedia dump updates for %s" % 
self.dbName,
+                               "title": webPath,
+                               "link": webPath,
+                               "description": xmlEscape("<a 
href=\"%s\">%s</a>" % (filePath, fileName)),
+                               "date": time.strftime("%a, %d %b %Y %H:%M:%S 
GMT", time.gmtime())}
+                       directory = self.dumpDir.latestDir()
+                       rssPath = self.dumpDir.latestPath(file + "-rss.xml")
+                       FileUtils.writeFile(directory, rssPath, rssText, 
self.wiki.config.fileperms)
 
-
 class Dump(object):
        def __init__(self, name, desc):
                self._desc = desc
@@ -1565,9 +1572,10 @@
                return(recombineCommandString)
 
        def cleanupOldFiles(self, runner, outputFileBasename):
-               outputFilename = self.buildOutputFilename(runner, 
outputFileBasename)
-               if exists(outputFilename):
-                       runner.remove(outputFilename)
+               if (runner._cleanupOldFilesEnabled):
+                       outputFilename = self.buildOutputFilename(runner, 
outputFileBasename)
+                       if exists(outputFilename):
+                               runner.removeFile(outputFilename)
 
        def buildOutputFilename(self, runner, outputFileBasename):
                return outputFilename
@@ -1685,10 +1693,11 @@
                return(series)
 
        def cleanupOldFiles(self, runner, chunk = 0):
-               fileList = self.buildOutputFilenames(runner, chunk)
-               for filename in fileList:
-                        if exists(filename):
-                               runner.remove(filename)
+               if (runner._cleanupOldFilesEnabled):
+                       fileList = self.buildOutputFilenames(runner, chunk)
+                       for filename in fileList:
+                               if exists(filename):
+                                       runner.removeFile(filename)
 
        def buildHistoryOutputFilename(self, runner, chunk = 0):
                if (chunk):
@@ -1801,9 +1810,10 @@
                return ["pages-logging.xml.gz"]
 
        def cleanupOldFiles(self, runner):
-               logging = self.buildOutputFilename(runner)
-               if exists(logging):
-                       runner.remove(logging)
+               if (runner._cleanupOldFilesEnabled):
+                       logging = self.buildOutputFilename(runner)
+                       if exists(logging):
+                               runner.removeFile(logging)
 
        def buildOutputFilename(self, runner):
                logging = runner.dumpDir.publicPath("pages-logging.xml.gz")
@@ -1869,33 +1879,39 @@
                        commands.append(series)
                error = runner.runCommand(commands, 
callbackStderr=self.progressCallback, callbackStderrArg=runner)
 
-               if (not exists( runner.wiki.config.checkforbz2footer ) ):
-                       raise BackupError("checkforbz2footer command %s not 
found" % runner.wiki.config.checkforbz2footer);
-               checkforbz2footer = "%s" % runner.wiki.config.checkforbz2footer
-               if exists(checkforbz2footer):
-                       # check to see if any of the output files are truncated
-                       files = []
-                       if (self._chunks):
-                               if (self._chunkToDo):
-                                       if (self._chunkToDo < 1 or 
self._chunkToDo > len(self._chunks)):
-                                               raise BackupError("chunk option 
must be in range of available chunks to rerun, 1 through %s\n" % 
str(len(self._chunks)))
-                                       files.append( self._path(runner, 'bz2', 
self._chunkToDo ) )
-                               else:
-                                       for i in range(1, len(self._chunks)+1):
-                                               files.append( 
self._path(runner, 'bz2', i ) )
+               truncationError = self.checkForTruncatedFiles(runner)
 
-                       for f in files:
-                               pipeline = []
-                               pipeline.append([ checkforbz2footer, f ])
-                               p = CommandPipeline(pipeline, quiet=True)
-                               p.runPipelineAndGetOutput()
-                               if not p.exitedSuccessfully():
-                                       runner.logAndPrint("file %s is 
truncated, moving out of the way" %f )
-                                       os.rename( f,  f + ".truncated" )
-                                       error = 1
-               if (error):
+               if (error or truncationError):
                        raise BackupError("error producing xml bz2 file(s) %s" 
% self._subset)
 
+       def checkForTruncatedFiles(self, runner):
+               if runner._checkForTruncatedFilesEnabled:
+                       if (not exists( runner.wiki.config.checkforbz2footer ) 
):
+                               raise BackupError("checkforbz2footer command %s 
not found" % runner.wiki.config.checkforbz2footer);
+                       checkforbz2footer = "%s" % 
runner.wiki.config.checkforbz2footer
+                       if exists(checkforbz2footer):
+                               # check to see if any of the output files are 
truncated
+                               files = []
+                               if (self._chunks):
+                                       if (self._chunkToDo):
+                                               if (self._chunkToDo < 1 or 
self._chunkToDo > len(self._chunks)):
+                                                       raise 
BackupError("chunk option must be in range of available chunks to rerun, 1 
through %s\n" % str(len(self._chunks)))
+                                               files.append( 
self._path(runner, 'bz2', self._chunkToDo ) )
+                                       else:
+                                               for i in range(1, 
len(self._chunks)+1):
+                                                       files.append( 
self._path(runner, 'bz2', i ) )
+
+                               for f in files:
+                                       pipeline = []
+                                       pipeline.append([ checkforbz2footer, f 
])
+                                       p = CommandPipeline(pipeline, 
quiet=True)
+                                       p.runPipelineAndGetOutput()
+                                       if not p.exitedSuccessfully():
+                                               runner.logAndPrint("file %s is 
truncated, moving out of the way" %f )
+                                               os.rename( f,  f + ".truncated" )
+                                               return 1
+               return 0
+
        def buildEta(self, runner):
                """Tell the dumper script whether to make ETA estimate on page 
or revision count."""
                return "--current"
@@ -2216,9 +2232,10 @@
                return(commandSeries)
 
        def cleanupOldFiles(self, runner, chunk = 0):
-               xml7z = self.buildOutputFilename(runner, chunk)
-               if exists(xml7z):
-                       runner.remove(xml7z)
+               if (runner._cleanupOldFilesEnabled):
+                       xml7z = self.buildOutputFilename(runner, chunk)
+                       if exists(xml7z):
+                               runner.removeFile(xml7z)
 
        def run(self, runner):
                if runner.lastFailed:
@@ -2297,11 +2314,12 @@
                return [ self._file("7z",0) ]
 
        def cleanupOldFiles(self, runner):
-               files = self.listOutputFiles(runner)
-               for filename in files:
-                       filename = runner.dumpDir.publicPath(filename)
-                       if exists(filename):
-                               runner.remove(filename)
+               if (runner._cleanupOldFilesEnabled):
+                       files = self.listOutputFiles(runner)
+                       for filename in files:
+                               filename = runner.dumpDir.publicPath(filename)
+                               if exists(filename):
+                                       runner.removeFile(filename)
 
        def run(self, runner):
                error = 0


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to