https://www.mediawiki.org/wiki/Special:Code/MediaWiki/105887

Revision: 105887
Author:   ariel
Date:     2011-12-12 15:23:04 +0000 (Mon, 12 Dec 2011)
Log Message:
-----------
run specified query on list of wikis, one gzipped output file for each, files 
named by date and project

Added Paths:
-----------
    branches/ariel/xmldumps-backup/wikiqueries/
    branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample
    branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py

Added: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample
===================================================================
--- branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample          
                (rev 0)
+++ branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample  
2011-12-12 15:23:04 UTC (rev 105887)
@@ -0,0 +1,23 @@
+[wiki]
+mediawiki=/home/wmf/mediawiki/1.18
+allwikislist=/home/wmf/conf/all.dblist
+privatewikislist=/home/wmf/conf/private.dblist
+closedwikislist=/home/wmf/conf/closed.dblist
+
+[output]
+wikiqueriesdir=/home/wmf/output/files
+temp=/var/tmp
+fileperms=0644
+
+[database]
+user=dbadmin
+password=XXXXX
+
+[tools]
+php=/usr/bin/php
+mysql=/usr/bin/mysql
+gzip=/usr/bin/gzip
+bzip2=/usr/bin/bzip2
+
+[query]
+queryfile=/home/wmf/scripts/query.sql

Added: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py
===================================================================
--- branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py                   
        (rev 0)
+++ branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py   2011-12-12 
15:23:04 UTC (rev 105887)
@@ -0,0 +1,352 @@
+# for every wiki, run a specified query, gzipping the output.
+# there's a config file which needs to be set up.
+
+import getopt
+import os
+import re
+import sys
+import ConfigParser
+import subprocess
+import socket
+import time
+from subprocess import Popen, PIPE
+from os.path import exists
+import hashlib
+import traceback
+import shutil
+
+class ContentFile(object):
+    def __init__(self, config, date, wikiName):
+        self._config = config
+        self.date = date
+        self.queryDir = QueryDir(self._config)
+        self.wikiName = wikiName
+
+    # override this.
+    def getFileName(self):
+        return "content.txt"
+
+    def getPath(self):
+        return os.path.join(self.queryDir.getQueryDir(),self.getFileName())
+
+class OutputFile(ContentFile):
+    def getFileName(self):
+        return "%s-%s-wikiquery.gz" % ( self.wikiName, self.date )
+
+class Config(object):
+    def __init__(self, configFile=False):
+        self.projectName = False
+
+        home = os.path.dirname(sys.argv[0])
+        if (not configFile):
+            configFile = "wikiqueries.conf"
+        self.files = [
+            os.path.join(home,configFile),
+            "/etc/wikqueries.conf",
+            os.path.join(os.getenv("HOME"), ".wikiqueries.conf")]
+        defaults = {
+            #"wiki": {
+            "allwikislist": "",
+            "privatewikislist": "",
+            "closedwikislist": "",
+            #"output": {
+            "wikiqueriesdir": "/wikiqueries",
+            "temp":"/wikiqueries/temp",
+            "fileperms": "0640",
+            #"database": {
+            "user": "root",
+            "password": "",
+            #"tools": {
+            "php": "/bin/php",
+            "gzip": "/usr/bin/gzip",
+            "bzip2": "/usr/bin/bzip2",
+            "mysql": "/usr/bin/mysql",
+            "multiversion": "",
+            #"query":{
+            "queryfile": "wikiquery.sql"
+            }
+
+        self.conf = ConfigParser.SafeConfigParser(defaults)
+        self.conf.read(self.files)
+
+        if not self.conf.has_section("wiki"):
+            print "The mandatory configuration section 'wiki' was not defined."
+            raise ConfigParser.NoSectionError('wiki')
+
+        if not self.conf.has_option("wiki","mediawiki"):
+            print "The mandatory setting 'mediawiki' in the section 'wiki' was 
not defined."
+            raise ConfigParser.NoOptionError('wiki','mediawiki')
+
+        self.parseConfFile()
+
+    def parseConfFile(self):
+        self.mediawiki = self.conf.get("wiki", "mediawiki")
+        self.allWikisList = MiscUtils.dbList(self.conf.get("wiki", 
"allwikislist"))
+        self.privateWikisList = MiscUtils.dbList(self.conf.get("wiki", 
"privatewikislist"))
+        self.closedWikisList = MiscUtils.dbList(self.conf.get("wiki", 
"closedwikislist"))
+
+        if not self.conf.has_section('output'):
+            self.conf.add_section('output')
+        self.wikiQueriesDir = self.conf.get("output", "wikiqueriesdir")
+        self.tempDir = self.conf.get("output", "temp")
+        self.fileperms = self.conf.get("output", "fileperms")
+        self.fileperms = int(self.fileperms,0)
+
+        if not self.conf.has_section('database'):
+            self.conf.add_section('database')
+        self.dbUser = self.conf.get("database", "user")
+        self.dbPassword = self.conf.get("database", "password")
+
+        if not self.conf.has_section('tools'):
+            self.conf.add_section('tools')
+        self.php = self.conf.get("tools", "php")
+        self.gzip = self.conf.get("tools", "gzip")
+        self.bzip2 = self.conf.get("tools", "bzip2")
+        self.mysql = self.conf.get("tools", "mysql")
+        self.multiversion = self.conf.get("tools","multiversion")
+
+        if not self.conf.has_section('query'):
+            self.conf.add_section('query')
+        self.queryFile = self.conf.get("query","queryfile")
+
+class MultiVersion(object):
+    def MWScriptAsString(config, maintenanceScript):
+        return(" ".join(MultiVersion.MWScriptAsArray(config, 
maintenanceScript)))
+
+    def MWScriptAsArray(config, maintenanceScript):
+        if config.multiversion != "":
+            if exists(config.multiversion):
+                return [ config.multiversion, maintenanceScript ]
+        return [ "%s/maintenance/%s" % (config.mediawiki, maintenanceScript) ]
+
+    MWScriptAsString = staticmethod(MWScriptAsString)
+    MWScriptAsArray = staticmethod(MWScriptAsArray)
+
+class MiscUtils(object):
+    def dbList(filename):
+        """Read database list from a file"""
+        if (not filename):
+            return []
+        infile = open(filename)
+        dbs = []
+        for line in infile:
+            line = line.strip()
+            if line != "":
+                dbs.append(line)
+        infile.close()
+        dbs.sort()
+        return dbs
+
+    def shellEscape(param):
+        """Escape a string parameter, or set of strings, for the shell."""
+        if isinstance(param, basestring):
+            return "'" + param.replace("'", "'\\''") + "'"
+        elif param is None:
+            # A blank string might actually be needed; None means we can leave 
it out
+            return ""
+        else:
+            return tuple([MiscUtils.shellEscape(x) for x in param])
+
+    def today():
+        return time.strftime("%Y%m%d", time.gmtime())
+
+    def readFile(filename):
+        """Read text from a file in one fell swoop."""
+        file = open(filename, "r")
+        text = file.read()
+        file.close()
+        return text
+
+    dbList = staticmethod(dbList)
+    shellEscape = staticmethod(shellEscape)
+    today = staticmethod(today)
+    readFile = staticmethod(readFile)
+
+class RunSimpleCommand(object):
+    def runWithOutput(command, maxtries = 3, shell=False):
+        """Run a command and return the output as a string.
+        Raises WikiQueriesError on non-zero return code."""
+
+        success = False
+        tries = 0
+        while (not success and tries < maxtries):
+            proc = Popen(command, shell = shell, stdout = PIPE, stderr = PIPE)
+            output, error = proc.communicate()
+            if not proc.returncode:
+                success = True
+            tries = tries + 1
+        if not success:
+            if type(command).__name__=='list':
+                commandString = " ".join(command)
+            else:
+                commandString = command
+            if proc:
+                raise WikiQueriesError("command '" + commandString + ( "' 
failed with return code %s " % proc.returncode ) + " and error '" + error + "'")
+            else:
+                raise WikiQueriesError("command '" + commandString + ( "' 
failed"  ) + " and error '" + error + "'")
+        return output
+
+    def runWithNoOutput(command, maxtries = 3, shell=False):
+        """Run a command, expecting no output.
+        Raises WikiQueriesError on non-zero return code."""
+
+        success = False
+        tries = 0
+        while ((not success) and tries < maxtries):
+            proc = Popen(command, shell = shell, stderr = PIPE)
+            # output will be None, we can ignore it
+            output, error = proc.communicate()
+            if not proc.returncode:
+                success = True
+            tries = tries + 1
+        if not success:
+            if type(command).__name__=='list':
+                commandString = " ".join(command)
+            else:
+                commandString = command
+            raise WikiQueriesError("command '" + commandString + ( "' failed 
with return code %s " % proc.returncode ) + " and error '" + error + "'")
+        return success
+
+    runWithOutput = staticmethod(runWithOutput)
+    runWithNoOutput = staticmethod(runWithNoOutput)
+
+class DBServer(object):
+    def __init__(self, config, wikiName):
+        self.config = config
+        self.wikiName = wikiName
+        self.dbServer = self.defaultServer()
+
+    def defaultServer(self):
+        if (not exists( self.config.php ) ):
+            raise BackupError("php command %s not found" % self.config.php)
+        commandList = MultiVersion.MWScriptAsArray(self.config, 
"getSlaveServer.php")
+        command =  [ self.config.php, "-q" ]
+        command.extend(commandList)
+        command.extend( [ "--wiki=%s" % self.wikiName, "--group=dump" ])
+        return RunSimpleCommand.runWithOutput(command, shell=False).rstrip()
+
+    def buildSqlCommand(self, query, outFile):
+        """Put together a command to execute an sql query to the server for 
this DB."""
+        if (not exists( self.config.mysql ) ):
+            raise BackupError("mysql command %s not found" % self.config.mysql)
+        command =  "/bin/echo '%s' | %s -h %s -u %s " % ( query, 
self.config.mysql, self.dbServer, self.config.dbUser ) 
+        if self.config.dbPassword != "":
+            command = command + "-p" + self.config.dbPassword
+        command = command + " -r --silent " + self.wikiName
+        command = command + "| %s > %s" % ( self.config.gzip, outFile )
+        return command
+
+class WikiQueriesError(Exception):
+    """Raised when a command keeps failing or too many passes over the wikis fail."""
+    pass
+
+class QueryDir(object):
+    def __init__(self, config):
+        self._config = config
+
+    def getQueryDir(self):
+        return self._config.wikiQueriesDir
+
+class WikiQuery(object):
+    def __init__(self,config, wikiName, dryrun, verbose):
+        self._config = config
+        self.wikiName = wikiName
+        self.queryDir = QueryDir(self._config)
+        self.dryrun = dryrun
+        self.verbose = verbose
+
+    def doOneWiki(self):
+        """returns true on success"""
+        if self.wikiName not in self._config.privateWikisList and 
self.wikiName not in self._config.closedWikisList:
+            if not exists(self.queryDir.getQueryDir()):
+                os.makedirs(self.queryDir.getQueryDir())
+            try:
+               if (self.verbose):
+                    print "Doing run for wiki: ",self.wikiName
+                if not dryrun:
+                    if not self.runWikiQuery():
+                        return False
+            except:
+                if (self.verbose):
+                    traceback.print_exc(file=sys.stdout)
+                return False
+        if (self.verbose):
+            print "Success!  Wiki", self.wikiName, "query complete."
+        return True
+
+    def runWikiQuery(self):
+        outFile = OutputFile(self._config, MiscUtils.today(), self.wikiName)
+        query = MiscUtils.readFile(self._config.queryFile)
+        db = DBServer(self._config, self.wikiName)
+        return RunSimpleCommand.runWithNoOutput(db.buildSqlCommand(query, 
outFile.getPath()), shell = True)
+
+class WikiQueryLoop(object):
+    def __init__(self, config, dryrun, verbose):
+        self._config = config
+        self.dryrun = dryrun
+        self.verbose = verbose
+
+    def doRunOnAllWikis(self):
+        failures = 0
+        for w in self._config.allWikisList:
+            query = WikiQuery(self._config, w, self.dryrun, self.verbose)
+            result = query.doOneWiki()
+            if result == False:
+                failures = failures + 1
+        return failures
+
+    def doAllWikisTilDone(self,numFails):
+        fails = 0
+        while 1:
+            failures = self.doRunOnAllWikis()
+            if not failures:
+                break
+            fails  = fails + 1
+            if fails > numFails:
+                raise WikiQueriesError("Too many consecutive failures, giving 
up")
+            # wait 5 minutes and try another loop
+#            raise WikiQueriesError("would sleep")
+            time.sleep(300)
+
+def usage(message = None):
+    if message:
+        print message
+        print "Usage: python wikiqueries.py [options] [wikidbname]"
+        print "Options: --configfile, --dryrun, --verbose"
+        print "--configfile:  Specify an alternate config file to read. 
Default file is 'wikiqueries.conf' in the current directory."
+        print "--dryrun:      Don't actually run anything but print the 
commands that would be run."
+        print "--verbose:     Print error messages and other informative 
messages (normally the"
+        print "               script runs silently)."
+        print "wikiname:      Run the query only for the specific wiki."
+        sys.exit(1)
+
+if __name__ == "__main__":
+    configFile = False
+    result = False
+    dryrun = False
+    verbose = False
+
+    try:
+        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
+                                                 [ 'configfile=', 'dryrun', 
'verbose' ])
+    except:
+        usage("Unknown option specified")
+
+    for (opt, val) in options:
+        if opt == "--configfile":
+            configFile = val
+        elif opt == "--dryrun":
+            dryrun = True
+        elif opt == "--verbose":
+            verbose = True
+        
+    if (configFile):
+        config = Config(configFile)
+    else:
+        config = Config()
+
+    if len(remainder) > 0:
+        query = WikiQuery(config, remainder[0], dryrun, verbose)
+        query.doOneWiki()
+    else:
+        queries = WikiQueryLoop(config, dryrun, verbose)
+        queries.doAllWikisTilDone(3)


Property changes on: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py
___________________________________________________________________
Added: svn:eol-style
   + native


_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to