http://www.mediawiki.org/wiki/Special:Code/MediaWiki/99655

Revision: 99655
Author:   ariel
Date:     2011-10-12 23:24:40 +0000 (Wed, 12 Oct 2011)
Log Message:
-----------
initial checkin of adds/changes dumps

Added Paths:
-----------
    branches/ariel/xmldumps-backup/incrementals/
    branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
    branches/ariel/xmldumps-backup/incrementals/README.config
    branches/ariel/xmldumps-backup/incrementals/README.txt
    branches/ariel/xmldumps-backup/incrementals/all.dblist
    branches/ariel/xmldumps-backup/incrementals/closed.dblist
    branches/ariel/xmldumps-backup/incrementals/dumpincr.conf.sample
    branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
    branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
    branches/ariel/xmldumps-backup/incrementals/incrmonitor
    branches/ariel/xmldumps-backup/incrementals/incrmonitor.py
    branches/ariel/xmldumps-backup/incrementals/incrs-index.html
    branches/ariel/xmldumps-backup/incrementals/private.dblist

Added: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py                  
        (rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py  2011-10-12 
23:24:40 UTC (rev 99655)
@@ -0,0 +1,390 @@
+# shared classes for incrementals
+import os
+import sys
+import re
+import ConfigParser
+import WikiDump
+from WikiDump import FileUtils, TimeUtils, MiscUtils
+from os.path import exists
+import socket
+import subprocess
+from subprocess import Popen, PIPE
+
+class ContentFile(object):
+    """Base class describing one file kept in a wiki's dated incrementals
+    directory.  Subclasses override getFileName() to name the actual file."""
+
+    def __init__(self, config, date, wikiName):
+        """config -- the Config object; date -- name of the dated directory;
+        wikiName -- name of the wiki the file belongs to."""
+        self._config = config
+        self.date = date
+        self.incrDir = IncrementDir(self._config, date)
+        self.wikiName = wikiName
+
+    # override this.
+    def getFileName(self):
+        """Return the base name of the file, without any directory part."""
+        return "content.txt"
+
+    def getPath(self):
+        """Return the file name joined to the wiki's directory for this date."""
+        # the expression must stay on the same logical line as 'return';
+        # a bare 'return' followed by the expression would return None
+        return os.path.join(self.incrDir.getIncDir(self.wikiName),
+                            self.getFileName())
+
+    def getFileInfo(self):
+        """Return what FileUtils.fileInfo reports for this file's path."""
+        return FileUtils.fileInfo(self.getPath())
+    
+class MaxRevIDFile(ContentFile):
+    """The file in the dated directory that records the max revision id."""
+
+    def getFileName(self):
+        fileName = "maxrevid.txt"
+        return fileName
+
+class StubFile(ContentFile):
+    """The gzipped stubs file for one wiki and date."""
+
+    def getFileName(self):
+        nameParts = (self.wikiName, self.date)
+        return "%s-%s-stubs-meta-hist-incr.xml.gz" % nameParts
+
+class RevsFile(ContentFile):
+    """The bzip2-compressed revision text file for one wiki and date."""
+
+    def getFileName(self):
+        nameParts = (self.wikiName, self.date)
+        return "%s-%s-pages-meta-hist-incr.xml.bz2" % nameParts
+
+class StatusFile(ContentFile):
+    """The status.txt file of a dump; its path may be asked for any date."""
+
+    def getFileName(self):
+        return "status.txt"
+
+    def getPath(self, date = None):
+        """Return the path of status.txt for the given date, or for this
+        object's own date when no date is passed."""
+        directory = self.incrDir.getIncDir(self.wikiName, date)
+        return os.path.join(directory, self.getFileName())
+
+class LockFile(ContentFile):
+    """A lock file for one wiki and date; it lives in the wiki's directory,
+    not in the dated subdirectory."""
+
+    def getFileName(self):
+        """Return the lock file's base name, built from wiki name and date."""
+        return "%s-%s.lock" % ( self.wikiName, self.date )
+
+    def getPath(self):
+        """Return the lock file name joined to the wiki's undated directory."""
+        # the expression must stay on the same logical line as 'return';
+        # a bare 'return' followed by the expression would return None
+        return os.path.join(self.incrDir.getIncDirNoDate(self.wikiName),
+                            self.getFileName())
+
+class MaxRevIDLockFile(LockFile):
+    """Lock file whose name carries the "maxrevid" tag."""
+
+    def getFileName(self):
+        nameParts = (self.wikiName, self.date)
+        return "%s-%s-maxrevid.lock" % nameParts
+        
+class IncrDumpLockFile(LockFile):
+    """Lock file whose name carries the "incrdump" tag."""
+
+    def getFileName(self):
+        nameParts = (self.wikiName, self.date)
+        return "%s-%s-incrdump.lock" % nameParts
+
+class MD5File(ContentFile):
+    """The file holding the md5 sums for one wiki and date."""
+
+    def getFileName(self):
+        nameParts = (self.wikiName, self.date)
+        return "%s-%s-md5sums.txt" % nameParts
+
+class IndexFile(ContentFile):
+    """The index.html file in the base incrementals directory."""
+
+    def __init__(self, config):
+        # no date and no wiki name: the index sits directly in the base dir
+        self._config = config
+        self.incrDir = IncrementDir(config)
+
+    def getFileName(self):
+        return "index.html"
+
+    def getPath(self):
+        """Return the index file name joined to the base directory."""
+        baseDir = self.incrDir.getIncDirBase()
+        return os.path.join(baseDir, self.getFileName())
+
+class StatusInfo(object):
+    """Reads and writes the status file of one wiki's dump for a date."""
+
+    def __init__(self, config, date, wikiName):
+        self._config, self.date, self.wikiName = config, date, wikiName
+        self.statusFile = StatusFile(config, date, wikiName)
+
+    def getStatus(self, date = None):
+        """Return True if the status file for the date (default: this
+        object's date) exists and contains "done"; False otherwise.
+
+        Note that a boolean is returned, not the status text."""
+        path = self.statusFile.getPath(date)
+        if not exists(path):
+            return False
+        return FileUtils.readFile(path).rstrip() == "done"
+
+    def setStatus(self, status):
+        """Write the given status text to this object's status file, using
+        the configured file permissions."""
+        path = self.statusFile.getPath()
+        FileUtils.writeFileInPlace(path, status, self._config.fileperms)
+
+class Lock(object):
+    """A lock for one wiki and date, represented by a lock file on disk."""
+
+    def __init__(self, config, date, wikiName):
+        self._config = config
+        self.date = date
+        self.wikiName = wikiName
+        self.lockFile = LockFile(self._config, self.date, self.wikiName)
+
+    def isLocked(self):
+        """Return True if the lock file exists."""
+        return exists(self.lockFile.getPath())
+
+    def getLock(self):
+        """Try to create the lock file, writing "<fqdn> <pid>" into it.
+
+        Returns True on success, False if the lock could not be obtained
+        (for instance because the file already exists)."""
+        try:
+            path = self.lockFile.getPath()
+            # the lock file lives in a subdirectory of the incrementals
+            # directory, so create its directory (and any parents) first
+            lockDir = os.path.dirname(path)
+            if lockDir and not exists(lockDir):
+                os.makedirs(lockDir)
+            f = FileUtils.atomicCreate(path, "w")
+            try:
+                f.write("%s %d" % (socket.getfqdn(), os.getpid()))
+            finally:
+                f.close()
+            return True
+        except Exception:
+            # best effort: any failure just means "no lock"; unlike a bare
+            # except this lets KeyboardInterrupt and SystemExit through
+            return False
+
+    def unlock(self):
+        """Remove the lock file; raises OSError if it cannot be removed."""
+        os.remove(self.lockFile.getPath())
+
+    def getLockInfo(self):
+        """Return the lock file's modification time formatted as
+        "YYYY-MM-DD HH:MM:SS" (UTC), or None if the file cannot be examined."""
+        import time
+        try:
+            timestamp = os.stat(self.lockFile.getPath()).st_mtime
+        except OSError:
+            return None
+        # strftime wants a time tuple, not seconds since the epoch
+        return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(timestamp))
+
+class IncrDumpLock(Lock):
+    """A Lock backed by an IncrDumpLockFile."""
+
+    def __init__(self, config, date, wikiName):
+        self._config, self.date, self.wikiName = config, date, wikiName
+        self.lockFile = IncrDumpLockFile(config, date, wikiName)
+
+class MaxRevIDLock(Lock):
+    """A Lock backed by a MaxRevIDLockFile."""
+
+    def __init__(self,config, date, wikiName):
+        self._config, self.date, self.wikiName = config, date, wikiName
+        self.lockFile = MaxRevIDLockFile(config, date, wikiName)
+
+class Config(object):
+    """Settings for the adds/changes dumps, read from ini-style files.
+
+    Built-in defaults are used for anything the configuration files do not
+    set; the parsed values are exposed as attributes (see parseConfFile)."""
+
+    def __init__(self, configFile=False):
+        """Read the configuration.
+
+        configFile -- name of the configuration file, joined to the directory
+                      of the running script; "dumpincr.conf" if not given.
+                      /etc/dumpincrementals.conf and ~/.dumpincr.conf are
+                      read as well.
+
+        Raises ConfigParser.NoSectionError if there is no 'wiki' section and
+        ConfigParser.NoOptionError if it has no 'mediawiki' option."""
+        self.projectName = False
+
+        home = os.path.dirname(sys.argv[0])
+        if (not configFile):
+            configFile = "dumpincr.conf"
+        self.files = [
+            os.path.join(home,configFile),
+            "/etc/dumpincrementals.conf"]
+        # HOME may be unset (e.g. under cron); os.path.join(None, ...) would
+        # raise, so only add the per-user file when HOME is known
+        userHome = os.getenv("HOME")
+        if userHome:
+            self.files.append(os.path.join(userHome, ".dumpincr.conf"))
+        defaults = {
+            #"wiki": {
+            "allwikislist": "",
+            "privatewikislist": "",
+            "closedwikislist": "",
+            #"output": {
+            "incrementalsdir": "/dumps/public/incr",
+            "templatedir": home,
+            "temp":"/dumps/temp",
+            "webroot": "http://localhost/dumps/incr",
+            "fileperms": "0640",
+            "delay": "43200",
+            #"database": {
+            "user": "root",
+            "password": "",
+            #"tools": {
+            "mediawiki" : "",
+            "php": "/bin/php",
+            "gzip": "/usr/bin/gzip",
+            "bzip2": "/usr/bin/bzip2",
+            "mysql": "/usr/bin/mysql",
+            "checkforbz2footer": "/usr/local/bin/checkforbz2footer",
+            "writeuptopageid": "/usr/local/bin/writeuptopageid",
+            "multiversion": "",
+            #"cleanup": {
+            "keep": "3",
+            }
+
+        self.conf = ConfigParser.SafeConfigParser(defaults)
+        self.conf.read(self.files)
+
+        if not self.conf.has_section("wiki"):
+            print "The mandatory configuration section 'wiki' was not defined."
+            raise ConfigParser.NoSectionError('wiki')
+
+        # NOTE(review): "mediawiki" has an (empty) entry in the defaults
+        # above, so has_option() is true for every section and this check
+        # cannot currently fire; confirm whether an empty value should be
+        # rejected here.
+        if not self.conf.has_option("wiki","mediawiki"):
+            print "The mandatory setting 'mediawiki' in the section 'wiki' was not defined."
+            # NoOptionError takes (option, section), in that order
+            raise ConfigParser.NoOptionError('mediawiki','wiki')
+
+        self.parseConfFile()
+
+    def parseConfFile(self):
+        """Copy the settings of every section into attributes of this object,
+        adding any optional section that is missing so that the defaults
+        apply to it."""
+        self.mediawiki = self.conf.get("wiki", "mediawiki")
+        self.allWikisList = MiscUtils.dbList(self.conf.get("wiki", "allwikislist"))
+        self.privateWikisList = MiscUtils.dbList(self.conf.get("wiki", "privatewikislist"))
+        self.closedWikisList = MiscUtils.dbList(self.conf.get("wiki", "closedwikislist"))
+
+        if not self.conf.has_section('output'):
+            self.conf.add_section('output')
+        self.incrementalsDir = self.conf.get("output", "incrementalsdir")
+        self.tempDir = self.conf.get("output", "temp")
+        self.templateDir = self.conf.get("output", "templatedir")
+        self.webRoot = self.conf.get("output", "webroot")
+        # base 0: a leading "0" in the setting (e.g. "0640") is read as octal
+        self.fileperms = int(self.conf.get("output", "fileperms"), 0)
+        self.delay = int(self.conf.get("output", "delay"), 0)
+
+        if not self.conf.has_section('tools'):
+            self.conf.add_section('tools')
+        self.php = self.conf.get("tools", "php")
+        self.gzip = self.conf.get("tools", "gzip")
+        self.bzip2 = self.conf.get("tools", "bzip2")
+        self.mysql = self.conf.get("tools", "mysql")
+        self.checkforbz2footer = self.conf.get("tools","checkforbz2footer")
+        self.writeuptopageid = self.conf.get("tools","writeuptopageid")
+        self.multiversion = self.conf.get("tools","multiversion")
+
+        if not self.conf.has_section('cleanup'):
+            self.conf.add_section('cleanup')
+        self.keep = self.conf.getint("cleanup", "keep")
+
+        if not self.conf.has_section('database'):
+            self.conf.add_section('database')
+        self.dbUser = self.conf.get("database", "user")
+        self.dbPassword = self.conf.get("database", "password")
+
+    def readTemplate(self, name):
+        """Return the contents of the named file in the template directory."""
+        template = os.path.join(self.templateDir, name)
+        return FileUtils.readFile(template)
+
+class RunSimpleCommand(object):
+    """Helpers for running an external command, retrying it on failure."""
+
+    def _raiseFailure(command, proc, error):
+        """Raise IncrementDumpsError describing the failed command."""
+        if isinstance(command, (list, tuple)):
+            commandString = " ".join(command)
+        else:
+            commandString = command
+        message = "command '%s' failed" % commandString
+        # proc is None when the command was never run (maxtries < 1)
+        if proc is not None:
+            message = message + " with return code %s " % proc.returncode
+        if error is None:
+            error = ""
+        raise IncrementDumpsError(message + " and error '%s'" % error)
+
+    def runWithOutput(command, maxtries = 3, shell=False):
+        """Run a command and return the output as a string.
+
+        command  -- argument list, or a string if shell is True
+        maxtries -- how many times to run the command before giving up
+        shell    -- passed through to Popen
+
+        Raises IncrementDumpsError if no attempt exits with return code 0."""
+        proc = None
+        error = None
+        tries = 0
+        while tries < maxtries:
+            proc = Popen(command, shell = shell, stdout = PIPE, stderr = PIPE)
+            output, error = proc.communicate()
+            if not proc.returncode:
+                return output
+            tries = tries + 1
+        RunSimpleCommand._raiseFailure(command, proc, error)
+
+    def runWithNoOutput(command, maxtries = 3, shell=False):
+        """Run a command, expecting no output; stdout is not captured.
+
+        Takes the same arguments as runWithOutput and returns None.
+        Raises IncrementDumpsError if no attempt exits with return code 0."""
+        proc = None
+        error = None
+        tries = 0
+        while tries < maxtries:
+            proc = Popen(command, shell = shell, stderr = PIPE)
+            # output will be None, we can ignore it
+            output, error = proc.communicate()
+            if not proc.returncode:
+                return
+            tries = tries + 1
+        RunSimpleCommand._raiseFailure(command, proc, error)
+
+    _raiseFailure = staticmethod(_raiseFailure)
+    runWithOutput = staticmethod(runWithOutput)
+    runWithNoOutput = staticmethod(runWithNoOutput)
+
+class MultiVersion(object):
+    """Builds the invocation of a MediaWiki maintenance script, going through
+    the configured multiversion script when that is set and exists."""
+
+    def MWScriptAsString(config, maintenanceScript):
+        """Return the pieces from MWScriptAsArray joined with spaces."""
+        pieces = MultiVersion.MWScriptAsArray(config, maintenanceScript)
+        return " ".join(pieces)
+
+    def MWScriptAsArray(config, maintenanceScript):
+        """Return [multiversion script, maintenance script] if a multiversion
+        script is configured and present on disk, else a one-element list
+        with the script's path under <mediawiki>/maintenance."""
+        wrapper = config.multiversion
+        if wrapper != "" and exists(wrapper):
+            return [ wrapper, maintenanceScript ]
+        scriptPath = "%s/maintenance/%s" % (config.mediawiki, maintenanceScript)
+        return [ scriptPath ]
+
+    MWScriptAsString = staticmethod(MWScriptAsString)
+    MWScriptAsArray = staticmethod(MWScriptAsArray)
+
+class DBServer(object):
+    """Finds the database server to use for a wiki and builds mysql command
+    lines that talk to it."""
+
+    def __init__(self, config, wikiName):
+        """Look up the server for the wiki right away (runs php)."""
+        self.config = config
+        self.wikiName = wikiName
+        self.dbServer = self.defaultServer()
+
+    def defaultServer(self):
+        """Return the output of getSlaveServer.php for this wiki and the
+        'dump' group, with trailing whitespace removed.
+
+        Raises IncrementDumpsError if the php command does not exist or the
+        script fails."""
+        if (not exists( self.config.php ) ):
+            raise IncrementDumpsError("php command %s not found" % self.config.php)
+        commandList = MultiVersion.MWScriptAsArray(self.config, "getSlaveServer.php")
+        command =  [ self.config.php, "-q" ]
+        command.extend(commandList)
+        command.extend( [ "--wiki=%s" % self.wikiName, "--group=dump" ])
+        return RunSimpleCommand.runWithOutput(command, shell=False).rstrip()
+
+    def buildSqlCommand(self, query):
+        """Put together a command to execute an sql query to the server for this DB.
+
+        Returns a shell command string; raises IncrementDumpsError if the
+        mysql command does not exist."""
+        if (not exists( self.config.mysql ) ):
+            raise IncrementDumpsError("mysql command %s not found" % self.config.mysql)
+        # NOTE(review): query and password are placed into a shell string
+        # unescaped (a single quote in the query breaks the command, and the
+        # password is visible in the process list); callers must only pass
+        # trusted queries.
+        command =  "/bin/echo '%s' | %s -h %s -u %s " % ( query, self.config.mysql, self.dbServer, self.config.dbUser )
+        if self.config.dbPassword != "":
+            command = command + "-p" + self.config.dbPassword
+        command = command + " -r --silent " + self.wikiName
+        return command
+
+class IncrementDumpsError(Exception):
+    """Raised when a step of the adds/changes dumps fails."""
+
+class IncrementDir(object):
+    """Works out the directories in which adds/changes dumps are kept:
+    <base>, <base>/<wiki> and <base>/<wiki>/<date>."""
+
+    def __init__(self, config, date = None):
+        self._config, self.date = config, date
+
+    def getIncDirBase(self):
+        """Return the configured top-level incrementals directory."""
+        return self._config.incrementalsDir
+
+    def getIncDirNoDate(self, wikiName):
+        """Return the wiki's directory under the base directory."""
+        return os.path.join(self.getIncDirBase(), wikiName)
+
+    def getIncDir(self, wikiName, date = None):
+        """Return the wiki's directory for the given date, or for this
+        object's own date when no date is passed."""
+        if date is None:
+            date = self.date
+        return os.path.join(self.getIncDirBase(), wikiName, date)
+
+# NOTE(review): this repeats the IncrementDumpsError definition that appears
+# earlier in this file; it merely rebinds the name and looks redundant.
+class IncrementDumpsError(Exception):
+    pass
+
+class IncDumpDirs(object):
+    """Lists and cleans up the dated dump directories of one wiki."""
+
+    def __init__(self, config, wikiName):
+        self._config = config
+        self.wikiName = wikiName
+        self.incrDir = IncrementDir(self._config)
+
+    def getIncDumpDirs(self):
+        """Return the sorted names of the wiki's subdirectories that look like
+        dates (eight digits); an empty list if the directory can't be read."""
+        base = self.incrDir.getIncDirNoDate(self.wikiName)
+        digits = re.compile(r"^\d{4}\d{2}\d{2}$")
+        try:
+            entries = os.listdir(base)
+        except OSError:
+            return []
+        return sorted([entry for entry in entries if digits.match(entry)])
+
+    def cleanupOldIncrDumps(self, date):
+        """Remove old dump directories of this wiki.
+
+        The directory for the given date is never removed; of the others the
+        newest 'keep' (from the configuration) are kept.  With keep set to 0
+        every directory except the given date's is removed."""
+        import shutil
+        # never consider the dump we are working on, wherever it sorts
+        old = [dump for dump in self.getIncDumpDirs() if dump != date]
+        if self._config.keep > 0:
+            old = old[:-(self._config.keep)]
+        for dump in old:
+            toRemove = os.path.join(self.incrDir.getIncDirNoDate(self.wikiName), dump)
+            shutil.rmtree("%s" % toRemove)
+
+    def getPrevIncrDate(self, date):
+        """Return the date of the most recent dump before the given date that
+        completed successfully, or None if there is none."""
+        previous = None
+        for dump in self.getIncDumpDirs():
+            # names are YYYYMMDD, so string order is date order
+            if dump >= date:
+                break
+            statusInfo = StatusInfo(self._config, dump, self.wikiName)
+            # getStatus returns a boolean, true when the status is "done"
+            if statusInfo.getStatus(dump):
+                previous = dump
+        return previous
+
+    def getLatestIncrDate(self):
+        """Return the date of the most recent dump directory, or None."""
+        dirs = self.getIncDumpDirs()
+        if dirs:
+            return(dirs[-1])
+        else:
+            return(None)


Property changes on: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
___________________________________________________________________
Added: svn:eol-style
   + native

Added: branches/ariel/xmldumps-backup/incrementals/README.config
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/README.config                   
        (rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/README.config   2011-10-12 
23:24:40 UTC (rev 99655)
@@ -0,0 +1,38 @@
+By default, configuration options are read from the file "dumpincr.conf" in the directory
+containing the scripts, and also from /etc/dumpincrementals.conf and ~/.dumpincr.conf.
+A different filename may be specified at run time.
+
+The following configuration options are accepted:
+
+In the "wiki" section,
+mediawiki        -- full path to the directory of the MediaWiki installation
+allwikislist     -- full path to a list of all projects to be dumped, as they 
appear in MySql
+privatewikislist -- full path to a list of all projects that are private and 
hence should not be dumped, if any
+closedwikislist  -- full path to a list of all projects that are closed and 
hence should not be dumped, if any
+                
+In the "output" section,
+incrementalsdir  -- full path to the top level directory where adds/changes 
dumps will be written; this should
+                    be web-accessible
+templatedir      -- full path to the directory containing template html files 
such as incrs-index.html (typically
+                    the same directory as that which contains the dump scripts)
+temp             -- full path to a directory which is used for the generation of temporary files; this should
+                    not be web-accessible
+webroot          -- url to top level directory with the main index page, for 
example http://localhost/mydumps
+fileperms        -- read and write permissions that will be assigned to 
created files; this is in octal four-digit
+                    format, for example 0644
+delay            -- number of seconds to wait after a max rev_id has been 
recorded, before dumping revisions
+
+In the "database" section,
+user     -- the name of a database user with read access to all tables in the 
databases 
+            which will be dumped
+password -- the password for the above user
+
+In the "tools" section, 
+php               -- the full path to the php command
+mysql             -- the full path to the mysql command
+gzip              -- the full path to the gzip command
+bzip2             -- the full path to the bzip2 command
+checkforbz2footer -- the full path to the checkforbz2footer command
+writeuptopageid   -- the full path to the writeuptopageid command
+
+In the "cleanup" section,
+keep -- the number of old dumps to keep, per project.

Added: branches/ariel/xmldumps-backup/incrementals/README.txt
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/README.txt                      
        (rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/README.txt      2011-10-12 
23:24:40 UTC (rev 99655)
@@ -0,0 +1,66 @@
+The adds/changes dumps are a supplementary set of dumps intended to accompany
+the regular XML dump files.
+
+The adds/changes dumps are produced in two stages. 
+
+In stage one, the max rev_id value at the time of the run is written out to a 
file for each project for the given date.  Script name: generatemaxrevids.py
+
+In stage two, intended to be run at a later time, a stub file is written
+containing all revisions from the previous adds/changes dump through the
+max rev_id just written.  This file is sorted by page id, just as the
+regular XML stubs files are.  Next a history file containing metadata and
+page text for those revisions is written, in the same format as the
+pages-meta-history file generated for the regular XML dumps.  A status file
+is written to indicate that the job is done, and the md5sums of the stub and
+revision text files are written to a file as well.  Script name: generateincrementals.py
+
+The reason that there are two stages run via two separate scripts is that
+you may want to allow editors time to delete or hide sensitive or offensive
+material newly entered.  A delay of an arbitrary number of seconds between
+the recording of the max rev_id to dump and the start of the stub and 
+revision text dump is configurable in the configuration file; see 
+README.config for information on that. 
+
+Installation: 
+
+Seriously?  You want to install this already?  This is version 0.0.1.  Know
+what that means? It's buggy, risky, and could eat your data.  
+
+However, if you just want to play around with it on your laptop, fine.  
+* Put the files generateincrementals.py, generatemaxrevids.py, incrmonitor.py,
+  incrmonitor and IncrDumpLib.py together with the sample configuration file 
+  dumpincr.conf into a directory from which the job will run.  
+  Make sure you have a copy or a symlink of WikiDump.py from the regular XML
+  dumps in this same directory.
+  Also make sure you have a template for the top level index.html file, called
+  "incrs-index.html" in the same directory with these scripts.  See the 
existing
+  incrs-index.html file for the format; the key here is that you want the
+  string "%(items)s" in between <ul> and </ul> tags.  The status of the dump
+  for each wiki, along with links to the stub and revisions files, will be
+  included as a list item in that spot in the file.
+* See README.config for information on the various options in the config file.
+* Create the top level directory underneath which there will be a directory 
+  for each project for which you want to generate adds/changes dumps. You
+  needn't create the subdirectories; this will be done for you at run time.
+* Do a test run; run generatemaxrevids.py by hand.  Then look in the top level
+  directory you created earlier.  Is there a directory for each project? Is
+  there a subdirectory under each of these with the date, in YYYYMMDD format?
+  In the date subdirectory is there a file maxrevid.txt containing a positive
+  integer?
+* Do the phase 2 test run: run generateincrementals.py by hand.  If you have 
+  configured a large delay, you will need to wait at least that amount of time
+  before running this script.  When it has completed, check the subdirectory
+  from phase 1; are there files analogous to the following?
+    mywiki-yyyymmdd-md5sums.txt                   
+    mywiki-yyyymmdd-pages-meta-hist-incr.xml.bz2  
+    mywiki-yyyymmdd-stubs-meta-hist-incr.xml.gz
+    maxrevid.txt
+    status.txt
+  Does the status.txt file contain "done"?
+* If the runs look like they are producing the right files, do the html
+  generation by hand; run incrmonitor.py.  In the top level directory for the
+  adds/changes dumps, do you see the file index.html?  If you view that
+  file in a browser, do the contents look reasonable?
+* If that looks good, put phase 1 and phase 2 into separate cron jobs, 
+  spacing them out as appropriate.
+


Property changes on: branches/ariel/xmldumps-backup/incrementals/README.txt
___________________________________________________________________
Added: svn:eol-style
   + native

Added: branches/ariel/xmldumps-backup/incrementals/all.dblist
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/all.dblist                      
        (rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/all.dblist      2011-10-12 
23:24:40 UTC (rev 99655)
@@ -0,0 +1,5 @@
+elwikidb
+simplewikidb
+testAw118wmf1
+testBw118wmf1
+testCw118wmf1
\ No newline at end of file

Added: branches/ariel/xmldumps-backup/incrementals/closed.dblist
===================================================================
Added: branches/ariel/xmldumps-backup/incrementals/dumpincr.conf.sample
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/dumpincr.conf.sample            
                (rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/dumpincr.conf.sample    
2011-10-12 23:24:40 UTC (rev 99655)
@@ -0,0 +1,32 @@
+# sample configuration file
+
+[wiki]
+mediawiki=/src/mediawiki/118wmf1/1.18wmf1
+allwikislist=/home/backups/incrementals/all.dblist
+privatewikislist=/home/backups/incrementals/private.dblist
+closedwikislist=/home/backups/incrementals/closed.dblist
+
+[output]
+incrementalsdir=/dumps/public/incr
+templatedir=/home/backups/incrementals
+temp=/dumps/temp
+webroot=http://localhost/mydumps
+fileperms=0644
+# minimum number of seconds from revision creation 
+# til it can be dumped
+delay=43200
+
+[database]
+user=dbuser
+password=leet
+
+[tools]
+php=/usr/bin/php
+mysql=/usr/bin/mysql
+gzip=/usr/bin/gzip
+bzip2=/usr/bin/bzip2
+checkforbz2footer=/usr/local/bin/checkforbz2footer
+writeuptopageid=/usr/local/bin/writeuptopageid
+
+[cleanup]
+keep=20

Added: branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py         
                (rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 
2011-10-12 23:24:40 UTC (rev 99655)
@@ -0,0 +1,266 @@
+# for every wiki, read the maxid and the prev maxid
+# recorded for incrementals, dump stubs and dump history file
+# based on stubs.
+# this is phase 2 of daily xml change/adds dumps.
+
+import ConfigParser
+import getopt
+import os
+import re
+import sys
+import WikiDump
+from WikiDump import FileUtils, TimeUtils, MiscUtils
+import subprocess
+import socket
+import time
+import IncrDumpLib
+from IncrDumpLib import Lock, Config, RunSimpleCommand, MultiVersion, 
DBServer, IncrementDir, IncrementDumpsError, MaxRevIDFile, StatusFile, 
IncrDumpLockFile, StubFile, RevsFile, MD5File, IncDumpDirs, IncrDumpLock, 
MaxRevIDLock, StatusInfo
+from subprocess import Popen, PIPE
+from os.path import exists
+import hashlib
+import traceback
+
+class DumpResults(object):
+    """Result codes for the dump of one wiki."""
+
+    def __init__(self):
+        # OK: nothing (more) to do; TODO: try again later; FAILED: error
+        self.OK, self.TODO, self.FAILED = 0, 1, -1
+
+class IncrDump(object):
+    """Phase two of the adds/changes dumps for a single wiki: dump stubs and
+    revision text, write the md5 sums and mark the dump as done."""
+
+    def __init__(self,config, date, wikiName, doStubs, doRevs, dryrun, verbose):
+        """config   -- the Config object
+        date     -- date of the dump, the name of the dated directory
+        wikiName -- name of the wiki to dump
+        doStubs  -- whether to produce the stubs file
+        doRevs   -- whether to produce the revision text file
+        dryrun   -- if true, only show the commands that would be run
+        verbose  -- if true, report progress and errors on stdout"""
+        self._config = config
+        self.date = date
+        self.wikiName = wikiName
+        self.incrDir = IncrementDir(self._config, self.date)
+        self.doStubs = doStubs
+        self.doRevs = doRevs
+        self.dryrun = dryrun
+        self.maxRevIDFile = MaxRevIDFile(self._config, self.date, self.wikiName)
+        self.statusInfo = StatusInfo(self._config, self.date, self.wikiName)
+        self.stubFile = StubFile(self._config, self.date, self.wikiName)
+        self.revsFile = RevsFile(self._config, self.date, self.wikiName)
+        self.incrDumpsDirs = IncDumpDirs(self._config, self.wikiName)
+        self.verbose = verbose
+
+    def getMaxRevIdFromFile(self, date = None):
+        """Return the contents of the max rev id file for the date (default:
+        this dump's date), with trailing whitespace removed."""
+        if date is None:
+            date = self.date
+        maxRevIDFile = MaxRevIDFile(self._config, date, self.wikiName)
+        # strip the file's contents, not its path
+        return FileUtils.readFile(maxRevIDFile.getPath()).rstrip()
+
+    def doOneWiki(self):
+        """Run the dump for this wiki and return one of the DumpResults codes:
+        OK if the dump is complete or the wiki is not to be dumped, TODO if it
+        must be tried again later, FAILED if something went wrong."""
+        retCodes = DumpResults()
+        if (self.wikiName in self._config.privateWikisList
+                or self.wikiName in self._config.closedWikisList):
+            if (self.verbose):
+                print "wiki",self.wikiName,"skipped, wiki is private or closed"
+            return retCodes.OK
+        if not exists(self.incrDir.getIncDir(self.wikiName)):
+            os.makedirs(self.incrDir.getIncDir(self.wikiName))
+        # getStatus returns a boolean, true when the status is "done"
+        if self.statusInfo.getStatus():
+            if (self.verbose):
+                print "wiki",self.wikiName,"skipped, adds/changes dump already complete"
+            return retCodes.OK
+        try:
+            maxRevIDTime = os.path.getmtime(self.maxRevIDFile.getPath())
+        except OSError:
+            # phase one has not written the file; don't abort the whole run
+            if (self.verbose):
+                print "wiki",self.wikiName,"skipped, max rev id file could not be found"
+            return retCodes.FAILED
+        if time.time() - maxRevIDTime < self._config.delay:
+            if (self.verbose):
+                print "wiki",self.wikiName,"skipped, must wait for configured delay interval"
+            return retCodes.TODO
+        lock = None
+        if not self.dryrun:
+            lock = IncrDumpLock(self._config, self.date, self.wikiName)
+            if not lock.getLock():
+                if (self.verbose):
+                    print "wiki",self.wikiName,"skipped, wiki is locked, another process should be doing the job"
+                return retCodes.TODO
+        try:
+            if not self._runDumpSteps():
+                return retCodes.FAILED
+        except Exception:
+            if (self.verbose):
+                traceback.print_exc(file=sys.stdout)
+            return retCodes.FAILED
+        finally:
+            # release the lock however we leave, including on failure
+            if lock is not None:
+                lock.unlock()
+        if (self.verbose):
+            print "Success!  Wiki", self.wikiName, "incremental dump complete."
+        return retCodes.OK
+
+    def _runDumpSteps(self):
+        """Clean up old dumps, work out the revision range and run the
+        requested dump steps; return True if every step succeeded."""
+        if not self.dryrun:
+            self.incrDumpsDirs.cleanupOldIncrDumps(self.date)
+        maxRevID = self.getMaxRevIdFromFile()
+        prevDate = self.incrDumpsDirs.getPrevIncrDate(self.date)
+        prevRevID = None
+        if prevDate:
+            prevRevID = self.getMaxRevIdFromFile(prevDate)
+        if not prevRevID:
+            # no earlier dump to start from: go back ten revisions, but
+            # not below revision 1
+            prevRevID = str(max(int(maxRevID) - 10, 1))
+        else:
+            # this incr will cover every revision from the last incremental
+            # through the maxid we wrote out in phase one of this job.
+            prevRevID = str(int(prevRevID) + 1)
+        if self.doStubs:
+            maxRevID = str(int(maxRevID) + 1) # end rev id is not included in dump
+            if not self.dumpStub(prevRevID, maxRevID):
+                return False
+        if self.doRevs:
+            if not self.dumpRevs():
+                return False
+        if not self.dryrun:
+            if not self.md5sums():
+                return False
+            self.statusInfo.setStatus("done")
+        return True
+
+    def _runDumpCommand(self, command, description):
+        """Run the command, or just show it on a dry run; return True on
+        success and False, after reporting if verbose, on failure."""
+        if self.dryrun:
+            print "would run command for %s dump:" % description, command
+            return True
+        try:
+            RunSimpleCommand.runWithNoOutput(command, shell = False)
+        except IncrementDumpsError:
+            if (self.verbose):
+                print "error running %s dump for wiki %s" % (description, self.wikiName)
+                traceback.print_exc(file=sys.stdout)
+            return False
+        return True
+
+    def dumpStub(self, startRevID, endRevID):
+        """Write the stubs file for revisions from startRevID up to but not
+        including endRevID; return True on success, False on failure."""
+        scriptCommand = MultiVersion.MWScriptAsArray(self._config, "dumpBackup.php")
+        command = [ "%s" % self._config.php, "-q" ]
+        command.extend(scriptCommand)
+        command.extend(["--wiki=%s" % self.wikiName, "--stub", "--quiet",
+                        "--force-normal", "--output=gzip:%s" % self.stubFile.getPath(),
+                        "--revrange", "--revstart=%s" % startRevID, "--revend=%s" % endRevID ])
+        return self._runDumpCommand(command, "stubs")
+
+    def dumpRevs(self):
+        """Write the revision text file based on the stubs file; return True
+        on success, False on failure."""
+        scriptCommand = MultiVersion.MWScriptAsArray(self._config, "dumpTextPass.php")
+        command = [ "%s" % self._config.php, "-q" ]
+        command.extend(scriptCommand)
+        command.extend(["--wiki=%s" % self.wikiName, "--stub=gzip:%s" % self.stubFile.getPath(),
+                        "--force-normal", "--quiet", "--spawn=%s" % self._config.php,
+                        "--output=bzip2:%s" % self.revsFile.getPath()
+                        ])
+        return self._runDumpCommand(command, "revs")
+
+    def md5sumOneFile(self, filename):
+        """Return the md5 sum of the named file as a hex string."""
+        summer = hashlib.md5()
+        bufsize = 4192 * 32
+        infile = open(filename, "rb")
+        try:
+            chunk = infile.read(bufsize)
+            while chunk:
+                summer.update(chunk)
+                chunk = infile.read(bufsize)
+        finally:
+            infile.close()
+        return summer.hexdigest()
+
+    def md5sums(self):
+        """Write the md5 sums of the files produced, one per line, to the md5
+        sums file; return True on success, False on failure."""
+        try:
+            md5File = MD5File(self._config, self.date, self.wikiName)
+            files = []
+            if self.doStubs:
+                files.append(self.stubFile.getPath())
+            if self.doRevs:
+                files.append(self.revsFile.getPath())
+            if files:
+                text = "".join(["%s\n" % self.md5sumOneFile(f) for f in files])
+                FileUtils.writeFileInPlace(md5File.getPath(), text, self._config.fileperms)
+            return True
+        except Exception:
+            if (self.verbose):
+                traceback.print_exc(file=sys.stdout)
+            return False
+
+class IncrDumpLoop(object):
+    def __init__(self, config, date, doStubs, doRevs, dryrun, verbose):
+        self._config = config
+        self.date = date
+        self.doStubs = doStubs
+        self.doRevs = doRevs
+        self.dryrun = dryrun
+        self.verbose = verbose
+
+    def doRunOnAllWikis(self):
+        retCodes = DumpResults()
+        failures = 0
+        todos = 0
+        for w in self._config.allWikisList:
+            dump = IncrDump(config, date, w, doStubs, doRevs, dryrun, 
self.verbose)
+            result = dump.doOneWiki()
+            if result == retCodes.FAILED:
+                failures = failures + 1
+            elif result == retCodes.TODO:
+                todos = todos + 1
+        return (failures, todos)
+
+    def doAllWikisTilDone(self,numFails):
+        fails = 0
+        while 1:
+            (failures, todos) = self.doRunOnAllWikis()
+            if not failures and not todos:
+                break
+            fails  = fails + 1
+            if fails > numFails:
+                raise IncrementDumpsError("Too many consecutive failures, 
giving up")
+            # wait 5 minutes and try another loop
+#            raise IncrementDumpsError("would sleep")
+            time.sleep(300)
+
+def usage(message = None):
+    if message:
+        print message
+        print "Usage: python generateincrementals.py [options] [wikidbname]"
+        print "Options: --configfile, --date, --dryrun, --revsonly, 
--stubsonly, --verbose"
+        print "--configfile:  Specify an alternate config file to read. 
Default file is 'dumpincr.conf' in the current directory."
+        print "--date:        (Re)run incremental of a given date (use with 
care)."
+        print "--dryrun:      Don't actually dump anything but print the 
commands that would be run."
+        print "--revsonly:    Do only the stubs part of the dumps."
+        print "--stubsonly:   Do only the revision text part of the dumps."
+        print "--verbose:     Print error messages and other informative 
messages (normally the"
+        print "               script runs silently)."
+        print "wikiname:      Run the dumps only for the specific wiki."
+        sys.exit(1)
+
+if __name__ == "__main__":
+    configFile = False
+    result = False
+    date = None
+    doStubs = True
+    doRevs = True
+    dryrun = False
+    verbose = False
+
+    try:
+        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
+                                                 ['date=', 'configfile=', 
'stubsonly', 'revsonly', 'dryrun', 'verbose' ])
+    except:
+        usage("Unknown option specified")
+
+    for (opt, val) in options:
+        if opt == "--date":
+            date = val
+        elif opt == "--configfile":
+            configFile = val
+        elif opt == "--stubsonly":
+            doRevs = False
+        elif opt == "--revsonly":
+            doStubs = False
+        elif opt == "--dryrun":
+            dryrun = True
+        elif opt == "--verbose":
+            verbose = True
+        
+    if not doRevs and not doStubs:
+        usage("You may not specify stubsonly and revsonly options together.")
+
+    if (configFile):
+        config = Config(configFile)
+    else:
+        config = Config()
+
+    if not date:
+        date = TimeUtils.today()
+
+    if len(remainder) > 0:
+        dump = IncrDump(config, date, remainder[0], doStubs, doRevs, dryrun, 
verbose)
+    else:
+        dump = IncrDumpLoop(config, date, doStubs, doRevs, dryrun, verbose)
+        dump.doAllWikisTilDone(3)


Property changes on: 
branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
___________________________________________________________________
Added: svn:eol-style
   + native

Added: branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py            
                (rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py    
2011-10-12 23:24:40 UTC (rev 99655)
@@ -0,0 +1,138 @@
+# for every wiki, find and record the max rev_id in use.
+# this is phase 1 of daily xml change/adds dumps.
+
+import ConfigParser
+import getopt
+import os
+import re
+import sys
+import WikiDump
+from WikiDump import FileUtils, TimeUtils, MiscUtils
+import subprocess
+import socket
+import time
+import IncrDumpLib
+from IncrDumpLib import Lock, Config, RunSimpleCommand, MultiVersion, 
DBServer, IncrementDir, IncrementDumpsError, MaxRevIDFile, MaxRevIDLockFile, 
IncrDumpLock, MaxRevIDLock
+from subprocess import Popen, PIPE
+from os.path import exists
+import traceback
+
+class MaxRevID(object):
+    def __init__(self, config, wikiName, date):
+        self._config = config
+        self.wikiName = wikiName
+        self.date = date
+        self.maxID = 0
+        self.maxRevIdFile = MaxRevIDFile(self._config, self.date, 
self.wikiName)
+
+    def getMaxRevID(self):
+        query = "select MAX(rev_id) from revision";
+        db = DBServer(self._config, self.wikiName)
+        # get the result
+        self.maxID = RunSimpleCommand.runWithOutput(db.buildSqlCommand(query), 
shell = True)
+
+    def recordMaxRevID(self):
+        self.getMaxRevID()
+        # write the max id in a file in the right place
+        FileUtils.writeFileInPlace(self.maxRevIdFile.getPath(), self.maxID, 
self._config.fileperms)
+
+    def exists(self):
+        return exists(self.maxRevIdFile.getPath())
+
+class MaxIDDump(object):
+    def __init__(self,config, date, verbose):
+        self._config = config
+        self.date = date
+        self.incrDir = IncrementDir(self._config, self.date)
+        self.verbose = verbose
+
+    def doOneWiki(self, w):
+        success = True
+        if w not in self._config.privateWikisList and w not in 
self._config.closedWikisList:
+            if not exists(self.incrDir.getIncDir(w)):
+                os.makedirs(self.incrDir.getIncDir(w))
+            lock = MaxRevIDLock(self._config, self.date, w)
+            if lock.getLock():
+                try:
+                    maxRevID = MaxRevID(self._config, w, self.date)
+                    if not maxRevID.exists():
+                        maxRevID.recordMaxRevID()
+                except:
+                    if (self.verbose):
+                        print "Wiki ", w, "failed to get max revid."
+                        traceback.print_exc(file=sys.stdout)
+                    success = False
+                lock.unlock()
+            else:
+                if (self.verbose):
+                    print "Wiki ", w, "failed to get lock."
+                    traceback.print_exc(file=sys.stdout)
+        if success:
+            if (self.verbose):
+                print "Success!  Wiki", w, "adds/changes dump complete."
+        return success
+
+    def doRunOnAllWikis(self):
+        failures = 0
+        for w in self._config.allWikisList:
+            if not self.doOneWiki(w):
+                failures = failures + 1
+        return failures
+
+    def doAllWikisTilDone(self,numFails):
+        fails = 0
+        while 1:
+            result = self.doRunOnAllWikis()
+            if not result:
+                break
+            fails  = fails + 1
+            if fails > numFails:
+                raise("Too many consecutive failures, giving up")
+            # wait 5 minutes and try another loop
+            time.sleep(300)
+
+def usage(message = None):
+    if message:
+        print message
+        print "Usage: python generateincrementals.py [options] [wikidbname]"
+        print "Options: --configfile, --date, --verbose"
+        print "--configfile:  Specify an alternate config file to read. 
Default file is 'dumpincr.conf' in the current directory."
+        print "--date:        (Re)run incremental of a given date (use with 
care)."
+        print "--verbose:     Print error messages and other informative 
messages (normally the"
+        print "               script runs silently)."
+        print "wikiname:      Run the dumps only for the specific wiki."
+        sys.exit(1)
+
+if __name__ == "__main__":
+    configFile = False
+    result = False
+    date = None
+    verbose = False
+
+    try:
+        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
+                                                 ['date=', 'configfile=', 
'verbose' ])
+    except:
+        usage("Unknown option specified")
+
+    for (opt, val) in options:
+        if opt == "--date":
+            date = val
+        elif opt == "--configfile":
+            configFile = val
+        elif opt == "--verbose":
+            verbose = True
+
+    if (configFile):
+        config = Config(configFile)
+    else:
+        config = Config()
+
+    if not date:
+        date = TimeUtils.today()
+
+    dump = MaxIDDump(config, date, verbose)
+    if len(remainder) > 0:
+        dump.doOneWiki(remainder[0])
+    else:
+        dump.doAllWikisTilDone(3)


Property changes on: 
branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
___________________________________________________________________
Added: svn:eol-style
   + native

Added: branches/ariel/xmldumps-backup/incrementals/incrmonitor
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/incrmonitor                     
        (rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/incrmonitor     2011-10-12 
23:24:40 UTC (rev 99655)
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+WIKIDUMP_BASE=`dirname "$0"`
+
+if [ ! -z "$1" ]; then
+    configFile="$1"
+else
+    configFile=""
+fi
+
+if [ ! -z "$2" ]; then
+    if [ "$2" == "verbose" ]; then
+       verbose="--verbose"
+    else
+       echo "Unknown option $2"
+       exit 1
+    fi
+fi
+
+while true; do
+       echo ""
+       echo "Sweeping!"
+       python $WIKIDUMP_BASE/incrmonitor.py "$configFile" "$verbose"
+       echo "sleeping"
+       sleep 15
+done


Property changes on: branches/ariel/xmldumps-backup/incrementals/incrmonitor
___________________________________________________________________
Added: svn:executable
   + *

Added: branches/ariel/xmldumps-backup/incrementals/incrmonitor.py
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/incrmonitor.py                  
        (rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/incrmonitor.py  2011-10-12 
23:24:40 UTC (rev 99655)
@@ -0,0 +1,134 @@
+# generate an index page covering the status of and links to 
+# incremental files for the latest date for each project 
+
+import ConfigParser
+import getopt
+import os
+import re
+import sys
+import WikiDump
+from WikiDump import FileUtils, TimeUtils, MiscUtils
+import subprocess
+import socket
+import time
+import IncrDumpLib
+from IncrDumpLib import Lock, Config, RunSimpleCommand, MultiVersion, 
DBServer, IncrementDir, IncrementDumpsError, IndexFile, IncrDumpLockFile, 
IncDumpDirs, IncrDumpLock, MaxRevIDLock, StubFile, RevsFile, StatusFile
+from subprocess import Popen, PIPE
+from os.path import exists
+import hashlib
+import traceback
+
+class Link(object):
+
+    def makeLink(path, linkText):
+        return('<a href = "' + path + '">' + linkText + "</a>")
+
+    makeLink = staticmethod(makeLink)
+
+class Index(object):
+    def __init__(self, config, verbose):
+        self._config = config
+        self.indexFile = IndexFile(self._config)
+        self.incrDir = IncrementDir(self._config)
+        self.verbose = verbose
+
+    def doAllWikis(self):
+        text = ""
+        for w in self._config.allWikisList:
+            result = self.doOneWiki(w)
+            if result:
+                text = text + "<li>"+ result + "</li>\n"
+        indexText = self._config.readTemplate("incrs-index.html") %  { "items" 
: text }
+        FileUtils.writeFileInPlace(self.indexFile.getPath(), indexText, 
self._config.fileperms)
+
+    def doOneWiki(self, w):
+        if w not in self._config.privateWikisList and w not in 
self._config.closedWikisList:
+            self.incrDumpsDirs = IncDumpDirs(self._config, w)
+            if not exists(self.incrDir.getIncDirNoDate(w)):
+                if (self.verbose):
+                    print "No dump for wiki ", w
+                    next
+
+            incrDate = self.incrDumpsDirs.getLatestIncrDate()
+            if not incrDate:
+                if (self.verbose):
+                    print "No dump for wiki ", w
+                    next
+
+            try:
+                lock = IncrDumpLock(self._config, incrDate, w)
+                lockDate = lock.getLockInfo()
+
+                stub = StubFile(self._config, incrDate, w)
+                (stubDate, stubSize) = stub.getFileInfo()
+                revs = RevsFile(self._config, incrDate, w)
+                (revsDate, revsSize) = revs.getFileInfo()
+                stat = StatusFile(self._config, incrDate, w)
+                statContents = FileUtils.readFile(stat.getPath())
+                    
+            except:
+                if (self.verbose):
+                    traceback.print_exc(file=sys.stdout)
+                return "Error encountered, no information available for wiki", 
w
+
+            try:
+                wikinameText = "<strong>%s</strong>" % w
+                if lockDate:
+                    lockText = "run started on %s." % lockDate
+                else:
+                    lockText = None
+                if stubDate:
+                    stubText = "stubs: %s (size %s)" %  
(Link.makeLink(os.path.join(w, incrDate, stub.getFileName()),stubDate), 
stubSize)
+                else:
+                    stubText = None
+                if revsDate:
+                    revsText = "revs: %s (size %s)" %  
(Link.makeLink(os.path.join(w, incrDate, revs.getFileName()),revsDate), 
revsSize)
+                else:
+                    revsText = None
+                if statContents:
+                    statText = "(%s)" % (statContents)
+                else:
+                    statText = None
+
+                wikiInfo = " ".join( filter( None, [ wikinameText, lockText, 
statText ] ) ) + "<br />"
+                wikiInfo = wikiInfo + " &nbsp;&nbsp; " + " |  ".join( filter( 
None, [ stubText, revsText ] ))
+            except:
+                if (self.verbose):
+                    traceback.print_exc(file=sys.stdout)
+                return "Error encountered formatting information for wiki", w
+                
+            return wikiInfo
+
+def usage(message = None):
+    if message:
+        print message
+        print "Usage: python monitor.py [options] [wikidbname]"
+        print "Options: --configfile, --verbose"
+        print "--configfile:  Specify an alternate config file to read. 
Default file is 'dumpincr.conf' in the current directory."
+        print "--verbose:     Print error messages and other informative 
messages (normally the"
+        print "               script runs silently)."
+        sys.exit(1)
+            
+if __name__ == "__main__":
+    configFile = False
+    verbose = False
+
+    try:
+        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
+                                                 ['configfile=', 'verbose' ])
+    except:
+        usage("Unknown option specified")
+
+    for (opt, val) in options:
+        if opt == "--configfile":
+            configFile = val
+        elif opt == '--verbose':
+            verbose = True
+
+    if (configFile):
+        config = Config(configFile)
+    else:
+        config = Config()
+
+    index = Index(config, verbose)
+    index.doAllWikis()


Property changes on: branches/ariel/xmldumps-backup/incrementals/incrmonitor.py
___________________________________________________________________
Added: svn:eol-style
   + native

Added: branches/ariel/xmldumps-backup/incrementals/incrs-index.html
===================================================================
--- branches/ariel/xmldumps-backup/incrementals/incrs-index.html                
                (rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/incrs-index.html        
2011-10-12 23:24:40 UTC (rev 99655)
@@ -0,0 +1,118 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+       "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+<head>
+       <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+       <title>Incremental dumps</title>
+       <style type="text/css">
+               html, body {
+                       background-color: #ffffff;
+                       color: black;
+               }
+               .siteinfo {
+                       text-align: center;
+               }
+               li {
+                       background-color: #ffffff;
+                       list-style-type: none;
+               }
+               li li {
+                       background-color: white;
+               }
+               li ul {
+                       margin-top: 4px;
+                       margin-bottom: 8px;
+               }
+               .detail {
+                       font-weight: normal;
+                       font-style: italic;
+               }
+               .updates {
+                       font: monospace;
+                       font-size: smaller;
+               }
+               .status {
+                       font-weight: bold;
+                       padding-left: 1em;
+                       padding-right: 1em;
+               }
+               .in-progress {
+                       font-weight: bold;
+               }
+               .failed {
+                       color: Maroon;
+                       font-weight: bold;
+               }
+               .waiting {
+                       color: Silver; /* Gray ? */
+               }
+               .progress {
+                       font-family: monospace;
+                       font-size: 80%%;
+                       margin-left: .5in;
+               }
+       </style>
+</head>
+
+<body>
+       <h1>Adds/changes dumps</h1>
+       
+       <p class="siteinfo">
+               This is the Wikimedia adds/changes dump service.
+               Please read the <a href='legal.html'>copyrights</a> information.
+               See <a href="http://meta.wikimedia.org/wiki/Data_dumps">Meta:Data dumps</a>
+               for documentation on the provided data formats.
+       </p>
+       <p>
+         Here's the big fat disclaimer.
+       </p>
+       <p>
+         This service is experimental.  At any time it may not be working, for 
a day, a week or a month.
+         It is not intended to replace the full XML dumps.  We don't expect 
users to be able to construct
+         full dumps of a given date from the incrementals and an older dump.
+       </p>
+       <p>
+         The data provided in these files is <em>partial data</em>.  To be precise: 
+         <ul>
+           <li>* Revisions included in these dumps are not up to the minute.  
We write out those that were
+           created up to 18 hours ago; this gives local editing communities 
time to delete revisions 
+           with sensitive information, vulgarities and other vandalism, 
etc.</li>
+           <li>* New pages entered for the first time during the time interval 
are included</li>
+           <li>* Revisions of undeleted pages will be included only if new 
revision IDs need to be assigned to 
+           the restored revisions.  For most revisions this will not be the 
case.  </li>
+           <li>* Information about moves and deletes are not included.</li>
+           <li>* Imported revisions will be included if they were imported 
during the time interval, since they
+           will have new revisions IDs.</li>
+           <li>* As with all dumps, hidden revisions or more generally 
revisions not readable by the general public
+             are not provided.</li>
+         </ul>
+       </p>
+       <p>
+         What is in these files:
+       </p>
+       <p>
+         The stubs file consists of the metadata for revision texts of each 
page, where the revision texts were
+         added within the time interval.  These look just like the history 
stubs files you would find on our XML data dumps
+         page, having the exact same format but only new revisions since the 
last adds/changes dump. This means you get
+         metadata for articles, user pages, discussion pages, etc.  If you 
want articles only, you will need to write a
+         filter to grab just those entries.
+       </p>
+       <p>
+         The revs file consists of the metadata plus the wikitext for each new 
revision since the last adds/changes dump.
+         This is in the same format as the pages-meta-history files you would 
find on our XML data dumps page.  This means
+         you get articles, user pages, discussion pages, etc.  If you want 
articles only, you will need to write a
+         filter to grab just those entries.
+       </p>
+       <h2>Adds/changes dump listing</h2>
+       <ul>
+               %(items)s
+       </ul>
+       <hr />
+         <p>
+           Return to <a href="http://dumps.wikimedia.org/other/">our other datasets</a>, the
+           <a href="http://dumps.wikimedia.org/backup-index.html">XML data dumps</a>, or
+           <a href="http://dumps.wikimedia.org/index.html">the main index</a>.
+         </p>
+</body>
+</html>

Added: branches/ariel/xmldumps-backup/incrementals/private.dblist
===================================================================

_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to