[MediaWiki-CVS] SVN: [111156] branches/ariel/xmldumps-phase3/
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/111156 Revision: 111156 Author: ariel Date: 2012-02-10 14:09:34 +0000 (Fri, 10 Feb 2012) Log Message: --- remove phase3 dir unused since its creation Removed Paths: - branches/ariel/xmldumps-phase3/ ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [110757] branches/ariel/xmldumps-backup/create-rsync-list.sh
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/110757 Revision: 110757 Author: ariel Date: 2012-02-06 16:39:21 +0000 (Mon, 06 Feb 2012) Log Message: --- pick up the abstract files for rsync too Modified Paths: -- branches/ariel/xmldumps-backup/create-rsync-list.sh Modified: branches/ariel/xmldumps-backup/create-rsync-list.sh === --- branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-02-06 16:29:58 UTC (rev 110756) +++ branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-02-06 16:39:21 UTC (rev 110757) @@ -96,12 +96,14 @@ ls $d/*.7z 2>/dev/null >> $outputfile.tmp ls $d/*.html 2>/dev/null >> $outputfile.tmp ls $d/*.txt 2>/dev/null >> $outputfile.tmp + ls $d/*.xml 2>/dev/null >> $outputfile.tmp else ls $d/*.gz 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> $outputfile.tmp ls $d/*.bz2 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> $outputfile.tmp ls $d/*.7z 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> $outputfile.tmp ls $d/*.html 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> $outputfile.tmp ls $d/*.txt 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> $outputfile.tmp + ls $d/*.xml 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> $outputfile.tmp fi else list_dir_only ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [110561] branches/ariel/xmldumps-backup/create-rsync-list.sh
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/110561 Revision: 110561 Author: ariel Date: 2012-02-02 11:26:23 + (Thu, 02 Feb 2012) Log Message: --- option to list dump dirs only w/o file contents Modified Paths: -- branches/ariel/xmldumps-backup/create-rsync-list.sh Modified: branches/ariel/xmldumps-backup/create-rsync-list.sh === --- branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-02-02 11:15:10 UTC (rev 110560) +++ branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-02-02 11:26:23 UTC (rev 110561) @@ -5,6 +5,7 @@ # are not n successful dumps available. # Options: +# dirsonly-- list only the directories to include # dumpsnumber -- number of dumps to list # outputfile -- path to file in which to write the list # configfile -- path to config file used to generate dumps @@ -12,6 +13,7 @@ usage() { echo Usage: $0 --dumpsnumber n --outputfile filename --configfile filename --rsyncprefix path echo +echo dirsonly list only directories to include echo dumpsnumber number of dumps to list echo outputfilename of file to which we will write iw action list echo configfilename of configuration file for dump generation @@ -21,6 +23,7 @@ echo echo For example: echo$0 --dumpsnumber 5 --outputfile /data/dumps/public/dumpsfiles_for_rsync.txt --configfile wikidump.conf.testing + exit 1 } @@ -74,22 +77,34 @@ done } +list_dir_only() { +if [ $rsyncprefix == false ]; then + ls -d $d 2/dev/null $outputfile.tmp +else + ls -d $d 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp +fi +} + list_files_in_dir() { if [ ! 
-f $outputfile.tmp ]; then touch $outputfile.tmp fi -if [ $rsyncprefix == false ]; then - ls $d/*.gz 2/dev/null $outputfile.tmp - ls $d/*.bz2 2/dev/null $outputfile.tmp - ls $d/*.7z 2/dev/null $outputfile.tmp - ls $d/*.html 2/dev/null $outputfile.tmp - ls $d/*.txt 2/dev/null $outputfile.tmp +if [ $dirsonly == false ]; then + if [ $rsyncprefix == false ]; then + ls $d/*.gz 2/dev/null $outputfile.tmp + ls $d/*.bz2 2/dev/null $outputfile.tmp + ls $d/*.7z 2/dev/null $outputfile.tmp + ls $d/*.html 2/dev/null $outputfile.tmp + ls $d/*.txt 2/dev/null $outputfile.tmp + else + ls $d/*.gz 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp + ls $d/*.bz2 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp + ls $d/*.7z 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp + ls $d/*.html 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp + ls $d/*.txt 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp + fi else - ls $d/*.gz 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp - ls $d/*.bz2 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp - ls $d/*.7z 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp - ls $d/*.html 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp - ls $d/*.txt 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp + list_dir_only fi } @@ -121,7 +136,7 @@ fi } -if [ $# -lt 4 -o $# -gt 8 ]; then +if [ $# -lt 4 -o $# -gt 9 ]; then usage fi @@ -129,21 +144,28 @@ outputfile= configfile=wikidump.conf rsyncprefix=false +dirsonly=false while [ $# -gt 0 ]; do -if [ $1 == --dumpsnumber ]; then +if [ $1 == --dirsonly ]; then + dirsonly=true + shift +elif [ $1 == --dumpsnumber ]; then dumpsnumber=$2 + shift; shift elif [ $1 == --outputfile ]; then outputfile=$2 + shift; shift elif [ $1 == --configfile ]; then configfile=$2 + shift; shift elif [ $1 == --rsyncprefix ]; then rsyncprefix=$2 + shift; shift else echo $0: Unknown option $1 usage fi -shift; 
shift done check_args ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [109918] branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109918 Revision: 109918 Author: ariel Date: 2012-01-24 10:34:43 +0000 (Tue, 24 Jan 2012) Log Message: --- make getlatestdir actually get latest instead of first :-P Modified Paths: -- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py Modified: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py === --- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2012-01-24 10:30:05 UTC (rev 109917) +++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2012-01-24 10:34:43 UTC (rev 109918) @@ -406,7 +406,7 @@ dirs = self.getIncDumpDirs() if dirs: if ok: -for dump in dirs: +for dump in reversed(dirs): statusInfo = StatusInfo(self._config, dump, self.wikiName) if statusInfo.getStatus(dump) == "done": return dump ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [109919] branches/ariel/xmldumps-backup/create-rsync-list.sh
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109919 Revision: 109919 Author: ariel Date: 2012-01-24 10:53:26 + (Tue, 24 Jan 2012) Log Message: --- eh, don't exit before creating the actual useful rsync file Modified Paths: -- branches/ariel/xmldumps-backup/create-rsync-list.sh Modified: branches/ariel/xmldumps-backup/create-rsync-list.sh === --- branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-01-24 10:34:43 UTC (rev 109918) +++ branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-01-24 10:53:26 UTC (rev 109919) @@ -180,7 +180,6 @@ fi if [ -f $outputfile.tmp ]; then mv $outputfile.tmp $outputfile -exit 0 else echo $0: no output file created. Something is wrong. exit 1 ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [109922] branches/ariel/xmldumps-backup/worker.py
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109922 Revision: 109922 Author: ariel Date: 2012-01-24 11:17:27 + (Tue, 24 Jan 2012) Log Message: --- don't send email on failure for runs of isolated jobs Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2012-01-24 11:14:18 UTC (rev 109921) +++ branches/ariel/xmldumps-backup/worker.py2012-01-24 11:17:27 UTC (rev 109922) @@ -1383,7 +1383,7 @@ # everything that has to do with reporting the status of a piece # of a dump is collected here class Status(object): - def __init__(self, wiki, dumpDir, items, checksums, enabled, noticeFile = None, errorCallback=None, verbose = False): + def __init__(self, wiki, dumpDir, items, checksums, enabled, email = True, noticeFile = None, errorCallback=None, verbose = False): self.wiki = wiki self.dbName = wiki.dbName self.dumpDir = dumpDir @@ -1394,13 +1394,14 @@ self.failCount = 0 self.verbose = verbose self._enabled = enabled + self.email = email def updateStatusFiles(self, done=False): if self._enabled: self._saveStatusSummaryAndDetail(done) def reportFailure(self): - if self._enabled: + if self._enabled and self.email: if self.wiki.config.adminMail: subject = Dump failure for + self.dbName message = self.wiki.config.readTemplate(errormail.txt) % { @@ -1698,7 +1699,12 @@ # some or all of these dumpItems will be marked to run self.dumpItemList = DumpItemList(self.wiki, self.prefetch, self.spawn, self._chunkToDo, self.checkpointFile, self.jobRequested, self.chunkInfo, self.pageIDRange, self.runInfoFile, self.dumpDir) - self.status = Status(self.wiki, self.dumpDir, self.dumpItemList.dumpItems, self.checksums, self._statusEnabled, self.htmlNoticeFile, self.logAndPrint, self.verbose) + # only send email failure notices for full runs + if (self.jobRequested): + email = False + else: + email = True + self.status = Status(self.wiki, self.dumpDir, self.dumpItemList.dumpItems, 
self.checksums, self._statusEnabled, email, self.htmlNoticeFile, self.logAndPrint, self.verbose) def logQueueReader(self,log): if not log: ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [109614] branches/ariel/xmldumps-backup/worker.py
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109614 Revision: 109614 Author: ariel Date: 2012-01-20 13:22:02 + (Fri, 20 Jan 2012) Log Message: --- typo in log file initalization Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2012-01-20 13:05:44 UTC (rev 109613) +++ branches/ariel/xmldumps-backup/worker.py2012-01-20 13:22:02 UTC (rev 109614) @@ -1685,7 +1685,7 @@ # these must come after the dumpdir setup so we know which directory we are in if (self._loggingEnabled and self._makeDirEnabled): fileObj = DumpFilename(self.wiki) - fileObj.newFromfilename(self.wiki.config.logFile) + fileObj.newFromFilename(self.wiki.config.logFile) self.logFileName = self.dumpDir.filenamePublicPath(fileObj) self.makeDir(os.path.join(self.wiki.publicDir(), self.wiki.date)) self.log = Logger(self.logFileName) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [109616] branches/ariel/xmldumps-backup/worker
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109616 Revision: 109616 Author: ariel Date: 2012-01-20 13:47:57 + (Fri, 20 Jan 2012) Log Message: --- make failout actually check fails in a row, not total; logging option; standard syntax for options now Modified Paths: -- branches/ariel/xmldumps-backup/worker Modified: branches/ariel/xmldumps-backup/worker === --- branches/ariel/xmldumps-backup/worker 2012-01-20 13:44:13 UTC (rev 109615) +++ branches/ariel/xmldumps-backup/worker 2012-01-20 13:47:57 UTC (rev 109616) @@ -1,32 +1,60 @@ #!/bin/bash -# number of failures of worker.py in a row before we decide +# default number of failures of worker.py in a row before we decide # something serious is broken and we refuse to run MAXFAILS=3 +# default: don't pass special config file +CONFIGFILE= +# default: no logging to file +LOG= + failures=0 - WIKIDUMP_BASE=`dirname $0` -if [ ! -z $1 ]; then -configFile=$1 +while [ $# -gt 0 ]; do +if [ $1 == --configfile ]; then + CONFIGFILE=$2 + shift; shift +elif [ $1 == --maxfails ]; then + MAXFAILS=$2 + shift; shift +elif [ $1 == --log ]; then + LOG=true + shift; +else + echo $0: Unknown option $1 + echo Usage: $0 [--configfile filename] [--log] [--maxfails num] + echo --configfile use specified file for config file (default: wikidump.conf) + echo --log write log of (almost) everything written to stderr (default: no logging) + echo --maxfails if more than this many dumps fail in a row, exit (default: 3) + exit 1 +fi +done + +# set up the command +pythonargs=( $WIKIDUMP_BASE/worker.py ) +if [ ! -z $CONFIGFILE ]; then +pythonargs=( ${pythonargs[@]} --configfile $CONFIGFILE ) fi +if [ ! -z $LOG ]; then +pythonargs=( ${pythonargs[@]} --log ) +fi while true; do if [ -e $WIKIDUMP_BASE/maintenance.txt ]; then echo in maintenance mode, sleeping 5 minutes sleep 300 else - if [ ! 
-z $configFile ]; then - python $WIKIDUMP_BASE/worker.py --configfile $configFile - else - python $WIKIDUMP_BASE/worker.py - fi + echo python ${pythonargs[@]} + python ${pythonargs[@]} if [ $? -ne 0 ]; then failures=$(($failures+1)) if [ $failures -gt $MAXFAILS ]; then echo more than $MAXFAILS failures in a row, halting. exit 1 fi + else + failures=0 fi echo sleeping sleep 30 ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [109202] trunk/extensions/CongressLookup/CongressLookup.db.php
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109202 Revision: 109202 Author: ariel Date: 2012-01-17 20:03:50 + (Tue, 17 Jan 2012) Log Message: --- make the friggin dash in the zip work Modified Paths: -- trunk/extensions/CongressLookup/CongressLookup.db.php Modified: trunk/extensions/CongressLookup/CongressLookup.db.php === --- trunk/extensions/CongressLookup/CongressLookup.db.php 2012-01-17 20:02:44 UTC (rev 109201) +++ trunk/extensions/CongressLookup/CongressLookup.db.php 2012-01-17 20:03:50 UTC (rev 109202) @@ -21,7 +21,7 @@ table or to have NULL for the rep id. */ if ( ( !$row ) || ( !$row-clz5_rep_id ) ) { /* if we got the extra 4 digits, use them */ - $zip9 = intval( $zip ); + $zip9 = intval( self::trimZip( $zip, 9 ) ); // remove the dash and pad if needed if ( $zip9 = 1 ) { $row = $dbr-selectRow( 'cl_zip9', 'clz9_rep_id', array( 'clz9_zip' = $zip9 ) ); if ( $row ) { @@ -131,14 +131,22 @@ */ public static function trimZip( $zip, $length ) { $zip = trim( $zip ); - if ( strlen( $zip ) 5 ) { - $zip = sprintf( %05d, $zip ); + if ( strpos( $zip, '-' ) === False ) { + if ( strlen( $zip ) 5 ) { + $zip = sprintf( %05d, $zip ); + } + elseif ( strlen( $zip ) 5 ) { + $zip = sprintf( %09d, $zip ); + } } - elseif ( strlen( $zip ) 5 ) { - $zip = sprintf( %09d, $zip ); + else { + $zipPieces = explode( '-', $zip, 2 ); + if (! $zipPieces[1]) { + $zipPieces[1] = 0; + } + $zip = sprintf( %05d%04d, $zipPieces[0], $zipPieces[1] ); } - $zipPieces = explode( '-', $zip, 2 ); - $zip = substr( $zipPieces[0], 0, $length ); + $zip = substr( $zip, 0, $length ); return $zip; } } ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [108383] branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/108383 Revision: 108383 Author: ariel Date: 2012-01-09 08:32:59 + (Mon, 09 Jan 2012) Log Message: --- tweak a couple error messages Modified Paths: -- branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c Modified: branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c === --- branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c 2012-01-09 06:36:40 UTC (rev 108382) +++ branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c 2012-01-09 08:32:59 UTC (rev 108383) @@ -313,7 +313,7 @@ } seekresult = lseek(fin, bfile-position, SEEK_SET); if (seekresult == (off_t)-1) { -fprintf(stderr,lseek of file to %PRId64 failed (7)\n,bfile-position); +fprintf(stderr,lseek of file to %PRId64 failed (9)\n,bfile-position); return(-1); } @@ -682,7 +682,7 @@ /* leave the file at the right position */ seekresult = lseek(fin, bfile-block_start, SEEK_SET); if (seekresult == (off_t)-1) { - fprintf(stderr,lseek of file to %PRId64 failed (7)\n,bfile-position); + fprintf(stderr,lseek of file to %PRId64 failed (8)\n,bfile-position); return(-1); } bfile-position = seekresult; ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [108384] branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/108384 Revision: 108384 Author: ariel Date: 2012-01-09 08:36:00 + (Mon, 09 Jan 2012) Log Message: --- catch the case where the page id requested is less than first page id in file Modified Paths: -- branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c Modified: branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c === --- branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c 2012-01-09 08:32:59 UTC (rev 108383) +++ branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c 2012-01-09 08:36:00 UTC (rev 108384) @@ -41,7 +41,7 @@ return(0); } else { -fprintf(stderr,failed to find the next frigging block marker\n); +fprintf(stderr,Failed to find the next block marker\n); return(-1); } } @@ -91,7 +91,7 @@ if (match_base_expr[1].rm_so =0) { hostname_length = match_base_expr[1].rm_eo - match_base_expr[1].rm_so; if (hostname_length sizeof(hostname)) { - fprintf(stderr,very long hostname, giving up\n); + fprintf(stderr,Very long hostname, giving up\n); break; } else { @@ -339,7 +339,7 @@ hopefully that doesn't take forever. 
*/ if (buffer_count(2000/BUFINSIZE) rev_id) { - if (verbose) fprintf(stderr, passed cutoff for using api\n); + if (verbose) fprintf(stderr, passed retries cutoff for using api\n); if (use_api) { page_id_found = get_page_id_from_rev_id_via_api(rev_id, fin); } @@ -442,19 +442,23 @@ /* if we're this close, we'll check this value and be done with it */ if (iinfo-right_end -iinfo-left_end (off_t)2) { new_position = iinfo-left_end; +if (verbose = 2) fprintf(stderr, choosing new position (1) %PRId64\n,new_position); iinfo-right_end = iinfo-left_end; } else { if (iinfo-last_value iinfo-value_wanted) { - if (verbose =2) fprintf(stderr,resetting left end\n); + if (verbose = 2) fprintf(stderr,resetting left end\n); iinfo-left_end = iinfo-last_position; new_position = iinfo-last_position + interval; + if (verbose = 2) fprintf(stderr, choosing new position (2) %PRId64\n,new_position); } /* iinfo-last_value iinfo-value_wanted */ else { if (verbose =2) fprintf(stderr,resetting right end\n); iinfo-right_end = iinfo-last_position; new_position = iinfo-last_position - interval; + if (new_position 0) new_position = 0; + if (verbose = 2) fprintf(stderr, choosing new position (3) %PRId64\n,new_position); } } res = get_first_page_id_after_offset(fin, new_position, pinfo, use_api, use_stub, stubfilename, verbose); @@ -550,7 +554,7 @@ else if (optc=='v') verbose++; else if (optc==-1) break; -else usage(argv[0],unknown option or other error\n); +else usage(argv[0],Unknown option or other error\n); } if (! filename || ! 
page_id) { @@ -558,12 +562,12 @@ } if (page_id 1) { -usage(argv[0], please specify a page_id = 1.\n); +usage(argv[0], Please specify a page_id = 1.\n); } fin = open (filename, O_RDONLY); if (fin 0) { -fprintf(stderr,failed to open file %s for read\n, argv[1]); +fprintf(stderr,Failed to open file %s for read\n, argv[1]); exit(1); } @@ -585,7 +589,7 @@ iinfo.last_position = (off_t)0; } else { -fprintf(stderr,failed to get anything useful from the beginning of the file even, bailing.\n); +fprintf(stderr,Failed to find any page from start of file, exiting\n); exit(1); } if (pinfo.page_id == page_id) { @@ -593,18 +597,26 @@ fprintf(stdout,position:%PRId64 page_id:%d\n,pinfo.position, pinfo.page_id); exit(0); } - + if (pinfo.page_id page_id) { +fprintf(stderr,Page requested is less than first page id in file\n); +exit(-1); + } while (1) { res = do_iteration(iinfo, fin, pinfo, use_api, use_stub, stubfile, verbose); -/* things to check: bad return? interval is 0 bytes long? */ -if (iinfo.left_end == iinfo.right_end) { - fprintf(stdout,position:%PRId64 page_id:%d\n,pinfo.position, pinfo.page_id); - exit(0); -} -else if (res 0) { - fprintf(stderr,broken and quitting\n); +if (res 0) { + fprintf(stderr,Error encountered during search\n); exit(-1); } +else if (iinfo.left_end == iinfo.right_end) { + if ( pinfo.page_id = page_id) { + fprintf(stdout,position:%PRId64 page_id:%d\n,pinfo.position, pinfo.page_id); + exit(0); + } + else { + fprintf(stderr,File does not contain requested page id\n); + exit(-1); + } +} } exit(0); } ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org
[MediaWiki-CVS] SVN: [108204] branches/ariel/xmldumps-backup/writeuptopageid.c
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/108204 Revision: 108204 Author: ariel Date: 2012-01-06 09:16:53 + (Fri, 06 Jan 2012) Log Message: --- skip potential garbage after siteinfo header and before page tag Modified Paths: -- branches/ariel/xmldumps-backup/writeuptopageid.c Modified: branches/ariel/xmldumps-backup/writeuptopageid.c === --- branches/ariel/xmldumps-backup/writeuptopageid.c2012-01-06 09:14:45 UTC (rev 108203) +++ branches/ariel/xmldumps-backup/writeuptopageid.c2012-01-06 09:16:53 UTC (rev 108204) @@ -4,7 +4,7 @@ #include errno.h #include string.h -typedef enum { None, StartHeader, StartPage, AtPageID, WriteMem, Write, EndPage, AtLastPageID } States; +typedef enum { None, StartHeader, EndHeader, StartPage, AtPageID, WriteMem, Write, EndPage, AtLastPageID } States; /* assume the header is never going to be longer than 1000 x 80 4-byte characters... how many namespaces will one project want? */ @@ -29,9 +29,20 @@ States setState (char *line, States currentState, int startPageID, int endPageID) { int pageID = 0; + if (currentState == EndHeader) { +/* if we have junk after the header we don't write it. + commands like dumpbz2filefromoffset can produce such streams. */ +if (strncmp(line,page,6)) { + return(None); +} + } + if (!strncmp(line,mediawiki,10)) { return(StartHeader); } + else if (!strncmp(line,/siteinfo,11)) { +return(EndHeader); + } else if (!strncmp(line,page,6)) { return(StartPage); } @@ -87,7 +98,7 @@ /* returns 1 on success, 0 on error */ int writeIfNeeded(char *line, States state) { - if (state == StartHeader || state == WriteMem || state == Write || state == EndPage) { + if (state == StartHeader || state == EndHeader || state == WriteMem || state == Write || state == EndPage) { return(fwrite(line,strlen(line),1,stdout)); } } ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [108011] branches/ariel/xmldumps-backup/mwbzutils/Makefile
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/108011 Revision: 108011 Author: ariel Date: 2012-01-04 10:58:29 + (Wed, 04 Jan 2012) Log Message: --- bump version number Modified Paths: -- branches/ariel/xmldumps-backup/mwbzutils/Makefile Modified: branches/ariel/xmldumps-backup/mwbzutils/Makefile === --- branches/ariel/xmldumps-backup/mwbzutils/Makefile 2012-01-04 10:51:57 UTC (rev 108010) +++ branches/ariel/xmldumps-backup/mwbzutils/Makefile 2012-01-04 10:58:29 UTC (rev 108011) @@ -84,7 +84,7 @@ distclean: clean -DISTNAME=mwbzutils-0.0.2 +DISTNAME=mwbzutils-0.0.3 dist: rm -f $(DISTNAME) ln -s -f . $(DISTNAME) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [107870] branches/ariel/xmldumps-backup
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/107870 Revision: 107870 Author: ariel Date: 2012-01-03 09:06:41 + (Tue, 03 Jan 2012) Log Message: --- add support for creation of a multiple bz2 stream dump of articles Modified Paths: -- branches/ariel/xmldumps-backup/WikiDump.py branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/WikiDump.py === --- branches/ariel/xmldumps-backup/WikiDump.py 2012-01-03 09:05:11 UTC (rev 107869) +++ branches/ariel/xmldumps-backup/WikiDump.py 2012-01-03 09:06:41 UTC (rev 107870) @@ -203,6 +203,7 @@ grep: /bin/grep, checkforbz2footer: /usr/local/bin/checkforbz2footer, writeuptopageid: /usr/local/bin/writeuptopageid, + recompressxml: /usr/local/bin/recompressxml, #cleanup: { keep: 3, #chunks: { @@ -284,6 +285,7 @@ self.grep = self.conf.get(tools, grep) self.checkforbz2footer = self.conf.get(tools,checkforbz2footer) self.writeuptopageid = self.conf.get(tools,writeuptopageid) + self.recompressxml = self.conf.get(tools,recompressxml) if not self.conf.has_section('cleanup'): self.conf.add_section('cleanup') Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2012-01-03 09:05:11 UTC (rev 107869) +++ branches/ariel/xmldumps-backup/worker.py2012-01-03 09:06:41 UTC (rev 107870) @@ -688,6 +688,19 @@ Recombine all pages with complete edit history (.7z), These dumps can be *very* large, uncompressing up to 100 times the archive download size. 
+ Suitable for archival and statistical use, most mirror sites won't want or need this., self.findItemByName('metahistory7zdump'), self.wiki)) + # doing this only for recombined/full articles dump + if (self.chunkInfo.chunksEnabled()): + inputForMultistream = articlesdumprecombine + else: + inputForMultistream = articlesdump + self.dumpItems.append( + XmlMultiStreamDump(articles, + articlesmultistreamdump, + Articles, templates, media/file descriptions, and primary meta-pages, in multiple bz2 streams, 100 pages per stream, + This contains current versions of article content, in concatenated bz2 streams, 100 pages per stream, plus a separate + + index of page titles/ids and offsets into the file. Useful for offline readers, or for parallel processing of pages., + self.findItemByName(inputForMultistream), self.wiki, None)) + results = self._runInfoFile.getOldRunInfoFromFile() if (results): for runInfoObj in results: @@ -3326,6 +3339,161 @@ if (error): raise BackupError(error recombining xml bz2 files) +class XmlMultiStreamDump(XmlDump): +#class XmlRecompressDump(Dump): + Take a .bz2 and recompress it as multistream bz2, 100 pages per stream. 
+ + def __init__(self, subset, name, desc, detail, itemForRecompression, wiki, chunkToDo, chunks = False, checkpoints = False, checkpointFile = None): + self._subset = subset + self._detail = detail + self._chunks = chunks + if self._chunks: + self._chunksEnabled = True + self._chunkToDo = chunkToDo + self.wiki = wiki + self.itemForRecompression = itemForRecompression + if checkpoints: + self._checkpointsEnabled = True + self.checkpointFile = checkpointFile + Dump.__init__(self, name, desc) + + def getDumpName(self): + return pages- + self._subset + + def getFileType(self): + return xml + + def getFileExt(self): + return bz2 + + def getDumpNameMultistream(self, name): + return name + -multistream + + def getDumpNameMultistreamIndex(self, name): + return self.getDumpNameMultistream(name) + -index + + def getFileMultistreamName(self, f): + assuming that f is the name of an input file, + return the name of the associated multistream output file
[MediaWiki-CVS] SVN: [107871] branches/ariel/xmldumps-backup/worker.py
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/107871 Revision: 107871 Author: ariel Date: 2012-01-03 09:22:24 + (Tue, 03 Jan 2012) Log Message: --- check job dependencies for article bz2 multistream Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2012-01-03 09:06:41 UTC (rev 107870) +++ branches/ariel/xmldumps-backup/worker.py2012-01-03 09:22:24 UTC (rev 107871) @@ -801,6 +801,14 @@ if ((job == metahistorybz2dump) or (job == metacurrentdump) or (job == articlesdump)): if (not self.jobDoneSuccessfully(xmlstubsdump)): return False + if (job == articlesmultistreamdump): + if (self.chunkInfo.chunksEnabled()): + if (not self.jobDoneSuccessfully(articlesdumprecombine)): + return False + else: + if (not self.jobDoneSuccessfully(articlesdump)): + return False + return True def _getChunkToDo(self, jobName): ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [107839] branches/ariel/xmldumps-backup/mwbzutils
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/107839 Revision: 107839 Author: ariel Date: 2012-01-02 17:20:24 + (Mon, 02 Jan 2012) Log Message: --- utility to compress an input stream into multiple bz2 streams on output, with index of pages and offsets Modified Paths: -- branches/ariel/xmldumps-backup/mwbzutils/Makefile branches/ariel/xmldumps-backup/mwbzutils/README Added Paths: --- branches/ariel/xmldumps-backup/mwbzutils/recompressxml.c Modified: branches/ariel/xmldumps-backup/mwbzutils/Makefile === --- branches/ariel/xmldumps-backup/mwbzutils/Makefile 2012-01-02 17:16:10 UTC (rev 107838) +++ branches/ariel/xmldumps-backup/mwbzutils/Makefile 2012-01-02 17:20:24 UTC (rev 107839) @@ -29,7 +29,8 @@ all: checkforbz2footer \ dumpbz2filefromoffset \ dumplastbz2block \ - findpageidinbz2xml + findpageidinbz2xml \ + recompressxml dumplastbz2block: $(OBJSBZ) mwbzlib.o dumplastbz2block.o $(CC) $(CFLAGS) $(LDFLAGS) -o dumplastbz2block dumplastbz2block.o mwbzlib.o $(OBJSBZ) -lbz2 @@ -43,20 +44,26 @@ dumpbz2filefromoffset: $(OBJSBZ) mwbzlib.o dumpbz2filefromoffset.o $(CC) $(CFLAGS) $(LDFLAGS) -o dumpbz2filefromoffset dumpbz2filefromoffset.o mwbzlib.o $(OBJSBZ) -lbz2 -install: dumplastbz2block findpageidinbz2xml checkforbz2footer dumpbz2filefromoffset +recompressxml: $(OBJSBZ) recompressxml.o + $(CC) $(CFLAGS) $(LDFLAGS) -o recompressxml recompressxml.o -lbz2 + +install: dumplastbz2block findpageidinbz2xml checkforbz2footer dumpbz2filefromoffset recompressxml if ( test ! 
-d $(PREFIX)/bin ) ; then mkdir -p $(PREFIX)/bin ; fi cp -f dumplastbz2block $(PREFIX)/bin/dumplastbz2block cp -f findpageidinbz2xml $(PREFIX)/bin/findpageidinbz2xml cp -f checkforbz2footer $(PREFIX)/bin/checkforbz2footer cp -f dumpbz2filefromoffset $(PREFIX)/bin/dumpbz2filefromoffset + cp -f recompressxml $(PREFIX)/bin/recompressxml chmod a+x $(PREFIX)/bin/dumplastbz2block chmod a+x $(PREFIX)/bin/findpageidinbz2xml chmod a+x $(PREFIX)/bin/checkforbz2footer chmod a+x $(PREFIX)/bin/dumpbz2filefromoffset + chmod a+x $(PREFIX)/bin/recompressxml clean: rm -f *.o *.a dumplastbz2block findpageidinbz2xml \ - checkforbz2footer dumpbz2filefromoffset + checkforbz2footer dumpbz2filefromoffset \ + recompressxml bzlibfuncs.o: bzlibfuncs.c bzlib.h bzlib_private.h $(CC) $(CFLAGS) -c bzlibfuncs.c @@ -72,6 +79,8 @@ $(CC) $(CFLAGS) -c checkforbz2footer.c dumpbz2filefromoffset.o: dumpbz2filefromoffset.c $(CC) $(CFLAGS) -c dumpbz2filefromoffset.c +recompressxml.o: recompressxml.c + $(CC) $(CFLAGS) -c recompressxml.c distclean: clean @@ -80,6 +89,7 @@ rm -f $(DISTNAME) ln -s -f . $(DISTNAME) tar cvf $(DISTNAME).tar \ + $(DISTNAME)/recompressxml.c \ $(DISTNAME)/dumplastbz2block.c \ $(DISTNAME)/findpageidinbz2xml.c \ $(DISTNAME)/checkforbz2footer.c \ Modified: branches/ariel/xmldumps-backup/mwbzutils/README === --- branches/ariel/xmldumps-backup/mwbzutils/README 2012-01-02 17:16:10 UTC (rev 107838) +++ branches/ariel/xmldumps-backup/mwbzutils/README 2012-01-02 17:20:24 UTC (rev 107839) @@ -42,6 +42,18 @@ position:x pageid:nnn It exits with 0 on success, -1 on error. +recompresszml - Reads an xml stream of pages and writes multiple bz2 compressed + streams, concatenated, to stdout, with the specified number of + pages per stream. The mediawiki site info header is in its + own bz2 stream. 
Each stream can be extracted as a separate file + by an appropriate tool, checking for the byte-aligned string BZh91AYSY + and a following page tag (after uncompressing the first chunk + of data after that string). Alternatively, a tool can seek to + the location of one of the streams in order to find a particular + page. An index of file-offset:page-id:page-title lines + is written to a specified file if desired; the index file will be + bz2 compressed if the filename given ends with .bz2. + Library routines: mwbz2lib.c- various utility functions (bitmasks, shifting and comparing bytes, Added: branches/ariel/xmldumps-backup/mwbzutils/recompressxml.c === --- branches/ariel/xmldumps-backup/mwbzutils/recompressxml.c (rev 0) +++ branches/ariel/xmldumps-backup/mwbzutils
[MediaWiki-CVS] SVN: [107841] branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/107841 Revision: 107841 Author: ariel Date: 2012-01-02 17:24:54 + (Mon, 02 Jan 2012) Log Message: --- add verbose option Modified Paths: -- branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c Modified: branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c === --- branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c 2012-01-02 17:22:25 UTC (rev 107840) +++ branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c 2012-01-02 17:24:54 UTC (rev 107841) @@ -226,12 +226,12 @@ format: ?xml version=1.0?apiquerypagespage pageid=6215 ns=0 title=hystérique //pages/query/api */ -match_page_id_expr = (regmatch_t *)malloc(sizeof(regmatch_t)*2); +match_page_id_expr = (regmatch_t *)malloc(sizeof(regmatch_t)*3); res = regcomp(compiled_page_id_expr, page_id_expr, REG_EXTENDED); -if (regexec(compiled_page_id_expr, buffer, 2, match_page_id_expr, 0 ) == 0) { - if (match_page_id_expr[1].rm_so =0) { - page_id = atol(buffer + match_page_id_expr[1].rm_so); +if (regexec(compiled_page_id_expr, buffer, 3, match_page_id_expr, 0 ) == 0) { + if (match_page_id_expr[2].rm_so =0) { + page_id = atol(buffer + match_page_id_expr[2].rm_so); } } return(page_id); @@ -250,13 +250,13 @@ 0 if no pageid found, -1 on error */ -int get_first_page_id_after_offset(int fin, off_t position, page_info_t *pinfo, int use_api, int use_stub, char *stubfilename) { +int get_first_page_id_after_offset(int fin, off_t position, page_info_t *pinfo, int use_api, int use_stub, char *stubfilename, int verbose) { int res; regmatch_t *match_page, *match_page_id, *match_rev, *match_rev_id; regex_t compiled_page, compiled_page_id, compiled_rev, compiled_rev_id; int length=5000; /* output buffer size */ char *page = page; - char *page_id = page\n[ ]+title[^]+/title\n[ ]+id([0-9]+)/id\n; + char *page_id = page\n[ ]+title[^]+/title\n([ ]+ns[0-9]+/ns\n)?[ ]+id([0-9]+)/id\n; char *rev = revision; char *rev_id_expr = revision\n[ ]+id([0-9]+)/id\n; @@ 
-275,7 +275,7 @@ res = regcomp(compiled_rev_id, rev_id_expr, REG_EXTENDED); match_page = (regmatch_t *)malloc(sizeof(regmatch_t)*1); - match_page_id = (regmatch_t *)malloc(sizeof(regmatch_t)*2); + match_page_id = (regmatch_t *)malloc(sizeof(regmatch_t)*3); match_rev = (regmatch_t *)malloc(sizeof(regmatch_t)*1); match_rev_id = (regmatch_t *)malloc(sizeof(regmatch_t)*2); @@ -288,21 +288,23 @@ bfile.bytes_read = 0; if (find_first_bz2_block_from_offset(bfile, fin, position, FORWARD) = (off_t)0) { -/* fprintf(stderr,failed to find block in bz2file (1)\n); */ +if (verbose) fprintf(stderr,failed to find block in bz2file after offset %PRId64 (1)\n, position); return(-1); } + if (verbose) fprintf(stderr,found first block in bz2file after offset %PRId64\n, position); + while (!get_buffer_of_uncompressed_data(b, fin, bfile, FORWARD) (! bfile.eof)) { buffer_count++; +if (verbose =2) fprintf(stderr,buffers read: %d\n, buffer_count); if (bfile.bytes_written) { - while (regexec(compiled_page_id, (char *)b-next_to_read, 2, match_page_id, 0 ) == 0) { - if (match_page_id[1].rm_so =0) { - /* write page_id to stderr */ - /* - fwrite(b-next_to_read+match_page_id[1].rm_so, sizeof(unsigned char), match_page_id[1].rm_eo - match_page_id[1].rm_so, stderr); + while (regexec(compiled_page_id, (char *)b-next_to_read, 3, match_page_id, 0 ) == 0) { + if (match_page_id[2].rm_so =0) { + if (verbose){ + fwrite(b-next_to_read+match_page_id[2].rm_so, sizeof(unsigned char), match_page_id[2].rm_eo - match_page_id[2].rm_so, stderr); fwrite(\n,1,1,stderr); - */ - pinfo-page_id = atoi((char *)(b-next_to_read+match_page_id[1].rm_so)); + } + pinfo-page_id = atoi((char *)(b-next_to_read+match_page_id[2].rm_so)); pinfo-position = bfile.block_start; pinfo-bits_shifted = bfile.bits_shifted; return(1); @@ -337,6 +339,7 @@ hopefully that doesn't take forever. 
*/ if (buffer_count(2000/BUFINSIZE) rev_id) { + if (verbose) fprintf(stderr, passed cutoff for using api\n); if (use_api) { page_id_found = get_page_id_from_rev_id_via_api(rev_id, fin); } @@ -420,7 +423,7 @@ return value from guess, or -1 on error. */ -int do_iteration(iter_info_t *iinfo, int fin, page_info_t *pinfo, int use_api, int use_stub, char *stubfilename) { +int do_iteration(iter_info_t *iinfo, int fin, page_info_t *pinfo, int use_api, int use_stub, char *stubfilename, int verbose) { int res; off_t new_position; off_t interval; @@ -434,7 +437,8 @@ if (interval == (off_t)0
[MediaWiki-CVS] SVN: [106443] branches/ariel/xmldumps-backup/worker.py
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/106443 Revision: 106443 Author: ariel Date: 2011-12-16 15:57:06 + (Fri, 16 Dec 2011) Log Message: --- * Add aftercheckpoint option which will restart a job step from immediately after the specified checkpoint file * When doing dump using pageidrange, delete old files from same date covering that same range of pages; also true for aftercheckpoint option * Clean up old symlinks and rss feed entries when running latestlinks job * verbose option for more ... verbosity (debugging) * In human-readable description of various dump files, change image to media/files (thanks to Danny_B for that) Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-12-16 15:45:55 UTC (rev 106442) +++ branches/ariel/xmldumps-backup/worker.py2011-12-16 15:57:06 UTC (rev 106443) @@ -18,6 +18,7 @@ import CommandManagement import Queue import thread +import traceback from os.path import exists from subprocess import Popen, PIPE @@ -369,9 +370,10 @@ pass class RunInfoFile(object): - def __init__(self, wiki, enabled): + def __init__(self, wiki, enabled, verbose = False): self.wiki = wiki self._enabled = enabled + self.verbose = verbose def saveDumpRunInfoFile(self, text): Write out a simple text file with the status for this wiki's dump. @@ -379,6 +381,9 @@ try: self._writeDumpRunInfoFile(text) except: + if (self.verbose): + exc_type, exc_value, exc_traceback = sys.exc_info() + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) print Couldn't save dump run info file. 
Continuing anyways def statusOfOldDumpIsDone(self, runner, date, jobName, jobDesc): @@ -410,6 +415,9 @@ infile.close return results except: + if (self.verbose): + exc_type, exc_value, exc_traceback = sys.exc_info() + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) return False # @@ -481,6 +489,9 @@ infile.close return None except: + if (self.verbose): + exc_type, exc_value, exc_traceback = sys.exc_info() + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) return None # find desc in there, look for class='done' @@ -506,6 +517,9 @@ infile.close return None except: + if (self.verbose): + exc_type, exc_value, exc_traceback = sys.exc_info() + print repr(traceback.format_exception(exc_type, exc_value, exc_traceback)) return None @@ -591,11 +605,11 @@ #PrivateTable(filearchive, filearchivetable, Deleted image data), PublicTable(site_stats, sitestatstable, A few statistics such as the page count.), - PublicTable(image, imagetable, Metadata on current versions of uploaded images.), - PublicTable(oldimage, oldimagetable, Metadata on prior versions of uploaded images.), + PublicTable(image, imagetable, Metadata on current versions of uploaded media/files.), + PublicTable(oldimage, oldimagetable, Metadata on prior versions of uploaded media/files.), PublicTable(pagelinks, pagelinkstable, Wiki page-to-page link records.), PublicTable(categorylinks, categorylinkstable, Wiki category membership link records.), - PublicTable(imagelinks, imagelinkstable, Wiki image usage records.), + PublicTable(imagelinks, imagelinkstable, Wiki media/files usage records.), PublicTable(templatelinks, templatelinkstable, Wiki template inclusion link records.), PublicTable(externallinks, externallinkstable, Wiki external URL link records.), PublicTable(langlinks, langlinkstable, Wiki interlanguage link records.), @@ -627,10 +641,10 @@ self.dumpItems.append( XmlDump(articles, articlesdump
[MediaWiki-CVS] SVN: [106020] branches/ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/106020 Revision: 106020 Author: ariel Date: 2011-12-13 13:25:12 + (Tue, 13 Dec 2011) Log Message: --- some scripts we used for looking at thumbs counts, sizes, etc... in case we need 'em again Added Paths: --- branches/ariel/tools/ branches/ariel/tools/thumbs/ branches/ariel/tools/thumbs/crunchinglogs/ branches/ariel/tools/thumbs/crunchinglogs/README branches/ariel/tools/thumbs/crunchinglogs/datascripts/ branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbFilesSizesCounts.py branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbPxSize.py branches/ariel/tools/thumbs/crunchinglogs/otherscripts/ branches/ariel/tools/thumbs/crunchinglogs/otherscripts/checkExistingThumbDirs.py branches/ariel/tools/thumbs/crunchinglogs/otherscripts/listFileNames.py branches/ariel/tools/thumbs/crunchinglogs/otherscripts/listThumbFilesByDir.py branches/ariel/tools/thumbs/crunchinglogs/otherscripts/removeThumbDirs.py branches/ariel/tools/thumbs/crunchinglogs/samples/ branches/ariel/tools/thumbs/crunchinglogs/samples/do-dateanal-dates-created.sh branches/ariel/tools/thumbs/crunchinglogs/samples/do-dateanal-dates.sh branches/ariel/tools/thumbs/crunchinglogs/samples/do-pixel-sizes.sh Added: branches/ariel/tools/thumbs/crunchinglogs/README === --- branches/ariel/tools/thumbs/crunchinglogs/README (rev 0) +++ branches/ariel/tools/thumbs/crunchinglogs/README2011-12-13 13:25:12 UTC (rev 106020) @@ -0,0 +1,16 @@ +These scripts were written so we could get some notion of what was going on with thumbs, given that +we don't keep logs and the host is i/o-bound so we can't just do a pile of finds. 
+ +I can't imagine they will be useful to someone else but they might be useful to us sometime, who knows + +stats on the thumbs files on the filesystem: +* go to (for example) commons/thumb/0/00, run an ls --sort=none, capture results into some file +* cat the input of that to python listThumbFilesByDir.py and save the output of that to a file +* filter it as needed for crap names, results into 0-00-files.txt.nobad +* now you can run the following: do-dateanal-dates-created.sh do-dateanal-dates.sh do-pixel-sizes.sh + they will create a little pile of files aug*txt sept*txt etc. + +stats on googlebot requests: +* go to locke, zcat sample*log*gz | grep Googlebot-Image and gzip the output into googlebot-image-requests.gz +* now you can run check-all-dates.sh and it produces a small pile of output files + Added: branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py === --- branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py (rev 0) +++ branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py 2011-12-13 13:25:12 UTC (rev 106020) @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- + +import os +import re +import sys +import time +import getopt + +def usage(message=None): +print Usage: %s [--sdate=date --edate=date --created [filename] % sys.argv[0] +print sdate: start date for which to print stats, default: earliest date in file +print edate: end date for which to print stats, default: latest date in file +print created: show only the number of files and sizes on the date the first thumb +print was created (presumably the date the image itself was first uploaded) +print +print Date format for sdate and edate: -mm-dd +print +print If no filename is specified, input is read from stdin +print +print Format of input file: (sample line) +print 2011-10-29 01:57:51 100311 Festiwal_Słowian_i_Wikingów_2009_121.jpg/640px-Festiwal_Słowian_i_Wikingów_2009_121.jpg +print date in -mm-dd format, time in hh:mm::ss format, size in 
bytes, thumb directory/thumb filename +sys.exit(1) + +if __name__ == __main__: +sdate = None +edate = None +created = False +try: +(options, remainder) = getopt.gnu_getopt(sys.argv[1:], , + [ 'sdate=', 'edate=', 'created' ]) +except: +usage(Unknown option specified) + +for (opt, val) in options: +if opt == --sdate: +sdate = val +elif opt == --edate: +edate = val +elif opt == --created: +created = True + +dateexp = re.compile(r^\d{4}-\d{2}-\d{2}$) +for d in filter(None, [ sdate, edate ]): +if not dateexp.match(d): +usage(Bad date format.) + +if len(remainder) == 1: +inputFile = remainder[0] +fHandle = open(inputFile,r) +elif len(remainder) == 0: +fHandle = sys.stdin
[MediaWiki-CVS] SVN: [105887] branches/ariel/xmldumps-backup
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/105887 Revision: 105887 Author: ariel Date: 2011-12-12 15:23:04 + (Mon, 12 Dec 2011) Log Message: --- run specified query on list of wikis, one gzipped output file for each, files named by date and project Added Paths: --- branches/ariel/xmldumps-backup/wikiqueries/ branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py Added: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample === --- branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample (rev 0) +++ branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample 2011-12-12 15:23:04 UTC (rev 105887) @@ -0,0 +1,23 @@ +[wiki] +mediawiki=/home/wmf/mediawiki/1.18 +allwikislist=/home/wmf/conf/all.dblist +privatewikislist=/home/wmf/conf/private.dblist +closedwikislist=/home/wmf/conf/closed.dblist + +[output] +wikiqueriesdir=/home/wmf/output/files +temp=/var/tmp +fileperms=0644 + +[database] +user=dbadmin +password=X + +[tools] +php=/usr/bin/php +mysql=/usr/bin/mysql +gzip=/usr/bin/gzip +bzip2=/usr/bin/bzip2 + +[query] +queryfile=/home/wmf/scripts/query.sql Added: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py === --- branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py (rev 0) +++ branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py 2011-12-12 15:23:04 UTC (rev 105887) @@ -0,0 +1,352 @@ +# for every wiki, run a specified query, gzipping the output. +# there's a config file which needs to be set up. + +import getopt +import os +import re +import sys +import ConfigParser +import subprocess +import socket +import time +from subprocess import Popen, PIPE +from os.path import exists +import hashlib +import traceback +import shutil + +class ContentFile(object): +def __init__(self, config, date, wikiName): +self._config = config +self.date = date +self.queryDir = QueryDir(self._config) +self.wikiName = wikiName + +# override this. 
+def getFileName(self): +return content.txt + +def getPath(self): +return os.path.join(self.queryDir.getQueryDir(),self.getFileName()) + +class OutputFile(ContentFile): +def getFileName(self): +return %s-%s-wikiquery.gz % ( self.wikiName, self.date ) + +class Config(object): +def __init__(self, configFile=False): +self.projectName = False + +home = os.path.dirname(sys.argv[0]) +if (not configFile): +configFile = wikiqueries.conf +self.files = [ +os.path.join(home,configFile), +/etc/wikqueries.conf, +os.path.join(os.getenv(HOME), .wikiqueries.conf)] +defaults = { +#wiki: { +allwikislist: , +privatewikislist: , +closedwikislist: , +#output: { +wikiqueriesdir: /wikiqueries, +temp:/wikiqueries/temp, +fileperms: 0640, +#database: { +user: root, +password: , +#tools: { +php: /bin/php, +gzip: /usr/bin/gzip, +bzip2: /usr/bin/bzip2, +mysql: /usr/bin/mysql, +multiversion: , +#query:{ +queryfile: wikiquery.sql +} + +self.conf = ConfigParser.SafeConfigParser(defaults) +self.conf.read(self.files) + +if not self.conf.has_section(wiki): +print The mandatory configuration section 'wiki' was not defined. +raise ConfigParser.NoSectionError('wiki') + +if not self.conf.has_option(wiki,mediawiki): +print The mandatory setting 'mediawiki' in the section 'wiki' was not defined. 
+raise ConfigParser.NoOptionError('wiki','mediawiki') + +self.parseConfFile() + +def parseConfFile(self): +self.mediawiki = self.conf.get(wiki, mediawiki) +self.allWikisList = MiscUtils.dbList(self.conf.get(wiki, allwikislist)) +self.privateWikisList = MiscUtils.dbList(self.conf.get(wiki, privatewikislist)) +self.closedWikisList = MiscUtils.dbList(self.conf.get(wiki, closedwikislist)) + +if not self.conf.has_section('output'): +self.conf.add_section('output') +self.wikiQueriesDir = self.conf.get(output, wikiqueriesdir) +self.tempDir = self.conf.get(output, temp) +self.fileperms = self.conf.get(output, fileperms) +self.fileperms = int(self.fileperms,0) + +if not self.conf.has_section('database'): +self.conf.add_section('database') +self.dbUser = self.conf.get(database, user) +self.dbPassword = self.conf.get(database, password) + +if not self.conf.has_section('tools
[MediaWiki-CVS] SVN: [104422] branches/ariel/xmldumps-backup/incrementals
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104422 Revision: 104422 Author: ariel Date: 2011-11-28 11:53:49 + (Mon, 28 Nov 2011) Log Message: --- config option determines when locks for maxrevid phase are stale; cmd line option to clean up stale locks encountered during run Modified Paths: -- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py Modified: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py === --- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2011-11-28 11:41:47 UTC (rev 104421) +++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2011-11-28 11:53:49 UTC (rev 104422) @@ -10,6 +10,7 @@ import subprocess from subprocess import Popen, PIPE import shutil +import time class ContentFile(object): def __init__(self, config, date, wikiName): @@ -114,6 +115,18 @@ except: return False +def isStaleLock(self): +if not self.isLocked(): +return False +try: +timestamp = os.stat(self.lockFile.getPath()).st_mtime +except: +return False +if (time.time() - timestamp) self._config.staleInterval: +return True +else: +return False + def unlock(self): os.remove(self.lockFile.getPath()) @@ -161,6 +174,7 @@ webroot: http://localhost/dumps/incr;, fileperms: 0640, delay: 43200, +maxrevidstaleinterval: 3600, #database: { user: root, password: , @@ -206,6 +220,8 @@ self.fileperms = int(self.fileperms,0) self.delay = self.conf.get(output, delay) self.delay = int(self.delay,0) +self.staleInterval = self.conf.get(output, maxrevidstaleinterval) +self.staleInterval = int(self.staleInterval,0) if not self.conf.has_section('tools'): self.conf.add_section('tools') Modified: branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py === --- branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py 2011-11-28 11:41:47 UTC (rev 104421) +++ branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py 2011-11-28 11:53:49 UTC (rev 104422) @@ -40,11 +40,12 @@ return 
exists(self.maxRevIdFile.getPath()) class MaxIDDump(object): -def __init__(self,config, date, verbose): +def __init__(self,config, date, verbose, cleanupStale): self._config = config self.date = date self.incrDir = IncrementDir(self._config, self.date) self.verbose = verbose +self.cleanupStale = cleanupStale def doOneWiki(self, w): success = True @@ -52,7 +53,23 @@ if not exists(self.incrDir.getIncDir(w)): os.makedirs(self.incrDir.getIncDir(w)) lock = MaxRevIDLock(self._config, self.date, w) -if lock.getLock(): +lockResult = lock.getLock() +if not lockResult: +if (self.verbose): +print failed to get lock for wiki, w +if lock.isStaleLock(): +if (self.verbose): +print lock is stale for wiki, w +# this option should be given to one process only, or you could have trouble. +if (self.cleanupStale): +lock.unlock() +lockResult = lock.getLock() +if (self.verbose): +print stale lock removed and trying again to get for wiki, w +if lockResult: +if (self.verbose): +print got lock ,lock.lockFile.getFileName() +print checking max rev id for wiki, w try: maxRevID = MaxRevID(self._config, w, self.date) if not maxRevID.exists(): @@ -66,10 +83,10 @@ else: if (self.verbose): print Wiki , w, failed to get lock. -traceback.print_exc(file=sys.stdout) +success = False if success: if (self.verbose): -print Success! Wiki, w, adds/changes dump complete. +print Success! Wiki, w, rev max id for adds/changes dump complete. return success def doRunOnAllWikis(self): @@ -96,11 +113,13 @@ print message print Usage: python generateincrementals.py [options] [wikidbname] print Options: --configfile, --date, --verbose -print --configfile: Specify an alternate config file to read. Default file is 'dumpincr.conf' in the current directory. -print --date:(Re)run
[MediaWiki-CVS] SVN: [104139] branches/ariel/xmldumps-backup/incrementals/incrmonitor.py
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104139 Revision: 104139 Author: ariel Date: 2011-11-24 08:45:18 + (Thu, 24 Nov 2011) Log Message: --- be able to run with cutoff of specific date; add link to all runs for a wiki; fix error returns so they print; more verbosity for verbose option Modified Paths: -- branches/ariel/xmldumps-backup/incrementals/incrmonitor.py Modified: branches/ariel/xmldumps-backup/incrementals/incrmonitor.py === --- branches/ariel/xmldumps-backup/incrementals/incrmonitor.py 2011-11-24 08:44:49 UTC (rev 104138) +++ branches/ariel/xmldumps-backup/incrementals/incrmonitor.py 2011-11-24 08:45:18 UTC (rev 104139) @@ -26,8 +26,9 @@ makeLink = staticmethod(makeLink) class Index(object): -def __init__(self, config, verbose): +def __init__(self, config, date, verbose): self._config = config +self.date = date self.indexFile = IndexFile(self._config) self.incrDir = IncrementDir(self._config) self.verbose = verbose @@ -37,6 +38,8 @@ for w in self._config.allWikisList: result = self.doOneWiki(w) if result: +if (self.verbose): +print result for wiki , w, is , result text = text + li+ result + /li\n indexText = self._config.readTemplate(incrs-index.html) % { items : text } FileUtils.writeFileInPlace(self.indexFile.getPath(), indexText, self._config.fileperms) @@ -48,8 +51,10 @@ if (self.verbose): print No dump for wiki , w next - -incrDate = self.incrDumpsDirs.getLatestIncrDate() +if date: +incrDate = date + else: +incrDate = self.incrDumpsDirs.getLatestIncrDate() if not incrDate: if (self.verbose): print No dump for wiki , w @@ -69,7 +74,8 @@ except: if (self.verbose): traceback.print_exc(file=sys.stdout) -return Error encountered, no information available for wiki, w +print Error encountered, no information available for wiki, w +return Error encountered, no information available for wiki + w try: wikinameText = strong%s/strong % w @@ -85,17 +91,19 @@ revsText = revs: %s (size %s) % (Link.makeLink(os.path.join(w, incrDate, 
revs.getFileName()),revsDate), revsSize) else: revsText = None + otherRunsText = other runs: %s % Link.makeLink(w,w) if statContents: statText = (%s) % (statContents) else: statText = None wikiInfo = .join( filter( None, [ wikinameText, lockText, statText ] ) ) + br / -wikiInfo = wikiInfo + nbsp;nbsp; + | .join( filter( None, [ stubText, revsText ] )) +wikiInfo = wikiInfo + nbsp;nbsp; + | .join( filter( None, [ stubText, revsText, otherRunsText ] )) except: if (self.verbose): traceback.print_exc(file=sys.stdout) -return Error encountered formatting information for wiki, w +print Error encountered formatting information for wiki, w +return Error encountered formatting information for wiki + w return wikiInfo @@ -103,25 +111,29 @@ if message: print message print Usage: python monitor.py [options] [wikidbname] -print Options: --configfile, --verbose +print Options: --configfile, --date, --verbose print --configfile: Specify an alternate config file to read. Default file is 'dumpincr.conf' in the current directory. +print --date:Look at runs starting on specified date or earler print --verbose: Print error messages and other informative messages (normally the printscript runs silently). sys.exit(1) if __name__ == __main__: configFile = False +date = False verbose = False try: (options, remainder) = getopt.gnu_getopt(sys.argv[1:], , - ['configfile=', 'verbose' ]) + ['configfile=', 'date=', 'verbose' ]) except: usage(Unknown option specified) for (opt, val) in options: if opt == --configfile: configFile = val +elif opt == --date: +date=val elif opt == '--verbose': verbose = True @@ -130,5 +142,5 @@ else: config = Config() -index = Index(config, verbose) +index = Index(config, date
[MediaWiki-CVS] SVN: [104140] branches/ariel/xmldumps-backup/incrementals/incrs-index.html
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104140 Revision: 104140 Author: ariel Date: 2011-11-24 08:46:33 + (Thu, 24 Nov 2011) Log Message: --- more disclaimers, bolder disclaimers, info on the other files in each dir Modified Paths: -- branches/ariel/xmldumps-backup/incrementals/incrs-index.html Modified: branches/ariel/xmldumps-backup/incrementals/incrs-index.html === --- branches/ariel/xmldumps-backup/incrementals/incrs-index.html 2011-11-24 08:45:18 UTC (rev 104139) +++ branches/ariel/xmldumps-backup/incrementals/incrs-index.html 2011-11-24 08:46:33 UTC (rev 104140) @@ -65,18 +65,20 @@ for documentation on the provided data formats. /p p - Here's the big fat disclaimer. + strongHere's the big fat disclaimer./strong /p p - This service is experimental. At any time it may not be working, for a day, a week or a month. + emThis service is experimental./em At any time it may not be working, for a day, a week or a month. It is not intended to replace the full XML dumps. We don't expect users to be able to construct - full dumps of a given date from the incrementals and an older dump. + full dumps of a given date from the incrementals and an older dump. We don't guarantee that the data + included in these dumps is complete, or correct, or won't break your Xbox. In short: don't blame us (but + do get on the email list and send mail: see a href=https://lists.wikimedia.org/mailman/listinfo/xmldatadumps-l;xmldatadumps-l/a). /p p The data provided in these files is ''partial data''. To be precise: ul li* Revisions included in these dumps are not up to the minute. 
We write out those that were - created up to 18 hours ago; this gives local editing communities time to delete revisions + created up to 12 hours ago; this gives local editing communities time to delete revisions with sensitive information, vulgarities and other vandalism, etc./li li* New pages entered for the first time during the time interval are included/li li* Revisions of undeleted pages will be included only if new revision IDs need to be assigned to @@ -85,7 +87,8 @@ li* Imported revisions will be included if they were imported during the time interval, since they will have new revisions IDs./li li* As with all dumps, hidden revisions or more generally revisions not readable by the general public - are not provided./li + are not provided./li + li* When a wiki is closed, it no longer shows up in this list./li /ul /p p @@ -104,7 +107,19 @@ you get articles, user pages, discussion pages, etc. If you want articles only, you will need to write a filter to grab just those entries. /p - h2Adds/changes dump listing/h2 + p + The md5sums.txt file contains the md5 hash of the stubs file and the revs file, so that downloaders can verify + the integrity of the files after download. + /p + p + The file maxrevid.txt contains the largest revision ID on the project at the time we checked, which should be the + same as the timestamp of that file. + /p + p + The file status.txt, if it exists, will contain the value done in cases where the run is complete and was + successful. + /p + h2Adds/changes dump listing (links to latest complete run)/h2 ul %(items)s /ul ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [104142] branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104142 Revision: 104142 Author: ariel Date: 2011-11-24 08:58:58 + (Thu, 24 Nov 2011) Log Message: --- flexibility in retrieving previous dump date ('must be successful run' is now a parameter) Modified Paths: -- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py Modified: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py === --- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2011-11-24 08:48:14 UTC (rev 104141) +++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2011-11-24 08:58:58 UTC (rev 104142) @@ -366,9 +366,10 @@ toRemove = os.path.join(self.incrDir.getIncDirNoDate(self.wikiName), dump) shutil.rmtree(%s % toRemove) -def getPrevIncrDate(self, date): +def getPrevIncrDate(self, date, ok = False): # find the most recent incr dump before the -# specified date that completed successfully +# specified date +# if ok is True, find most recent dump that completed successfully previous = None old = self.getIncDumpDirs() if old: @@ -376,8 +377,11 @@ if dump == date: return previous else: -statusInfo = StatusInfo(self._config, dump, self.wikiName) -if statusInfo.getStatus(dump) == done: +if ok: +statusInfo = StatusInfo(self._config, dump, self.wikiName) +if statusInfo.getStatus(dump) == done: +previous = dump +else: previous = dump return previous ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [104143] branches/ariel/xmldumps-backup/incrementals/ generateincrementals.py
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104143 Revision: 104143 Author: ariel Date: 2011-11-24 09:01:52 + (Thu, 24 Nov 2011) Log Message: --- more verbosity Modified Paths: -- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py Modified: branches/ariel/xmldumps-backup/incrementals/generateincrementals.py === --- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 2011-11-24 08:58:58 UTC (rev 104142) +++ branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 2011-11-24 09:01:52 UTC (rev 104143) @@ -74,6 +74,7 @@ self.incrDumpsDirs.cleanupOldIncrDumps(self.date) maxRevID = self.getMaxRevIdFromFile() if (self.verbose): +print Doing run for wiki: ,self.wikiName if maxRevID: print maxRevID is , maxRevID else: ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [104144] branches/ariel/xmldumps-backup/incrementals
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104144 Revision: 104144 Author: ariel Date: 2011-11-24 09:29:16 + (Thu, 24 Nov 2011) Log Message: --- get latest incr date now has optional check for successful run Modified Paths: -- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py branches/ariel/xmldumps-backup/incrementals/incrmonitor.py Modified: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py === --- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2011-11-24 09:01:52 UTC (rev 104143) +++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2011-11-24 09:29:16 UTC (rev 104144) @@ -385,10 +385,16 @@ previous = dump return previous -def getLatestIncrDate(self): +def getLatestIncrDate(self, ok = False): # find the most recent incr dump dirs = self.getIncDumpDirs() if dirs: -return(dirs[-1]) +if ok: +for dump in dirs: +statusInfo = StatusInfo(self._config, dump, self.wikiName) +if statusInfo.getStatus(dump) == done: +return dump +else: +return(dirs[-1]) else: return(None) Modified: branches/ariel/xmldumps-backup/incrementals/incrmonitor.py === --- branches/ariel/xmldumps-backup/incrementals/incrmonitor.py 2011-11-24 09:01:52 UTC (rev 104143) +++ branches/ariel/xmldumps-backup/incrementals/incrmonitor.py 2011-11-24 09:29:16 UTC (rev 104144) @@ -54,7 +54,7 @@ if date: incrDate = date else: -incrDate = self.incrDumpsDirs.getLatestIncrDate() +incrDate = self.incrDumpsDirs.getLatestIncrDate(True) if not incrDate: if (self.verbose): print No dump for wiki , w ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [104026] branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104026 Revision: 104026 Author: ariel Date: 2011-11-23 12:10:29 + (Wed, 23 Nov 2011) Log Message: --- missing arg for IncrDump() call, add some verbosity generally Modified Paths: -- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py Modified: branches/ariel/xmldumps-backup/incrementals/generateincrementals.py === --- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 2011-11-23 11:42:04 UTC (rev 104025) +++ branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 2011-11-23 12:10:29 UTC (rev 104026) @@ -73,10 +73,25 @@ if not dryrun: self.incrDumpsDirs.cleanupOldIncrDumps(self.date) maxRevID = self.getMaxRevIdFromFile() + if (self.verbose): +if maxRevID: +print maxRevID is , maxRevID +else: +print no maxRevID found prevDate = self.incrDumpsDirs.getPrevIncrDate(self.date) + if (self.verbose): +if prevDate: +print prevDate is, prevDate +else: +print no prevDate found prevRevID = None if prevDate: -prevRevID = self.getMaxRevIdFromFile(prevDate) + prevRevID = self.getMaxRevIdFromFile(prevDate) + if (self.verbose): + if prevRevID: +print prevRevId is , prevRevID + else: +print no prevRevID found if not prevRevID: prevRevID = str(int(maxRevID) - 10) if int(prevRevID) 1: @@ -266,7 +281,7 @@ date = TimeUtils.today() if len(remainder) 0: -dump = IncrDump(config, date, remainder[0], doStubs, doRevs, dryrun, verbose) +dump = IncrDump(config, date, remainder[0], doStubs, doRevs, dryrun, verbose, forcerun) dump.doOneWiki() else: dump = IncrDumpLoop(config, date, doStubs, doRevs, dryrun, verbose, forcerun) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [104054] branches/ariel/xmldumps-backup/worker.py
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104054 Revision: 104054 Author: ariel Date: 2011-11-23 17:36:55 + (Wed, 23 Nov 2011) Log Message: --- check for maintenance mode for regular run, not just specific job Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-11-23 17:27:08 UTC (rev 104053) +++ branches/ariel/xmldumps-backup/worker.py2011-11-23 17:36:55 UTC (rev 104054) @@ -1834,6 +1834,7 @@ self.checksums.prepareChecksums() for item in self.dumpItemList.dumpItems: + Maintenance.exitIfInMaintenanceMode(In maintenance mode, exiting dump of %s at step %s % ( self.dbName, item.name() ) ) item.start(self) self.status.updateStatusFiles() self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo()) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [103821] branches/ariel/xmldumps-backup/incrementals/incrs-index.html
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/103821 Revision: 103821 Author: ariel Date: 2011-11-21 16:37:23 + (Mon, 21 Nov 2011) Log Message: --- relative paths for links in index.html Modified Paths: -- branches/ariel/xmldumps-backup/incrementals/incrs-index.html Modified: branches/ariel/xmldumps-backup/incrementals/incrs-index.html === --- branches/ariel/xmldumps-backup/incrementals/incrs-index.html 2011-11-21 16:28:12 UTC (rev 103820) +++ branches/ariel/xmldumps-backup/incrementals/incrs-index.html 2011-11-21 16:37:23 UTC (rev 103821) @@ -110,9 +110,9 @@ /ul hr p - Return to a href=http://dumps.wikimedia.org/other/;our other datasets/a, the - a href=http://dumps.wikimedia.org/backup-index.html;XML data dumps/a, or - a href=http://dumps.wikimedia.org/index.html;the main index/a. + Return to a href=/other/our other datasets/a, the + a href=/backup-index.htmlXML data dumps/a, or + a href=/index.htmlthe main index/a. p/ /body /html ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [103582] trunk/phase3/includes/Export.php
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/103582 Revision: 103582 Author: ariel Date: 2011-11-18 09:08:29 + (Fri, 18 Nov 2011) Log Message: --- followup to r103448, tighten up code Modified Paths: -- trunk/phase3/includes/Export.php Modified: trunk/phase3/includes/Export.php === --- trunk/phase3/includes/Export.php2011-11-18 08:10:56 UTC (rev 103581) +++ trunk/phase3/includes/Export.php2011-11-18 09:08:29 UTC (rev 103582) @@ -614,17 +614,11 @@ function writeContributor( $id, $text ) { $out = contributor\n; - if ( $id ) { + if ( $id || !IP::isValid( $text ) ) { $out .= . Xml::elementClean( 'username', null, strval( $text ) ) . \n; $out .= . Xml::element( 'id', null, strval( $id ) ) . \n; } else { - if ( IP::isValid( $text ) ) { - $out .= . Xml::elementClean( 'ip', null, strval( $text ) ) . \n; - } - else { - $out .= . Xml::elementClean( 'username', null, strval( $text ) ) . \n; - $out .= . Xml::element( 'id', null, strval( $id ) ) . \n; - } + $out .= . Xml::elementClean( 'ip', null, strval( $text ) ) . \n; } $out .= /contributor\n; return $out; ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [103448] trunk/phase3/includes/Export.php
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/103448 Revision: 103448 Author: ariel Date: 2011-11-17 09:20:51 + (Thu, 17 Nov 2011) Log Message: --- if user id is 0 and username is actually an IP, write it as ip, not username Modified Paths: -- trunk/phase3/includes/Export.php Modified: trunk/phase3/includes/Export.php === --- trunk/phase3/includes/Export.php2011-11-17 08:03:14 UTC (rev 103447) +++ trunk/phase3/includes/Export.php2011-11-17 09:20:51 UTC (rev 103448) @@ -618,7 +618,13 @@ $out .= . Xml::elementClean( 'username', null, strval( $text ) ) . \n; $out .= . Xml::element( 'id', null, strval( $id ) ) . \n; } else { - $out .= . Xml::elementClean( 'ip', null, strval( $text ) ) . \n; + if ( IP::isValid( $text ) ) { + $out .= . Xml::elementClean( 'ip', null, strval( $text ) ) . \n; + } + else { + $out .= . Xml::elementClean( 'username', null, strval( $text ) ) . \n; + $out .= . Xml::element( 'id', null, strval( $id ) ) . \n; + } } $out .= /contributor\n; return $out; ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [103314] trunk/phase3
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/103314 Revision: 103314 Author: ariel Date: 2011-11-16 12:01:58 + (Wed, 16 Nov 2011) Log Message: --- add checkbox for listauthors on export form if wgExportAllowListContributors set Modified Paths: -- trunk/phase3/includes/specials/SpecialExport.php trunk/phase3/languages/messages/MessagesEn.php Modified: trunk/phase3/includes/specials/SpecialExport.php === --- trunk/phase3/includes/specials/SpecialExport.php2011-11-16 11:45:22 UTC (rev 103313) +++ trunk/phase3/includes/specials/SpecialExport.php2011-11-16 12:01:58 UTC (rev 103314) @@ -216,6 +216,15 @@ $request-wasPosted() ? $request-getCheck( 'wpDownload' ) : true ) . 'br /'; + if ( $wgExportAllowListContributors ) { + $form .= Xml::checkLabel( + wfMsg( 'exportlistauthors' ), + 'listauthors', + 'listauthors', + $request-wasPosted() ? $request-getCheck( 'listauthors' ) : false + ) . 'br /'; + } + $form .= Xml::submitButton( wfMsg( 'export-submit' ), Linker::tooltipAndAccesskeyAttribs( 'export' ) ); $form .= Xml::closeElement( 'form' ); Modified: trunk/phase3/languages/messages/MessagesEn.php === --- trunk/phase3/languages/messages/MessagesEn.php 2011-11-16 11:45:22 UTC (rev 103313) +++ trunk/phase3/languages/messages/MessagesEn.php 2011-11-16 12:01:58 UTC (rev 103314) @@ -3278,6 +3278,7 @@ 'exportcuronly' = 'Include only the current revision, not the full history', 'exportnohistory' = '''Note:''' Exporting the full history of pages through this form has been disabled due to performance reasons., +'exportlistauthors' = 'Include a full list of contributors for each page', 'export-submit' = 'Export', 'export-addcattext' = 'Add pages from category:', 'export-addcat' = 'Add', ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [101971] branches/ariel/xmldumps-backup/mysql2txt.py
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101971 Revision: 101971 Author: ariel Date: 2011-11-04 09:46:30 + (Fri, 04 Nov 2011) Log Message: --- possibily useful, likely buggy script for grabbing specific columns from desired rows of a mysql table dump Added Paths: --- branches/ariel/xmldumps-backup/mysql2txt.py Added: branches/ariel/xmldumps-backup/mysql2txt.py === --- branches/ariel/xmldumps-backup/mysql2txt.py (rev 0) +++ branches/ariel/xmldumps-backup/mysql2txt.py 2011-11-04 09:46:30 UTC (rev 101971) @@ -0,0 +1,405 @@ +# this script reads from stdin a sql file created by mysqldump, grabs the requested columns from +# the requested table from each tuple, and writes them out one tuple per line +# with a comma between columns, keeping the original escaping of values as done by mysql. + +import getopt +import os +import re +import sys + +class ConverterError(Exception): +pass + +class MysqlFile: +def __init__(self, f, tableRequested, columnsRequested, valuesRequestedCols, valuesRequestedVals, fieldSeparator): +self.file = f +self.tableRequested = tableRequested +self.columnsRequested = columnsRequested +self.valuesRequestedCols = valuesRequestedCols +self.valuesRequestedVals = valuesRequestedVals +self.fieldSeparator = fieldSeparator + +self.buffer = +self.bufferInd = 0 +self.eof = False +self.rowsDone = False +self.GET = 1 +self.CHECK = 2 +self.SKIP = 0 + +def findCreateStatement(self): +tableFound = False +toFind = CREATE TABLE `%s` (\n % self.tableRequested +line = self.getLine(len(toFind)) +if (not line.endswith(\n)): +self.skipLineRemainder() +while line != : +if line == toFind: +tableFound = True +break +line = self.getLine(len(toFind)) +if (not line.endswith(\n)): +self.skipLineRemainder() +if not tableFound: +raise ConverterError(create statement for requested table not found in file) + +def getLine(self, maxbytes = 0): +returns line including the \n, up to maxbytes +line = +length = 0 +if self.eof: +return False +while 
self.buffer[self.bufferInd] != '\n': +line = line + self.buffer[self.bufferInd] +if not self.incrementBufferPtr(): + return False +length = length + 1 +if maxbytes and length == maxbytes: +return line + +if not self.skipChar('\n'): +return False +return line + \n + +def skipLineRemainder(self): +# skip up to the newline... +while self.buffer[self.bufferInd] != '\n': +if not self.incrementBufferPtr(): + return False +# and now the newline. +return self.incrementBufferPtr() + +def findInsertStatement(self): +leave the file contents at the line immediately following +an INSERT statement +if m.eof: +return False +insertFound = False +toFind = INSERT INTO `%s` VALUES % self.tableRequested +line = self.getLine(len(toFind)) +while line and not self.eof: +if line.startswith(toFind): +insertFound = True +break +if (not line.endswith(\n)): +self.skipLineRemainder() +line = self.getLine(len(toFind)) +return insertFound + +def setupColumnRetrieval(self): +self.columnsInTable = [] +columnNameExpr = re.compile('\s+`([^`]+)`') +line = self.getLine() +while (line and not self.eof and line[0] != ')' ): +columnNameMatch = columnNameExpr.match(line) +if (columnNameMatch): +self.columnsInTable.append(columnNameMatch.group(1)) +line = self.getLine() + +for c in self.columnsRequested: +if not c in self.columnsInTable: +raise ConverterError(requested column %s not found in table % c) + +#print columns in table: , self.columnsInTable +#print columnsRequested: , self.columnsRequested + +self.columnsToGet = [] +for c in self.columnsInTable: +v = self.SKIP +if c in self.columnsRequested: +v = v | self.GET +if c in self.valuesRequestedCols: +v = v | self.CHECK +self.columnsToGet.append( v ) + +#print columns to get: , self.columnsToGet + +self.columnOrder = [] +# we want here a list which tells us to +# write the ith column we read from tuple first, +# the jth one second, the kth one third etc
[MediaWiki-CVS] SVN: [101975] trunk/extensions/Renameuser/renameUserCleanup.php
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101975 Revision: 101975 Author: ariel Date: 2011-11-04 11:10:54 + (Fri, 04 Nov 2011) Log Message: --- typo in email; ability to override result of check for rename of user in logs Modified Paths: -- trunk/extensions/Renameuser/renameUserCleanup.php Modified: trunk/extensions/Renameuser/renameUserCleanup.php === --- trunk/extensions/Renameuser/renameUserCleanup.php 2011-11-04 11:08:11 UTC (rev 101974) +++ trunk/extensions/Renameuser/renameUserCleanup.php 2011-11-04 11:10:54 UTC (rev 101975) @@ -20,7 +20,7 @@ * http://www.gnu.org/copyleft/gpl.html * * @ingroup Maintenance - * @author Ariel Glenn ar...@wikimedia.orf + * @author Ariel Glenn ar...@wikimedia.org */ $IP = getenv( 'MW_INSTALL_PATH' ); @@ -42,6 +42,7 @@ $this-output( Rename User Cleanup starting...\n\n ); $olduser = User::newFromName( $this-getOption( 'olduser' ) ); $newuser = User::newFromName( $this-getOption( 'newuser' ) ); + if ( !$newuser-getId() ) { $this-error( No such user: . $this-getOption( 'newuser' ), true ); exit(1); @@ -79,8 +80,14 @@ __METHOD__ ); if (! $result || ! $result-numRows() ) { - print(No log entry found for a rename of .$olduser-getName(). to .$newuser-getName()., giving up\n); - exit(1); + print(No log entry found for a rename of .$olduser-getName(). to .$newuser-getName()., proceed anyways??? [N/y] ); + $stdin = fopen (php://stdin,rt); + $line = fgets($stdin); + fclose($stdin); + if ( $line[0] != Y $line[0] != y ) { + print(Exiting at user's request\n); + exit(1); + } } else { foreach ( $result as $row ) { @@ -93,7 +100,7 @@ print(Found log entry of the rename: .$olduser-getName(). to .$newuser-getName(). on $row-log_timestamp\n); } } - if ($result-numRows() 1) { + if ($result $result-numRows() 1) { print(More than one rename entry found in the log, not sure what to do. Continue anyways? 
[N/y] ); $stdin = fopen (php://stdin,rt); $line = fgets($stdin); ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [101992] trunk/extensions/Renameuser/renameUserCleanup.php
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101992 Revision: 101992 Author: ariel Date: 2011-11-04 14:19:52 + (Fri, 04 Nov 2011) Log Message: --- allow renames for user with specific uid set in row; minor formatting cleanup; refactor (one large function - several smaller ones) Modified Paths: -- trunk/extensions/Renameuser/renameUserCleanup.php Modified: trunk/extensions/Renameuser/renameUserCleanup.php === --- trunk/extensions/Renameuser/renameUserCleanup.php 2011-11-04 13:59:02 UTC (rev 101991) +++ trunk/extensions/Renameuser/renameUserCleanup.php 2011-11-04 14:19:52 UTC (rev 101992) @@ -35,6 +35,7 @@ $this-mDescription = Maintenance script to finish incomplete rename user, in particular to reassign edits that were missed; $this-addOption( 'olduser', 'Old user name', true, true ); $this-addOption( 'newuser', 'New user name', true, true ); + $this-addOption( 'olduid', 'Old user id in revision records (DANGEROUS)', false, true ); $this-mBatchSize = 1000; } @@ -42,23 +43,42 @@ $this-output( Rename User Cleanup starting...\n\n ); $olduser = User::newFromName( $this-getOption( 'olduser' ) ); $newuser = User::newFromName( $this-getOption( 'newuser' ) ); + $olduid = $this-getOption( 'olduid' ); + $this-checkUserExistence( $olduser, $newuser ); + $this-checkRenameLog( $olduser, $newuser ); + + if ( $olduid ) { + $this-doUpdates( $olduser, $newuser, $olduid, $dbw ); + } + $this-doUpdates( $olduser, $newuser, $newuser-getId(), $dbw ); + $this-doUpdates( $olduser, $newuser, 0, $dbw ); + + print Done!\n; + exit(0); + } + + + public function checkUserExistence( $olduser, $newuser ) { if ( !$newuser-getId() ) { $this-error( No such user: . $this-getOption( 'newuser' ), true ); exit(1); } if ($olduser-getId() ) { - print( WARNING!!: Old user still exists: . $this-getOption( 'olduser' ) . \n); - print(proceed anyways? We'll only re-attribute edits that have the new user uid (or 0) and the old user name. [N/y] ); + print WARNING!!: Old user still exists: . 
$this-getOption( 'olduser' ) . \n; + print proceed anyways? We'll only re-attribute edits that have the new user uid (or 0); + print or the uid specified by the caller, and the old user name. [N/y] ; $stdin = fopen (php://stdin,rt); $line = fgets($stdin); fclose($stdin); if ( $line[0] != Y $line[0] != y ) { - print(Exiting at user's request\n); + print Exiting at user's request\n; exit(0); } } + } + public function checkRenameLog( $olduser, $newuser ) { $dbr = wfGetDB( DB_SLAVE ); $result = $dbr-select( 'logging', '*', array( 'log_type' = 'renameuser', @@ -80,130 +100,111 @@ __METHOD__ ); if (! $result || ! $result-numRows() ) { - print(No log entry found for a rename of .$olduser-getName(). to .$newuser-getName()., proceed anyways??? [N/y] ); + print No log entry found for a rename of .$olduser-getName(). to .$newuser-getName()., proceed anyways??? [N/y] ; $stdin = fopen (php://stdin,rt); $line = fgets($stdin); fclose($stdin); if ( $line[0] != Y $line[0] != y ) { - print(Exiting at user's request\n); + print Exiting at user's request\n; exit(1); } } else { foreach ( $result as $row ) { - print(Found possible log entry of the rename, please check: .$row-log_title. with comment .$row-log_comment. on $row-log_timestamp\n); + print Found possible log entry of the rename, please check: .$row-log_title. with comment .$row-log_comment. on $row-log_timestamp\n; } } } else
[MediaWiki-CVS] SVN: [101998] trunk/extensions/Renameuser/renameUserCleanup.php
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101998 Revision: 101998 Author: ariel Date: 2011-11-04 15:08:05 + (Fri, 04 Nov 2011) Log Message: --- use getdbkey() form of user name for log check Modified Paths: -- trunk/extensions/Renameuser/renameUserCleanup.php Modified: trunk/extensions/Renameuser/renameUserCleanup.php === --- trunk/extensions/Renameuser/renameUserCleanup.php 2011-11-04 15:05:00 UTC (rev 101997) +++ trunk/extensions/Renameuser/renameUserCleanup.php 2011-11-04 15:08:05 UTC (rev 101998) @@ -80,12 +80,16 @@ public function checkRenameLog( $olduser, $newuser ) { $dbr = wfGetDB( DB_SLAVE ); + + $oldTitle = Title::makeTitle( NS_USER, $olduser-getName() ); + $newTitle = Title::makeTitle( NS_USER, $newuser-getName() ); + $result = $dbr-select( 'logging', '*', array( 'log_type' = 'renameuser', 'log_action'= 'renameuser', 'log_namespace' = NS_USER, - 'log_title' = $olduser-getName(), - 'log_params'= $newuser-getName() + 'log_title' = $oldTitle-getDBkey(), + 'log_params'= $newTitle-getDBkey() ), __METHOD__ ); ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [102000] trunk/extensions/Renameuser/renameUserCleanup.php
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/102000 Revision: 102000 Author: ariel Date: 2011-11-04 15:28:14 + (Fri, 04 Nov 2011) Log Message: --- bah, only the log_title needs the dbkey format; make 'no edits' message similar to 'found edits' message Modified Paths: -- trunk/extensions/Renameuser/renameUserCleanup.php Modified: trunk/extensions/Renameuser/renameUserCleanup.php === --- trunk/extensions/Renameuser/renameUserCleanup.php 2011-11-04 15:16:54 UTC (rev 101999) +++ trunk/extensions/Renameuser/renameUserCleanup.php 2011-11-04 15:28:14 UTC (rev 102000) @@ -82,14 +82,13 @@ $dbr = wfGetDB( DB_SLAVE ); $oldTitle = Title::makeTitle( NS_USER, $olduser-getName() ); - $newTitle = Title::makeTitle( NS_USER, $newuser-getName() ); $result = $dbr-select( 'logging', '*', array( 'log_type' = 'renameuser', 'log_action'= 'renameuser', 'log_namespace' = NS_USER, 'log_title' = $oldTitle-getDBkey(), - 'log_params'= $newTitle-getDBkey() + 'log_params'= $newuser-getName() ), __METHOD__ ); @@ -153,7 +152,7 @@ array( $usernamefield = $olduser-getName(), $useridfield = $uid ), __METHOD__ ); if ( $contribs == 0 ) { - print No edits to be re-attributed from table $table\n ; + print No edits to be re-attributed from table $table for uid $uid\n ; return(0); } ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [101591] trunk/phase3
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101591 Revision: 101591 Author: ariel Date: 2011-11-02 07:58:43 + (Wed, 02 Nov 2011) Log Message: --- export specified range of revisions (as stubs) Modified Paths: -- trunk/phase3/includes/Export.php trunk/phase3/maintenance/backup.inc trunk/phase3/maintenance/dumpBackup.php Modified: trunk/phase3/includes/Export.php === --- trunk/phase3/includes/Export.php2011-11-02 07:47:00 UTC (rev 101590) +++ trunk/phase3/includes/Export.php2011-11-02 07:58:43 UTC (rev 101591) @@ -41,6 +41,7 @@ const CURRENT = 2; const STABLE = 4; // extension defined const LOGS = 8; + const RANGE = 16; const BUFFER = 0; const STREAM = 1; @@ -56,7 +57,8 @@ * main query is still running. * * @param $db Database -* @param $history Mixed: one of WikiExporter::FULL or WikiExporter::CURRENT, +* @param $history Mixed: one of WikiExporter::FULL, WikiExporter::CURRENT, +* WikiExporter::RANGE or WikiExporter::STABLE, * or an associative array: * offset: non-inclusive offset at which to start the query * limit: maximum number of rows to return @@ -120,6 +122,21 @@ } /** +* Dumps a series of page and revision records for those pages +* in the database with revisions falling within the rev_id range given. +* @param $start Int: inclusive lower limit (this id is included) +* @param $end Int: Exclusive upper limit (this id is not included) +* If 0, no upper limit. +*/ + public function revsByRange( $start, $end ) { + $condition = 'rev_id = ' . intval( $start ); + if ( $end ) { + $condition .= ' AND rev_id ' . intval( $end ); + } + return $this-dumpFrom( $condition ); + } + + /** * @param $title Title */ public function pageByTitle( $title ) { @@ -259,6 +276,10 @@ wfProfileOut( __METHOD__ ); throw new MWException( __METHOD__ . given invalid history dump type. 
); } + } elseif ( $this-history WikiExporter::RANGE ) { + # Dump of revisions within a specified range + $join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' ); + $opts['ORDER BY'] = 'rev_page ASC, rev_id ASC'; } else { # Uknown history specification parameter? wfProfileOut( __METHOD__ ); Modified: trunk/phase3/maintenance/backup.inc === --- trunk/phase3/maintenance/backup.inc 2011-11-02 07:47:00 UTC (rev 101590) +++ trunk/phase3/maintenance/backup.inc 2011-11-02 07:58:43 UTC (rev 101591) @@ -217,6 +217,8 @@ } else if ( is_null( $this-pages ) ) { if ( $this-startId || $this-endId ) { $exporter-pagesByRange( $this-startId, $this-endId ); + } elseif ( $this-revStartId || $this-revEndId ) { + $exporter-revsByRange( $this-revStartId, $this-revEndId ); } else { $exporter-allPages(); } Modified: trunk/phase3/maintenance/dumpBackup.php === --- trunk/phase3/maintenance/dumpBackup.php 2011-11-02 07:47:00 UTC (rev 101590) +++ trunk/phase3/maintenance/dumpBackup.php 2011-11-02 07:58:43 UTC (rev 101591) @@ -27,7 +27,7 @@ $originalDir = getcwd(); -$optionsWithArgs = array( 'pagelist', 'start', 'end' ); +$optionsWithArgs = array( 'pagelist', 'start', 'end', 'revstart', 'revend'); require_once( dirname( __FILE__ ) . '/commandLine.inc' ); require_once( 'backup.inc' ); @@ -57,6 +57,13 @@ if ( isset( $options['end'] ) ) { $dumper-endId = intval( $options['end'] ); } + +if ( isset( $options['revstart'] ) ) { + $dumper-revStartId = intval( $options['revstart'] ); +} +if ( isset( $options['revend'] ) ) { + $dumper-revEndId = intval( $options['revend'] ); +} $dumper-skipHeader = isset( $options['skip-header'] ); $dumper-skipFooter = isset( $options['skip-footer'] ); $dumper-dumpUploads = isset( $options['uploads'] ); @@ -72,6 +79,8 @@ $dumper-dump( WikiExporter::STABLE, $textMode ); } elseif ( isset( $options['logs'] ) ) { $dumper-dump( WikiExporter::LOGS ); +} elseif ( isset($options['revrange'] ) ) { + $dumper-dump( WikiExporter::RANGE
[MediaWiki-CVS] SVN: [101606] trunk/phase3/includes/Export.php
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101606 Revision: 101606 Author: ariel Date: 2011-11-02 09:55:43 + (Wed, 02 Nov 2011) Log Message: --- corrections for fixme in r96486 Modified Paths: -- trunk/phase3/includes/Export.php Modified: trunk/phase3/includes/Export.php === --- trunk/phase3/includes/Export.php2011-11-02 09:34:11 UTC (rev 101605) +++ trunk/phase3/includes/Export.php2011-11-02 09:55:43 UTC (rev 101606) @@ -871,11 +871,19 @@ protected $filename; function __construct( $file ) { - $command = setup7zCommand( $file ); + $command = $this-setup7zCommand( $file ); parent::__construct( $command ); $this-filename = $file; } + function setup7zCommand( $file ) { + $command = 7za a -bd -si . wfEscapeShellArg( $file ); + // Suppress annoying useless crap from p7zip + // Unfortunately this could suppress real error messages too + $command .= ' ' . wfGetNull() . ' 21'; + return( $command ); + } + function closeRenameAndReopen( $newname ) { $this-closeAndRename( $newname, true ); } @@ -895,10 +903,7 @@ throw new MWException( __METHOD__ . : rename of file {$this-filename} to $newname failed\n ); } elseif ( $open ) { - $command = 7za a -bd -si . wfEscapeShellArg( $file ); - // Suppress annoying useless crap from p7zip - // Unfortunately this could suppress real error messages too - $command .= ' ' . wfGetNull() . ' 21'; + $command = setup7zCommand( $file ); $this-startCommand( $command ); } } ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [101614] trunk/phase3/includes/Export.php
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101614 Revision: 101614 Author: ariel Date: 2011-11-02 11:08:06 + (Wed, 02 Nov 2011) Log Message: --- clean up duplicated code, for fixme in r97178 Modified Paths: -- trunk/phase3/includes/Export.php Modified: trunk/phase3/includes/Export.php === --- trunk/phase3/includes/Export.php2011-11-02 10:59:34 UTC (rev 101613) +++ trunk/phase3/includes/Export.php2011-11-02 11:08:06 UTC (rev 101614) @@ -763,7 +763,13 @@ $this-closeAndRename( $newname, true ); } - function closeAndRename( $newname, $open = false ) { + function renameOrException( $newname ) { + if (! rename( $this-filename, $newname ) ) { + throw new MWException( __METHOD__ . : rename of file {$this-filename} to $newname failed\n ); + } + } + + function checkRenameArgCount( $newname ) { if ( is_array( $newname ) ) { if ( count( $newname ) 1 ) { throw new MWException( __METHOD__ . : passed multiple arguments for rename of single file\n ); @@ -771,12 +777,15 @@ $newname = $newname[0]; } } + return $newname; + } + + function closeAndRename( $newname, $open = false ) { + $newname = $this-checkRenameArgCount( $newname ); if ( $newname ) { fclose( $this-handle ); - if (! rename( $this-filename, $newname ) ) { - throw new MWException( __METHOD__ . : rename of file {$this-filename} to $newname failed\n ); - } - elseif ( $open ) { + $this-renameOrException( $newname ); + if ( $open ) { $this-handle = fopen( $this-filename, wt ); } } @@ -820,20 +829,12 @@ } function closeAndRename( $newname, $open = false ) { - if ( is_array( $newname ) ) { - if ( count( $newname ) 1 ) { - throw new MWException( __METHOD__ . : passed multiple arguments for rename of single file\n ); - } else { - $newname = $newname[0]; - } - } + $newname = $this-checkRenameArgCount( $newname ); if ( $newname ) { fclose( $this-handle ); proc_close( $this-procOpenResource ); - if (! rename( $this-filename, $newname ) ) { - throw new MWException( __METHOD__ . 
: rename of file {$this-filename} to $newname failed\n ); - } - elseif ( $open ) { + $this-renameOrException( $newname ); + if ( $open ) { $command = $this-command; $command .= . wfEscapeShellArg( $this-filename ); $this-startCommand( $command ); @@ -889,20 +890,12 @@ } function closeAndRename( $newname, $open = false ) { - if ( is_array( $newname ) ) { - if ( count( $newname ) 1 ) { - throw new MWException( __METHOD__ . : passed multiple arguments for rename of single file\n ); - } else { - $newname = $newname[0]; - } - } + $newname = $this-checkRenameArgCount( $newname ); if ( $newname ) { fclose( $this-handle ); proc_close( $this-procOpenResource ); - if (! rename( $this-filename, $newname ) ) { - throw new MWException( __METHOD__ . : rename of file {$this-filename} to $newname failed\n ); - } - elseif ( $open ) { + $this-renameOrException( $newname ); + if ( $open ) { $command = setup7zCommand( $file ); $this-startCommand( $command ); } ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [101625] branches/ariel/xmldumps-backup/incrementals
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101625 Revision: 101625 Author: ariel Date: 2011-11-02 14:29:09 + (Wed, 02 Nov 2011) Log Message: --- add forcerun option which will do a run even if a completed run exists for the given date; fix status check (which in turn fixes retrieving max revid of previous good run) Modified Paths: -- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py branches/ariel/xmldumps-backup/incrementals/generateincrementals.py Modified: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py === --- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2011-11-02 14:03:29 UTC (rev 101624) +++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2011-11-02 14:29:09 UTC (rev 101625) @@ -85,11 +85,10 @@ self.statusFile = StatusFile(self._config, self.date, self.wikiName) def getStatus(self, date = None): +status = if exists(self.statusFile.getPath(date)): status = FileUtils.readFile(self.statusFile.getPath(date)).rstrip() -if status == done: -return True -return False +return(status) def setStatus(self, status): FileUtils.writeFileInPlace(self.statusFile.getPath(),status, self._config.fileperms) Modified: branches/ariel/xmldumps-backup/incrementals/generateincrementals.py === --- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 2011-11-02 14:03:29 UTC (rev 101624) +++ branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 2011-11-02 14:29:09 UTC (rev 101625) @@ -27,7 +27,7 @@ self.OK = 0 class IncrDump(object): -def __init__(self,config, date, wikiName, doStubs, doRevs, dryrun, verbose): +def __init__(self,config, date, wikiName, doStubs, doRevs, dryrun, verbose, forcerun): self._config = config self.date = date self.wikiName = wikiName @@ -35,6 +35,7 @@ self.doStubs = doStubs self.doRevs = doRevs self.dryrun = dryrun +self.forcerun = forcerun self.maxRevIDFile = MaxRevIDFile(self._config, self.date, self.wikiName) self.statusInfo = StatusInfo(self._config, self.date, 
self.wikiName) self.stubFile = StubFile(self._config, self.date, self.wikiName) @@ -54,7 +55,7 @@ if not exists(self.incrDir.getIncDir(self.wikiName)): os.makedirs(self.incrDir.getIncDir(self.wikiName)) status = self.statusInfo.getStatus() -if status == done: +if status == done and not forcerun: if (self.verbose): print wiki,self.wikiName,skipped, adds/changes dump already complete return retCodes.OK @@ -170,20 +171,21 @@ return False class IncrDumpLoop(object): -def __init__(self, config, date, doStubs, doRevs, dryrun, verbose): +def __init__(self, config, date, doStubs, doRevs, dryrun, verbose, forcerun): self._config = config self.date = date self.doStubs = doStubs self.doRevs = doRevs self.dryrun = dryrun self.verbose = verbose +self.forcerun = forcerun def doRunOnAllWikis(self): retCodes = DumpResults() failures = 0 todos = 0 for w in self._config.allWikisList: -dump = IncrDump(config, date, w, doStubs, doRevs, dryrun, self.verbose) +dump = IncrDump(self._config, self.date, w, self.doStubs, self.doRevs, self.dryrun, self.verbose, self.forcerun) result = dump.doOneWiki() if result == retCodes.FAILED: failures = failures + 1 @@ -212,6 +214,7 @@ print --configfile: Specify an alternate config file to read. Default file is 'dumpincr.conf' in the current directory. print --date:(Re)run incremental of a given date (use with care). print --dryrun: Don't actually dump anything but print the commands that would be run. +print --forcerun:Do the run even if there is already a successful run in place. print --revsonly:Do only the stubs part of the dumps. print --stubsonly: Do only the revision text part of the dumps. 
print --verbose: Print error messages and other informative messages (normally the @@ -227,10 +230,11 @@ doRevs = True dryrun = False verbose = False +forcerun = False try: (options, remainder) = getopt.gnu_getopt(sys.argv[1:], , - ['date=', 'configfile=', 'stubsonly', 'revsonly', 'dryrun', 'verbose' ]) + ['date=', 'configfile=', 'stubsonly', 'revsonly', 'dryrun', 'verbose', 'forcerun' ]) except: usage
[MediaWiki-CVS] SVN: [100112] branches/ariel/xmldumps-backup/create-rsync-list.sh
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/100112 Revision: 100112 Author: ariel Date: 2011-10-18 10:37:50 + (Tue, 18 Oct 2011) Log Message: --- generate rsync-friendly full listing of files we want mirrored Modified Paths: -- branches/ariel/xmldumps-backup/create-rsync-list.sh Modified: branches/ariel/xmldumps-backup/create-rsync-list.sh === --- branches/ariel/xmldumps-backup/create-rsync-list.sh 2011-10-18 09:49:08 UTC (rev 100111) +++ branches/ariel/xmldumps-backup/create-rsync-list.sh 2011-10-18 10:37:50 UTC (rev 100112) @@ -186,4 +186,4 @@ exit 1 fi - +/usr/bin/rsync --list-only --files-from=$outputfile $publicdir dummy $outputfile.rsync ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [100038] branches/ariel/xmldumps-backup/monitor.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/100038 Revision: 100038 Author: ariel Date: 2011-10-17 15:32:10 + (Mon, 17 Oct 2011) Log Message: --- fix check of argument existence Modified Paths: -- branches/ariel/xmldumps-backup/monitor.py Modified: branches/ariel/xmldumps-backup/monitor.py === --- branches/ariel/xmldumps-backup/monitor.py 2011-10-17 15:29:32 UTC (rev 100037) +++ branches/ariel/xmldumps-backup/monitor.py 2011-10-17 15:32:10 UTC (rev 100038) @@ -6,12 +6,6 @@ from os.path import exists from WikiDump import FileUtils -# can specify name of alternate config file -if (sys.argv[1]): - config = WikiDump.Config(sys.argv[1]) -else: - config = WikiDump.Config() - def generateIndex(): running = False states = [] @@ -52,4 +46,10 @@ os.rename(tempFilename, outputFileName) if __name__ == __main__: + # can specify name of alternate config file + if (len(sys.argv) 2): + config = WikiDump.Config(sys.argv[1]) + else: + config = WikiDump.Config() + updateIndex() ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [100043] branches/ariel/xmldumps-backup
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/100043 Revision: 100043 Author: ariel Date: 2011-10-17 15:59:10 + (Mon, 17 Oct 2011) Log Message: --- remove stray raise; use relative web paths in generated html Modified Paths: -- branches/ariel/xmldumps-backup/WikiDump.py branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/WikiDump.py === --- branches/ariel/xmldumps-backup/WikiDump.py 2011-10-17 15:57:47 UTC (rev 100042) +++ branches/ariel/xmldumps-backup/WikiDump.py 2011-10-17 15:59:10 UTC (rev 100043) @@ -95,7 +95,6 @@ size = os.path.getsize(path) return (timestamp, size) except: - raise return(None, None) fileAge = staticmethod(fileAge) @@ -429,6 +428,16 @@ def webDir(self): return /.join((self.config.webRoot, self.dbName)) + + def webDirRelative(self): + webRootRelative = self.webDir() + i = webRootRelative.find(://) + if i = 0: + webRootRelative = webRootRelative[i:] + i = webRootRelative.find(/) + if i = 0: + webRootRelative = webRootRelative[i:] + return webRootRelative # Actions! Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-10-17 15:57:47 UTC (rev 100042) +++ branches/ariel/xmldumps-backup/worker.py2011-10-17 15:59:10 UTC (rev 100043) @@ -897,6 +897,14 @@ dateString = self._wiki.date return os.path.join(self._wiki.webDir(), dateString, dumpFile.filename) + + def webPathRelative(self, dumpFile, dateString = None): + Given a DumpFilename object produce the url relative to the docroot for the filename for the date of + the dump for the selected database. 
+ if (not dateString): + dateString = self._wiki.date + return os.path.join(self._wiki.webDirRelative(), dateString, dumpFile.filename) + def dirCacheOutdated(self, date): if not date: date = self._wiki.date @@ -1375,8 +1383,8 @@ if itemStatus == in-progress: return li class='file'%s %s (written) /li % (fileObj.filename, size) elif itemStatus == done: - webpath = self.dumpDir.webPath(fileObj) - return li class='file'a href=\%s\%s/a %s/li % (webpath, fileObj.filename, size) + webpathRelative = self.dumpDir.webPathRelative(fileObj) + return li class='file'a href=\%s\%s/a %s/li % (webpathRelative, fileObj.filename, size) else: return li class='missing'%s/li % fileObj.filename @@ -1423,7 +1431,7 @@ status: self._reportStatusSummaryLine(done), previous: self._reportPreviousDump(done), items: html, - checksum: self.dumpDir.webPath(f), + checksum: self.dumpDir.webPathRelative(f), index: self.wiki.config.index} def _reportPreviousDump(self, done): ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [100051] branches/ariel/xmldumps-backup/WikiDump.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/100051 Revision: 100051 Author: ariel Date: 2011-10-17 16:46:02 + (Mon, 17 Oct 2011) Log Message: --- and actually make relative path contruction work Modified Paths: -- branches/ariel/xmldumps-backup/WikiDump.py Modified: branches/ariel/xmldumps-backup/WikiDump.py === --- branches/ariel/xmldumps-backup/WikiDump.py 2011-10-17 16:31:26 UTC (rev 100050) +++ branches/ariel/xmldumps-backup/WikiDump.py 2011-10-17 16:46:02 UTC (rev 100051) @@ -427,16 +427,19 @@ return os.path.join(self.config.privateDir, self.dbName) def webDir(self): - return /.join((self.config.webRoot, self.dbName)) + webRoot = self.config.webRoot + if webRoot[-1] == '/': + webRoot = webRoot[:-1] + return /.join((webRoot, self.dbName)) def webDirRelative(self): webRootRelative = self.webDir() i = webRootRelative.find(://) if i = 0: - webRootRelative = webRootRelative[i:] + webRootRelative = webRootRelative[i+3:] i = webRootRelative.find(/) if i = 0: - webRootRelative = webRootRelative[i:] + webRootRelative = webRootRelative[i:] return webRootRelative # Actions! ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [100075] branches/ariel/xmldumps-backup/monitor.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/100075 Revision: 100075 Author: ariel Date: 2011-10-17 19:36:13 + (Mon, 17 Oct 2011) Log Message: --- count the number of arguments correctly. and don't fix bugs while jetlagged. Modified Paths: -- branches/ariel/xmldumps-backup/monitor.py Modified: branches/ariel/xmldumps-backup/monitor.py === --- branches/ariel/xmldumps-backup/monitor.py 2011-10-17 19:35:29 UTC (rev 100074) +++ branches/ariel/xmldumps-backup/monitor.py 2011-10-17 19:36:13 UTC (rev 100075) @@ -47,7 +47,7 @@ if __name__ == __main__: # can specify name of alternate config file - if (len(sys.argv) 2): + if (len(sys.argv) = 2): config = WikiDump.Config(sys.argv[1]) else: config = WikiDump.Config() ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [99704] branches/ariel/xmldumps-backup/incrementals/ generateincrementals.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/99704 Revision: 99704 Author: ariel Date: 2011-10-13 17:08:11 + (Thu, 13 Oct 2011) Log Message: --- actually dump something when we specify just one wiki as an option Modified Paths: -- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py Modified: branches/ariel/xmldumps-backup/incrementals/generateincrementals.py === --- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 2011-10-13 17:06:03 UTC (rev 99703) +++ branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 2011-10-13 17:08:11 UTC (rev 99704) @@ -261,6 +261,7 @@ if len(remainder) 0: dump = IncrDump(config, date, remainder[0], doStubs, doRevs, dryrun, verbose) +dump.doOneWiki() else: dump = IncrDumpLoop(config, date, doStubs, doRevs, dryrun, verbose) dump.doAllWikisTilDone(3) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [99655] branches/ariel/xmldumps-backup
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/99655 Revision: 99655 Author: ariel Date: 2011-10-12 23:24:40 + (Wed, 12 Oct 2011) Log Message: --- initial checkin of adds/changes dumps Added Paths: --- branches/ariel/xmldumps-backup/incrementals/ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py branches/ariel/xmldumps-backup/incrementals/README.config branches/ariel/xmldumps-backup/incrementals/README.txt branches/ariel/xmldumps-backup/incrementals/all.dblist branches/ariel/xmldumps-backup/incrementals/closed.dblist branches/ariel/xmldumps-backup/incrementals/dumpincr.conf.sample branches/ariel/xmldumps-backup/incrementals/generateincrementals.py branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py branches/ariel/xmldumps-backup/incrementals/incrmonitor branches/ariel/xmldumps-backup/incrementals/incrmonitor.py branches/ariel/xmldumps-backup/incrementals/incrs-index.html branches/ariel/xmldumps-backup/incrementals/private.dblist Added: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py === --- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py (rev 0) +++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py 2011-10-12 23:24:40 UTC (rev 99655) @@ -0,0 +1,390 @@ +# shared classes for incrementals +import os +import sys +import re +import ConfigParser +import WikiDump +from WikiDump import FileUtils, TimeUtils, MiscUtils +from os.path import exists +import socket +import subprocess +from subprocess import Popen, PIPE + +class ContentFile(object): +def __init__(self, config, date, wikiName): +self._config = config +self.date = date +self.incrDir = IncrementDir(self._config, date) +self.wikiName = wikiName + +# override this. 
+def getFileName(self): +return content.txt + +def getPath(self): +return os.path.join(self.incrDir.getIncDir(self.wikiName),self.getFileName()) + +def getFileInfo(self): +return FileUtils.fileInfo(self.getPath()) + +class MaxRevIDFile(ContentFile): +def getFileName(self): +return maxrevid.txt + +class StubFile(ContentFile): +def getFileName(self): +return %s-%s-stubs-meta-hist-incr.xml.gz % ( self.wikiName, self.date ) + +class RevsFile(ContentFile): +def getFileName(self): +return %s-%s-pages-meta-hist-incr.xml.bz2 % ( self.wikiName, self.date ) + +class StatusFile(ContentFile): +def getFileName(self): +return status.txt + +def getPath(self, date = None): +return os.path.join(self.incrDir.getIncDir(self.wikiName, date),self.getFileName()) + +class LockFile(ContentFile): +def getFileName(self): +return %s-%s.lock % ( self.wikiName, self.date ) + +def getPath(self): +return os.path.join(self.incrDir.getIncDirNoDate(self.wikiName),self.getFileName()) + +class MaxRevIDLockFile(LockFile): +def getFileName(self): +return %s-%s-maxrevid.lock % ( self.wikiName, self.date ) + +class IncrDumpLockFile(LockFile): +def getFileName(self): +return %s-%s-incrdump.lock % ( self.wikiName, self.date ) + +class MD5File(ContentFile): +def getFileName(self): +return %s-%s-md5sums.txt % ( self.wikiName, self.date ) + +class IndexFile(ContentFile): +def __init__(self, config): +self._config = config +self.incrDir = IncrementDir(self._config) + +def getFileName(self): +return index.html + +def getPath(self): +return os.path.join(self.incrDir.getIncDirBase(),self.getFileName()) + +class StatusInfo(object): +def __init__(self, config, date, wikiName): +self._config = config +self.date = date +self.wikiName = wikiName +self.statusFile = StatusFile(self._config, self.date, self.wikiName) + +def getStatus(self, date = None): +if exists(self.statusFile.getPath(date)): +status = FileUtils.readFile(self.statusFile.getPath(date)).rstrip() +if status == done: +return True +return False + +def 
setStatus(self, status): +FileUtils.writeFileInPlace(self.statusFile.getPath(),status, self._config.fileperms) + +class Lock(object): +def __init__(self, config, date, wikiName): +self._config = config +self.date = date +self.wikiName = wikiName +self.lockFile = LockFile(self._config, self.date, self.wikiName) + +def isLocked(self): +return exists(self.lockFile.getPath()) + +def getLock(self): +try: +if not exists(self._config.incrementalsDir): +os.makedirs(self._config.incrementalsDir) +f = FileUtils.atomicCreate(self.lockFile.getPath(), w) +f.write(%s %d % (socket.getfqdn(), os.getpid())) +f.close() +return True
[MediaWiki-CVS] SVN: [99435] branches/ariel/xmldumps-backup/WikiDump.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/99435 Revision: 99435 Author: ariel Date: 2011-10-10 21:47:20 + (Mon, 10 Oct 2011) Log Message: --- function for file size and date Modified Paths: -- branches/ariel/xmldumps-backup/WikiDump.py Modified: branches/ariel/xmldumps-backup/WikiDump.py === --- branches/ariel/xmldumps-backup/WikiDump.py 2011-10-10 21:36:52 UTC (rev 99434) +++ branches/ariel/xmldumps-backup/WikiDump.py 2011-10-10 21:47:20 UTC (rev 99435) @@ -87,6 +87,17 @@ else: return FileUtils._prettySize(size / 1024.0, quanta[1:]) + def fileInfo(path): + Return a tuple of date/time and size of a file, or None, None + try: + timestamp = time.gmtime(os.stat(path).st_mtime) + timestamp = time.strftime(%Y-%m-%d %H:%M:%S,timestamp) + size = os.path.getsize(path) + return (timestamp, size) + except: + raise + return(None, None) + fileAge = staticmethod(fileAge) atomicCreate = staticmethod(atomicCreate) writeFile = staticmethod(writeFile) @@ -96,6 +107,7 @@ relativePath = staticmethod(relativePath) prettySize = staticmethod(prettySize) _prettySize = staticmethod(_prettySize) + fileInfo = staticmethod(fileInfo) class TimeUtils(object): ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [99203] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/99203 Revision: 99203 Author: ariel Date: 2011-10-07 09:36:16 + (Fri, 07 Oct 2011) Log Message: --- last het deploy fix (for active abstract filter); remove some comment cruft; clean up old symlinks in latest only for the given job; don't barf with exception if we are asked to get files from dump dir that hasn't been created Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-10-07 09:26:00 UTC (rev 99202) +++ branches/ariel/xmldumps-backup/worker.py2011-10-07 09:36:16 UTC (rev 99203) @@ -179,14 +179,26 @@ return( .join(MultiVersion.MWScriptAsArray(config, maintenanceScript))) def MWScriptAsArray(config, maintenanceScript): - MWScriptLocation = os.path.join(config.wikiDir,..,multiversion,MWScript.php) + MWScriptLocation = os.path.join(config.wikiDir,multiversion,MWScript.php) if exists(MWScriptLocation): return [ MWScriptLocation, maintenanceScript ] else: return [ %s/maintenance/%s % (config.wikiDir, maintenanceScript) ] + def MWVersion(config, dbName): + getVersionLocation = os.path.join(config.wikiDir,multiversion,getMWVersion) + if exists(getVersionLocation): + # run the command for the wiki and get the version + command = getVersionLocation + + dbName + version = RunSimpleCommand.runAndReturn(command) + if version: + version = version.rstrip() + return version + return None + MWScriptAsString = staticmethod(MWScriptAsString) MWScriptAsArray = staticmethod(MWScriptAsArray) + MWVersion = staticmethod(MWVersion) class DbServerInfo(object): def __init__(self, wiki, dbName, errorCallback = None): @@ -200,7 +212,6 @@ if (not exists( self.wiki.config.php ) ): raise BackupError(php command %s not found % self.wiki.config.php) commandList = MultiVersion.MWScriptAsArray(self.wiki.config, getSlaveServer.php) -# command = %s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump % MiscUtils.shellEscape(( 
for i in range(0,len(commandList)): phpCommand = MiscUtils.shellEscape(self.wiki.config.php) dbName = MiscUtils.shellEscape(self.dbName) @@ -267,7 +278,6 @@ if (not exists( self.wiki.config.php ) ): raise BackupError(php command %s not found % self.wiki.config.php) commandList = MultiVersion.MWScriptAsArray(self.wiki.config, eval.php) -# command = echo 'print $wgDBprefix; ' | %s -q %s/maintenance/eval.php --wiki=%s % MiscUtils.shellEscape(( for i in range(0,len(commandList)): phpCommand = MiscUtils.shellEscape(self.wiki.config.php) dbName = MiscUtils.shellEscape(self.dbName) @@ -891,11 +901,14 @@ if not date: date = self._wiki.date directory = os.path.join(self._wiki.publicDir(), date) - dirTimeStamp = os.stat(directory).st_mtime - if (not date in self._dirCache or dirTimeStamp self._dirCacheTime[date]): + if exists(directory): + dirTimeStamp = os.stat(directory).st_mtime + if (not date in self._dirCache or dirTimeStamp self._dirCacheTime[date]): + return True + else: + return False + else: return True - else: - return False # warning: date can also be latest def getFilesInDir(self, date = None): @@ -903,15 +916,18 @@ date = self._wiki.date if (self.dirCacheOutdated(date)): directory = os.path.join(self._wiki.publicDir(),date) - dirTimeStamp = os.stat(directory).st_mtime - files = os.listdir(directory) - fileObjs = [] - for f in files: - fileObj = DumpFilename(self._wiki) - fileObj.newFromFilename(f) - fileObjs.append(fileObj) - self._dirCache[date] = fileObjs - self._dirCacheTime[date] = dirTimeStamp + if exists(directory): + dirTimeStamp = os.stat
[MediaWiki-CVS] SVN: [98214] branches/ariel/xmldumps-backup
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/98214 Revision: 98214 Author: ariel Date: 2011-09-27 09:16:37 + (Tue, 27 Sep 2011) Log Message: --- allow for self-termination via 'maintenance mode' Modified Paths: -- branches/ariel/xmldumps-backup/monitor.py branches/ariel/xmldumps-backup/worker branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/monitor.py === --- branches/ariel/xmldumps-backup/monitor.py 2011-09-27 08:51:19 UTC (rev 98213) +++ branches/ariel/xmldumps-backup/monitor.py 2011-09-27 09:16:37 UTC (rev 98214) @@ -3,6 +3,8 @@ import os import sys import WikiDump +from os.path import exists +from WikiDump import FileUtils # can specify name of alternate config file if (sys.argv[1]): @@ -32,6 +34,8 @@ if running: status = Dumps are in progress... + elif exists(maintenance.txt): + status = FileUtils.readFile(maintenance.txt) else: status = Dump process is idle. Modified: branches/ariel/xmldumps-backup/worker === --- branches/ariel/xmldumps-backup/worker 2011-09-27 08:51:19 UTC (rev 98213) +++ branches/ariel/xmldumps-backup/worker 2011-09-27 09:16:37 UTC (rev 98214) @@ -7,11 +7,16 @@ fi while true; do -if [ ! -z $configFile ]; then - python $WIKIDUMP_BASE/worker.py --configfile $configFile +if [ -e maintenance.txt ]; then + echo in maintenance mode, sleeping 5 minutes + sleep 300 else - python $WIKIDUMP_BASE/worker.py + if [ ! 
-z $configFile ]; then + python $WIKIDUMP_BASE/worker.py --configfile $configFile + else + python $WIKIDUMP_BASE/worker.py + fi + echo sleeping + sleep 30 fi -echo sleeping -sleep 30 done Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-09-27 08:51:19 UTC (rev 98213) +++ branches/ariel/xmldumps-backup/worker.py2011-09-27 09:16:37 UTC (rev 98214) @@ -24,6 +24,26 @@ from WikiDump import FileUtils, MiscUtils, TimeUtils from CommandManagement import CommandPipeline, CommandSeries, CommandsInParallel +class Maintenance(object): + + def inMaintenanceMode(): + Use this to let callers know that we really should not + be running. Callers should try to exit the job + they are running as soon as possible. + return exists(maintenance.txt) + + def exitIfInMaintenanceMode(message = None): + Call this from possible exit points of running jobs + in order to exit if we need to + if Maintenance.inMaintenanceMode(): + if message: + raise BackupError(message) + else: + raise BackupError(In maintenance mode, exiting.) 
+ + inMaintenanceMode = staticmethod(inMaintenanceMode) + exitIfInMaintenanceMode = staticmethod(exitIfInMaintenanceMode) + class Logger(object): def __init__(self, logFileName=None): @@ -1735,6 +1755,8 @@ # mark all the following jobs to run as well self.dumpItemList.markFollowingJobsToRun() + Maintenance.exitIfInMaintenanceMode(In maintenance mode, exiting dump of %s % self.dbName ) + self.makeDir(os.path.join(self.wiki.publicDir(), self.wiki.date)) self.makeDir(os.path.join(self.wiki.privateDir(), self.wiki.date)) @@ -1752,6 +1774,8 @@ for item in self.dumpItemList.dumpItems: if (item.toBeRun()): + Maintenance.exitIfInMaintenanceMode(In maintenance mode, exiting dump of %s at step %s % ( self.dbName, self.jobRequested ) ) + item.start(self) self.status.updateStatusFiles() self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo()) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [98215] branches/ariel/xmldumps-backup
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/98215 Revision: 98215 Author: ariel Date: 2011-09-27 10:12:36 + (Tue, 27 Sep 2011) Log Message: --- quit generating dumps after n failures in a row (n = 3 for now), we probably have some serious problem Modified Paths: -- branches/ariel/xmldumps-backup/worker branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker === --- branches/ariel/xmldumps-backup/worker 2011-09-27 09:16:37 UTC (rev 98214) +++ branches/ariel/xmldumps-backup/worker 2011-09-27 10:12:36 UTC (rev 98215) @@ -1,5 +1,10 @@ #!/bin/bash +# number of failures of worker.py in a row before we decide +# something serious is broken and we refuse to run +MAXFAILS=3 +failures=0 + WIKIDUMP_BASE=`dirname $0` if [ ! -z $1 ]; then @@ -7,7 +12,7 @@ fi while true; do -if [ -e maintenance.txt ]; then +if [ -e $WIKIDUMP_BASE/maintenance.txt ]; then echo in maintenance mode, sleeping 5 minutes sleep 300 else @@ -16,6 +21,13 @@ else python $WIKIDUMP_BASE/worker.py fi + if [ $? -ne 0 ]; then + failures=$(($failures+1)) + if [ $failures -gt $MAXFAILS ]; then + echo more than $MAXFAILS failures in a row, halting. + exit 1 + fi + fi echo sleeping sleep 30 fi Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-09-27 09:16:37 UTC (rev 98214) +++ branches/ariel/xmldumps-backup/worker.py2011-09-27 10:12:36 UTC (rev 98215) @@ -1832,6 +1832,12 @@ self.showRunnerStateComplete() + # let caller know if this was a successful run + if self.status.failCount 0: + return False + else: + return True + def cleanOldDumps(self): if self._cleanOldDumpsEnabled: old = self.wiki.dumpDirs() @@ -3706,6 +3712,7 @@ chunkToDo = False checkpointFile = None pageIDRange = None + result = False try: (options, remainder) = getopt.gnu_getopt(sys.argv[1:], , @@ -3806,11 +3813,16 @@ print Running %s, job %s... % (wiki.dbName, jobRequested) else: print Running %s... 
% wiki.dbName - runner.run() + result = runner.run() # if we are doing one piece only of the dump, we don't unlock either if locksEnabled: wiki.unlock() else: print No wikis available to run. + result = True finally: WikiDump.cleanup() + if result == False: + sys.exit(1) + else: + sys.exit(0) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [98115] branches/ariel/xmldumps-backup/dumpcentralauth.sh
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/98115 Revision: 98115 Author: ariel Date: 2011-09-26 07:05:23 + (Mon, 26 Sep 2011) Log Message: --- fix path for db.php; add mwscript wrapper Modified Paths: -- branches/ariel/xmldumps-backup/dumpcentralauth.sh Modified: branches/ariel/xmldumps-backup/dumpcentralauth.sh === --- branches/ariel/xmldumps-backup/dumpcentralauth.sh 2011-09-26 06:24:55 UTC (rev 98114) +++ branches/ariel/xmldumps-backup/dumpcentralauth.sh 2011-09-26 07:05:23 UTC (rev 98115) @@ -16,9 +16,13 @@ echo exiting... exit 1 fi -dbcluster=`grep centralauth /apache/common/php/wmf-config/db.php | awk -F' ' { print $4 }'` -wiki=`grep $dbcluster /apache/common/php/wmf-config/db.php | grep wiki | head -1 | awk -F' ' { print $2 }'` -host=`echo 'echo wfGetLB()-getServerName(0);' | php /apache/common/php/maintenance/eval.php $wiki` +if [ ! -f /apache/common/wmf-config/db.php ]; then + echo failed to find db.php, exiting... + exit 1 +fi +dbcluster=`grep centralauth /apache/common/wmf-config/db.php | awk -F' ' { print $4 }'` +wiki=`grep $dbcluster /apache/common/wmf-config/db.php | grep wiki | head -1 | awk -F' ' { print $2 }'` +host=`echo 'echo wfGetLB()-getServerName(0);' | php /apache/common/multiversion/MWScript.php eval.php $wiki` if [ -z $dbcluster -o -z $wiki -o -z $host ]; then echo can't locate db server for centralauth, exiting. exit 1 ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [97889] trunk/phase3/maintenance/dumpTextPass.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97889 Revision: 97889 Author: ariel Date: 2011-09-23 06:15:20 + (Fri, 23 Sep 2011) Log Message: --- handle naming checkpoint file with first/last pageID when the file is empty Modified Paths: -- trunk/phase3/maintenance/dumpTextPass.php Modified: trunk/phase3/maintenance/dumpTextPass.php === --- trunk/phase3/maintenance/dumpTextPass.php 2011-09-23 05:50:11 UTC (rev 97888) +++ trunk/phase3/maintenance/dumpTextPass.php 2011-09-23 06:15:20 UTC (rev 97889) @@ -286,8 +286,19 @@ // we wrote some stuff after last checkpoint that needs renamed if (file_exists($filenameList[0])) { $newFilenames = array(); - $firstPageID = str_pad($this-firstPageWritten,9,0,STR_PAD_LEFT); - $lastPageID = str_pad($this-lastPageWritten,9,0,STR_PAD_LEFT); + # we might have just written the header and footer and had no + # pages or revisions written... perhaps they were all deleted + # there's no pageID 0 so we use that. the caller is responsible + # for deciding what to do with a file containing only the + # siteinfo information and the mw tags. + if (! $this-firstPageWritten) { + $firstPageID = str_pad(0,9,0,STR_PAD_LEFT); + $lastPageID = str_pad(0,9,0,STR_PAD_LEFT); + } + else { + $firstPageID = str_pad($this-firstPageWritten,9,0,STR_PAD_LEFT); + $lastPageID = str_pad($this-lastPageWritten,9,0,STR_PAD_LEFT); + } for ( $i = 0; $i count( $filenameList ); $i++ ) { $checkpointNameFilledIn = sprintf( $this-checkpointFiles[$i], $firstPageID, $lastPageID ); $fileinfo = pathinfo($filenameList[$i]); ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [97895] trunk/phase3/maintenance/dumpTextPass.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97895 Revision: 97895 Author: ariel Date: 2011-09-23 07:48:30 + (Fri, 23 Sep 2011) Log Message: --- add mwscript handling for call of fetchText.php maintenance script Modified Paths: -- trunk/phase3/maintenance/dumpTextPass.php Modified: trunk/phase3/maintenance/dumpTextPass.php === --- trunk/phase3/maintenance/dumpTextPass.php 2011-09-23 07:32:41 UTC (rev 97894) +++ trunk/phase3/maintenance/dumpTextPass.php 2011-09-23 07:48:30 UTC (rev 97895) @@ -427,12 +427,23 @@ function openSpawn() { global $IP; - $cmd = implode( , - array_map( 'wfEscapeShellArg', - array( - $this-php, - $IP/maintenance/fetchText.php, - '--wiki', wfWikiID() ) ) ); + if ( file_exists( $IP/../multiversion/MWScript.php ) ) { + $cmd = implode( , + array_map( 'wfEscapeShellArg', + array( + $this-php, + $IP/../multiversion/MWScript.php, + fetchText.php, + '--wiki', wfWikiID() ) ) ); + } + else { + $cmd = implode( , + array_map( 'wfEscapeShellArg', + array( + $this-php, + $IP/maintenance/fetchText.php, + '--wiki', wfWikiID() ) ) ); + } $spec = array( 0 = array( pipe, r ), 1 = array( pipe, w ), ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [97897] branches/wmf/1.17wmf1/maintenance/dumpTextPass.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97897 Revision: 97897 Author: ariel Date: 2011-09-23 08:16:40 + (Fri, 23 Sep 2011) Log Message: --- mft r97895 (call MWScript for invocation of fetchText.php, if needed) Modified Paths: -- branches/wmf/1.17wmf1/maintenance/dumpTextPass.php Modified: branches/wmf/1.17wmf1/maintenance/dumpTextPass.php === --- branches/wmf/1.17wmf1/maintenance/dumpTextPass.php 2011-09-23 08:07:28 UTC (rev 97896) +++ branches/wmf/1.17wmf1/maintenance/dumpTextPass.php 2011-09-23 08:16:40 UTC (rev 97897) @@ -307,12 +307,23 @@ function openSpawn() { global $IP; - $cmd = implode( , - array_map( 'wfEscapeShellArg', - array( - $this-php, - $IP/maintenance/fetchText.php, - '--wiki', wfWikiID() ) ) ); + if ( file_exists( $IP/../multiversion/MWScript.php ) ) { + $cmd = implode( , + array_map( 'wfEscapeShellArg', + array( + $this-php, + $IP/../multiversion/MWScript.php, + fetchText.php, + '--wiki', wfWikiID() ) ) ); + } + else { + $cmd = implode( , + array_map( 'wfEscapeShellArg', + array( + $this-php, + $IP/maintenance/fetchText.php, + '--wiki', wfWikiID() ) ) ); + } $spec = array( 0 = array( pipe, r ), 1 = array( pipe, w ), ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [97898] branches/wmf/1.18wmf1/maintenance/dumpTextPass.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97898 Revision: 97898 Author: ariel Date: 2011-09-23 08:17:19 + (Fri, 23 Sep 2011) Log Message: --- mft r97895 (call MWScript for invocation of fetchText.php, if needed) Modified Paths: -- branches/wmf/1.18wmf1/maintenance/dumpTextPass.php Modified: branches/wmf/1.18wmf1/maintenance/dumpTextPass.php === --- branches/wmf/1.18wmf1/maintenance/dumpTextPass.php 2011-09-23 08:16:40 UTC (rev 97897) +++ branches/wmf/1.18wmf1/maintenance/dumpTextPass.php 2011-09-23 08:17:19 UTC (rev 97898) @@ -422,12 +422,23 @@ function openSpawn() { global $IP; - $cmd = implode( , - array_map( 'wfEscapeShellArg', - array( - $this-php, - $IP/maintenance/fetchText.php, - '--wiki', wfWikiID() ) ) ); + if ( file_exists( $IP/../multiversion/MWScript.php ) ) { + $cmd = implode( , + array_map( 'wfEscapeShellArg', + array( + $this-php, + $IP/../multiversion/MWScript.php, + fetchText.php, + '--wiki', wfWikiID() ) ) ); + } + else { + $cmd = implode( , + array_map( 'wfEscapeShellArg', + array( + $this-php, + $IP/maintenance/fetchText.php, + '--wiki', wfWikiID() ) ) ); + } $spec = array( 0 = array( pipe, r ), 1 = array( pipe, w ), ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [97946] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97946 Revision: 97946 Author: ariel Date: 2011-09-23 18:56:51 + (Fri, 23 Sep 2011) Log Message: --- fold in mwscript wrapper for calls to various php maintenance scripts Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-09-23 18:25:10 UTC (rev 97945) +++ branches/ariel/xmldumps-backup/worker.py2011-09-23 18:56:51 UTC (rev 97946) @@ -154,6 +154,20 @@ return 0 return chunks +class MultiVersion(object): + def MWScriptAsString(config, maintenanceScript): + return( .join(MultiVersion.MWScriptAsArray(config, maintenanceScript))) + + def MWScriptAsArray(config, maintenanceScript): + MWScriptLocation = os.path.join(config.wikiDir,..,multiversion,MWScript.php) + if exists(MWScriptLocation): + return [ MWScriptLocation, maintenanceScript ] + else: + return [ %s/maintenance/%s % (config.wikiDir, maintenanceScript) ] + + MWScriptAsString = staticmethod(MWScriptAsString) + MWScriptAsArray = staticmethod(MWScriptAsArray) + class DbServerInfo(object): def __init__(self, wiki, dbName, errorCallback = None): self.wiki = wiki @@ -165,8 +179,14 @@ # if this fails what do we do about it? Not a bleeping thing. *ugh* FIXME!! 
if (not exists( self.wiki.config.php ) ): raise BackupError(php command %s not found % self.wiki.config.php) - command = %s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump % MiscUtils.shellEscape(( - self.wiki.config.php, self.wiki.config.wikiDir, self.dbName)) + commandList = MultiVersion.MWScriptAsArray(self.wiki.config, getSlaveServer.php) +# command = %s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump % MiscUtils.shellEscape(( + for i in range(0,len(commandList)): + phpCommand = MiscUtils.shellEscape(self.wiki.config.php) + dbName = MiscUtils.shellEscape(self.dbName) + commandList[i] = MiscUtils.shellEscape(commandList[i]) + command = .join(commandList) + command = %s -q %s --wiki=%s --group=dump % (phpCommand, command, dbName) return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip() def selectDatabaseServer(self): @@ -226,8 +246,14 @@ # FIXME later full path if (not exists( self.wiki.config.php ) ): raise BackupError(php command %s not found % self.wiki.config.php) - command = echo 'print $wgDBprefix; ' | %s -q %s/maintenance/eval.php --wiki=%s % MiscUtils.shellEscape(( - self.wiki.config.php, self.wiki.config.wikiDir, self.dbName)) + commandList = MultiVersion.MWScriptAsArray(self.wiki.config, eval.php) +# command = echo 'print $wgDBprefix; ' | %s -q %s/maintenance/eval.php --wiki=%s % MiscUtils.shellEscape(( + for i in range(0,len(commandList)): + phpCommand = MiscUtils.shellEscape(self.wiki.config.php) + dbName = MiscUtils.shellEscape(self.dbName) + commandList[i] = MiscUtils.shellEscape(commandList[i]) + command = .join(commandList) + command = echo 'print $wgDBprefix; ' | %s -q %s --wiki=%s % (phpCommand, command, dbName) return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip() @@ -2637,15 +2663,17 @@ articlesFile = runner.dumpDir.filenamePublicPath(f) historyFile = runner.dumpDir.filenamePublicPath(DumpFilename(runner.wiki, f.date, self.historyDumpName, f.fileType, f.fileExt, f.chunk, 
f.checkpoint, f.temp)) currentFile = runner.dumpDir.filenamePublicPath(DumpFilename(runner.wiki, f.date, self.currentDumpName, f.fileType, f.fileExt, f.chunk, f.checkpoint, f.temp)) - command = [ %s % runner.wiki.config.php, - -q, %s/maintenance/dumpBackup.php % runner.wiki.config.wikiDir, - --wiki=%s % runner.dbName, - --full, --stub, --report=1, - %s % runner.forceNormalOption(), - --output=gzip:%s % historyFile, - --output=gzip:%s % currentFile, - --filter=latest, --output=gzip:%s % articlesFile, - --filter=latest, --filter
[MediaWiki-CVS] SVN: [97951] branches/ariel/xmldumps-backup/monitor.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97951 Revision: 97951 Author: ariel Date: 2011-09-23 20:15:18 +0000 (Fri, 23 Sep 2011) Log Message: --- replace call to removed function with a few lines of inline code Modified Paths: -- branches/ariel/xmldumps-backup/monitor.py Modified: branches/ariel/xmldumps-backup/monitor.py === --- branches/ariel/xmldumps-backup/monitor.py 2011-09-23 20:09:54 UTC (rev 97950) +++ branches/ariel/xmldumps-backup/monitor.py 2011-09-23 20:15:18 UTC (rev 97951) @@ -41,7 +41,11 @@ def updateIndex(): outputFileName = os.path.join(config.publicDir, config.index) - WikiDump.dumpFile(outputFileName, generateIndex()) + tempFilename = outputFileName + ".tmp" + file = open(tempFilename, "wt") + file.write(generateIndex()) + file.close() + os.rename(tempFilename, outputFileName) if __name__ == "__main__": updateIndex() ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [97409] trunk/backup/legal.html
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97409 Revision: 97409 Author: ariel Date: 2011-09-18 08:02:53 + (Sun, 18 Sep 2011) Log Message: --- add CC-BY_SA license for text, plus pointer to terms of use Modified Paths: -- trunk/backup/legal.html Modified: trunk/backup/legal.html === --- trunk/backup/legal.html 2011-09-18 05:43:40 UTC (rev 97408) +++ trunk/backup/legal.html 2011-09-18 08:02:53 UTC (rev 97409) @@ -11,7 +11,9 @@ h1Copyright and license/h1 pAll original textual content except Wikinews original textual content is licensed under the a href=http://www.wikipedia.org/wiki/Wikipedia:Copyrights; title=Wikipedia Copyrights -GNU Free Documentation License/a (GFDL). +GNU Free Documentation License/a (GFDL) and the +a href=http://creativecommons.org/licenses/by-sa/3.0/; title=Creative Commons Attribution-Share-Alike 3.0 LicenseCreative Commons Attribution-Share-Alike 3.0 License/a. Some text may be available only under the Creative Commons license; see our +a href=http://wikimediafoundation.org/wiki/Terms_of_use;Terms of Use/a for details. Text written by some authors may be released under additional licenses or into the public domain. Some text (including quotations) may be used under fair use, usually where it is believed that the use ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [97410] branches/ariel/xmldumps-backup/legal.html
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97410 Revision: 97410 Author: ariel Date: 2011-09-18 08:04:10 +0000 (Sun, 18 Sep 2011) Log Message: --- add CC-BY_SA license for text, plus pointer to terms of use (mft r97409) Modified Paths: -- branches/ariel/xmldumps-backup/legal.html Modified: branches/ariel/xmldumps-backup/legal.html === --- branches/ariel/xmldumps-backup/legal.html 2011-09-18 08:02:53 UTC (rev 97409) +++ branches/ariel/xmldumps-backup/legal.html 2011-09-18 08:04:10 UTC (rev 97410) @@ -11,7 +11,9 @@ <h1>Copyright and license</h1> <p>All original textual content except Wikinews original textual content is licensed under the <a href="http://www.wikipedia.org/wiki/Wikipedia:Copyrights" title="Wikipedia Copyrights"> -GNU Free Documentation License</a> (GFDL). +GNU Free Documentation License</a> (GFDL) and the +<a href="http://creativecommons.org/licenses/by-sa/3.0/" title="Creative Commons Attribution-Share-Alike 3.0 License">Creative Commons Attribution-Share-Alike 3.0 License</a>. Some text may be available only under the Creative Commons license; see our +<a href="http://wikimediafoundation.org/wiki/Terms_of_use">Terms of Use</a> for details. Text written by some authors may be released under additional licenses or into the public domain. Some text (including quotations) may be used under fair use, usually where it is believed that the use ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [97245] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97245 Revision: 97245 Author: ariel Date: 2011-09-16 07:40:24 + (Fri, 16 Sep 2011) Log Message: --- redo checkpoint file for history 7z step Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-09-16 06:51:27 UTC (rev 97244) +++ branches/ariel/xmldumps-backup/worker.py2011-09-16 07:40:24 UTC (rev 97245) @@ -609,7 +609,7 @@ metahistory7zdump, All pages with complete edit history (.7z), These dumps can be *very* large, uncompressing up to 100 times the archive download size. + - Suitable for archival and statistical use, most mirror sites won't want or need this., self.findItemByName('metahistorybz2dump'), self.wiki, self._getChunkToDo(metahistory7zdump), self.chunkInfo.getPagesPerChunkHistory(), self.checkpointFile)) + Suitable for archival and statistical use, most mirror sites won't want or need this., self.findItemByName('metahistorybz2dump'), self.wiki, self._getChunkToDo(metahistory7zdump), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile)) if (self.chunkInfo.chunksEnabled() and self.chunkInfo.recombineHistory()): self.dumpItems.append( RecombineXmlRecompressDump(metahistory7zdumprecombine, @@ -1814,9 +1814,7 @@ # of that very file. meh. how likely is it that we # have one? these files are time based and the start/end pageids # are going to fluctuate. whatever - cf = DumpFilename(self.wiki) - cf.newFromFilename(item.checkpointFile) - checkpoint = cf.checkpoint + checkpoint = item.checkpointFile.checkpoint for d in dumpNames: self.symLinks.removeSymLinksFromOldRuns(self.wiki.date, d, chunk, checkpoint ) @@ -3135,7 +3133,7 @@ class XmlRecompressDump(Dump): Take a .bz2 and recompress it as 7-Zip. 
- def __init__(self, subset, name, desc, detail, itemForRecompression, wiki, chunkToDo, chunks = False, checkpoints = False): + def __init__(self, subset, name, desc, detail, itemForRecompression, wiki, chunkToDo, chunks = False, checkpoints = False, checkpointFile = None): self._subset = subset self._detail = detail self._chunks = chunks @@ -3146,6 +3144,7 @@ self.itemForRecompression = itemForRecompression if checkpoints: self._checkpointsEnabled = True + self.checkpointFile = checkpointFile Dump.__init__(self, name, desc) def getDumpName(self): @@ -3182,7 +3181,11 @@ commands = [] # Remove prior 7zip attempts; 7zip will try to append to an existing archive self.cleanupOldFiles(runner.dumpDir) - if self._chunksEnabled and not self._chunkToDo: + if self.checkpointFile: + outputFile = DumpFilename(self.wiki, None, self.checkpointFile.dumpName, self.checkpointFile.fileType, self.fileExt, self.checkpointFile.chunk, self.checkpointFile.checkpoint) + series = self.buildCommand(runner, [ outputFile ]) + commands.append(series) + elif self._chunksEnabled and not self._chunkToDo: # must set up each parallel job separately, they may have checkpoint files that # need to be processed in series, it's a special case for i in range(1, len(self._chunks)+1): ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [97295] branches/ariel/xmldumps-backup
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97295 Revision: 97295 Author: ariel Date: 2011-09-16 15:44:53 + (Fri, 16 Sep 2011) Log Message: --- fix typo in error message; make ListOutputFilesToPublish for xml stub recombine step work; rerun xml dump step from given pageid (poorly); remove commented out try stanzas frmo debugging Modified Paths: -- branches/ariel/xmldumps-backup/worker.py branches/ariel/xmldumps-backup/writeuptopageid.c Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-09-16 15:24:57 UTC (rev 97294) +++ branches/ariel/xmldumps-backup/worker.py2011-09-16 15:44:53 UTC (rev 97295) @@ -485,7 +485,7 @@ self._toBeRun = toBeRun class DumpItemList(object): - def __init__(self, wiki, prefetch, spawn, chunkToDo, checkpointFile, singleJob, chunkInfo, runInfoFile, dumpDir): + def __init__(self, wiki, prefetch, spawn, chunkToDo, checkpointFile, singleJob, chunkInfo, pageIDRange, runInfoFile, dumpDir): self.wiki = wiki self._hasFlaggedRevs = self.wiki.hasFlaggedRevs() self._prefetch = prefetch @@ -496,6 +496,8 @@ self._singleJob = singleJob self._runInfoFile = runInfoFile self.dumpDir = dumpDir + self.pageIDRange = pageIDRange + if self.wiki.config.checkpointTime: checkpoints = True else: @@ -570,7 +572,7 @@ XmlDump(articles, articlesdump, bigbArticles, templates, image descriptions, and primary meta-pages./b/big, - This contains current versions of article content, and is the archive most mirror sites will probably want., self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo(articlesdump), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile)) + This contains current versions of article content, and is the archive most mirror sites will probably want., self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo(articlesdump), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile, 
self.pageIDRange)) if (self.chunkInfo.chunksEnabled()): self.dumpItems.append(RecombineXmlDump(articlesdumprecombine, bigbRecombine articles, templates, image descriptions, and primary meta-pages./b/big,This contains current versions of article content, and is the archive most mirror sites will probably want., self.findItemByName('articlesdump'))) @@ -578,7 +580,7 @@ XmlDump(meta-current, metacurrentdump, All pages, current versions only., - Discussion and user pages are included in this complete archive. Most mirrors won't want this extra material., self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo(metacurrentdump), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile)) + Discussion and user pages are included in this complete archive. Most mirrors won't want this extra material., self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo(metacurrentdump), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile, self.pageIDRange)) if (self.chunkInfo.chunksEnabled()): self.dumpItems.append(RecombineXmlDump(metacurrentdumprecombine, Recombine all pages, current versions only.,Discussion and user pages are included in this complete archive. Most mirrors won't want this extra material., self.findItemByName('metacurrentdump'))) @@ -597,7 +599,7 @@ metahistorybz2dump, All pages with complete page edit history (.bz2), These dumps can be *very* large, uncompressing up to 20 times the archive download size. 
+ - Suitable for archival and statistical use, most mirror sites won't want or need this., self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo(metahistorybz2dump), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile)) + Suitable for archival and statistical use, most mirror sites won't want or need this., self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo(metahistorybz2dump
[MediaWiki-CVS] SVN: [97178] trunk/phase3/includes/Export.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97178 Revision: 97178 Author: ariel Date: 2011-09-15 17:18:13 + (Thu, 15 Sep 2011) Log Message: --- throw exception if rename of output file fails Modified Paths: -- trunk/phase3/includes/Export.php Modified: trunk/phase3/includes/Export.php === --- trunk/phase3/includes/Export.php2011-09-15 16:42:22 UTC (rev 97177) +++ trunk/phase3/includes/Export.php2011-09-15 17:18:13 UTC (rev 97178) @@ -759,8 +759,10 @@ } if ( $newname ) { fclose( $this-handle ); - rename( $this-filename, $newname ); - if ( $open ) { + if (! rename( $this-filename, $newname ) ) { + throw new MWException( __METHOD__ . : rename of file {$this-filename} to $newname failed\n ); + } + elseif ( $open ) { $this-handle = fopen( $this-filename, wt ); } } @@ -814,8 +816,10 @@ if ( $newname ) { fclose( $this-handle ); proc_close( $this-procOpenResource ); - rename( $this-filename, $newname ); - if ( $open ) { + if (! rename( $this-filename, $newname ) ) { + throw new MWException( __METHOD__ . : rename of file {$this-filename} to $newname failed\n ); + } + elseif ( $open ) { $command = $this-command; $command .= . wfEscapeShellArg( $this-filename ); $this-startCommand( $command ); @@ -873,8 +877,10 @@ if ( $newname ) { fclose( $this-handle ); proc_close( $this-procOpenResource ); - rename( $this-filename, $newname ); - if ( $open ) { + if (! rename( $this-filename, $newname ) ) { + throw new MWException( __METHOD__ . : rename of file {$this-filename} to $newname failed\n ); + } + elseif ( $open ) { $command = 7za a -bd -si . wfEscapeShellArg( $file ); // Suppress annoying useless crap from p7zip // Unfortunately this could suppress real error messages too ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [96826] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96826 Revision: 96826 Author: ariel Date: 2011-09-12 07:38:15 + (Mon, 12 Sep 2011) Log Message: --- remove some dead fixmes, add _chunkToDo to base Dump class, seriously cleanup linking to rss feed files and removal of old latest links, this were pretty broken after checkpoint files went in Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-09-12 02:10:11 UTC (rev 96825) +++ branches/ariel/xmldumps-backup/worker.py2011-09-12 07:38:15 UTC (rev 96826) @@ -1792,6 +1792,35 @@ dumpFile = DumpFilename(self.wiki, None, self.checksums.getChecksumFileNameBasename()) self.symLinks.saveSymlink(dumpFile) self.symLinks.cleanupSymLinks() + + for item in self.dumpItemList.dumpItems: + dumpNames = item.getDumpName() + if type(dumpNames).__name__!='list': + dumpNames = [ dumpNames ] + + if (item._chunksEnabled): + # if there is a specific chunk, we want to only clear out + # old files for that piece, because new files for the other + # pieces may not have been generated yet. + chunk = item._chunkToDo + else: + chunk = None + + checkpoint = None + if (item._checkpointsEnabled): + if (item.checkpointFile): + # if there's a specific checkpoint file we are + # rerunning, we would only clear out old copies + # of that very file. meh. how likely is it that we + # have one? these files are time based and the start/end pageids + # are going to fluctuate. 
whatever + cf = DumpFilename(self.wiki) + cf.newFromFilename(item.checkpointFile) + checkpoint = cf.checkpoint + + for d in dumpNames: + self.symLinks.removeSymLinksFromOldRuns(self.wiki.date, d, chunk, checkpoint ) + self.feeds.cleanupFeeds() def makeDir(self, dir): @@ -1826,10 +1855,15 @@ link = os.path.join(self.dumpDir.latestDir(), latestFilename) if exists(link) or os.path.islink(link): if os.path.islink(link): - realfile = os.readlink(link) + oldrealfile = os.readlink(link) # format of these links should be... ../20110228/elwikidb-20110228-templatelinks.sql.gz rellinkpattern = re.compile('^\.\./(20[0-9]+)/') - dateinterval = int(self.wiki.date) - int(dumpFile.date) + dateinlink = rellinkpattern.search(oldrealfile) + if (dateinlink): + dateoflinkedfile = dateinlink.group(1) + dateinterval = int(self.wiki.date) - int(dateoflinkedfile) + else: + dateinterval = 0 # no file or it's older than ours... *then* remove the link if not exists(os.path.realpath(link)) or dateinterval 0: self.debugfn(Removing old symlink %s % link) @@ -1854,6 +1888,34 @@ if not exists(os.path.join(latestDir,realfile)): os.remove(link) + # if the args are False or None, we remove all the old links for all values of the arg. + # example: if chunk is False or None then we remove all old values for all chunks + # old means older than the specified datestring. + def removeSymLinksFromOldRuns(self, dateString, dumpName=None, chunk=None, checkpoint=None): + # fixme this needs to do more work if there are chunks or checkpoint files linked in here from + # earlier dates. checkpoint ranges change, and configuration of chunks changes too, so maybe + # old files still exist and the links need to be removed because we have newer files
[MediaWiki-CVS] SVN: [96648] trunk/phase3
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96648 Revision: 96648 Author: ariel Date: 2011-09-09 07:28:11 + (Fri, 09 Sep 2011) Log Message: --- getFilename renamed to getFilenames since it can return a list Modified Paths: -- trunk/phase3/includes/Export.php trunk/phase3/maintenance/dumpTextPass.php Modified: trunk/phase3/includes/Export.php === --- trunk/phase3/includes/Export.php2011-09-09 05:17:05 UTC (rev 96647) +++ trunk/phase3/includes/Export.php2011-09-09 07:28:11 UTC (rev 96648) @@ -724,7 +724,7 @@ * Returns the name of the file or files which are * being written to, if there are any. */ - function getFilename() { + function getFilenames() { return NULL; } } @@ -766,7 +766,7 @@ } } - function getFilename() { + function getFilenames() { return $this-filename; } } @@ -938,8 +938,8 @@ $this-sink-closeAndRename( $newname, $open ); } - function getFilename() { - return $this-sink-getFilename(); + function getFilenames() { + return $this-sink-getFilenames(); } /** @@ -1100,10 +1100,10 @@ } } - function getFilename() { + function getFilenames() { $filenames = array(); for ( $i = 0; $i $this-count; $i++ ) { - $filenames[] = $this-sinks[$i]-getFilename(); + $filenames[] = $this-sinks[$i]-getFilenames(); } return $filenames; } Modified: trunk/phase3/maintenance/dumpTextPass.php === --- trunk/phase3/maintenance/dumpTextPass.php 2011-09-09 05:17:05 UTC (rev 96647) +++ trunk/phase3/maintenance/dumpTextPass.php 2011-09-09 07:28:11 UTC (rev 96648) @@ -246,7 +246,7 @@ } if ( $this-checkpointFiles ) { - $filenameList = (array)$this-egress-getFilename(); + $filenameList = (array)$this-egress-getFilenames(); if ( count( $filenameList ) != count( $this-checkpointFiles ) ) { throw new MWException(One checkpointfile must be specified for each output option, if maxtime is used.\n); } @@ -282,7 +282,7 @@ $offset += strlen( $chunk ); } while ( $chunk !== false !feof( $input ) ); if ($this-maxTimeAllowed) { - $filenameList = (array)$this-egress-getFilename(); + 
$filenameList = (array)$this-egress-getFilenames(); // we wrote some stuff after last checkpoint that needs renamed if (file_exists($filenameList[0])) { $newFilenames = array(); @@ -571,7 +571,7 @@ $this-thisPage = ; // this could be more than one file if we had more than one output arg $checkpointFilenames = array(); - $filenameList = (array)$this-egress-getFilename(); + $filenameList = (array)$this-egress-getFilenames(); $newFilenames = array(); $firstPageID = str_pad($this-firstPageWritten,9,0,STR_PAD_LEFT); $lastPageID = str_pad($this-lastPageWritten,9,0,STR_PAD_LEFT); ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [96651] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96651 Revision: 96651 Author: ariel Date: 2011-09-09 09:23:33 +0000 (Fri, 09 Sep 2011) Log Message: --- names of links in 'latest' directory should have 'latest' and not the date Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py 2011-09-09 09:03:54 UTC (rev 96650) +++ branches/ariel/xmldumps-backup/worker.py 2011-09-09 09:23:33 UTC (rev 96651) @@ -1822,7 +1822,8 @@ if (self._enabled): self.makeDir(self.dumpDir.latestDir()) realfile = self.dumpDir.filenamePublicPath(dumpFile) - link = os.path.join(self.dumpDir.latestDir(), dumpFile.filename) + latestFilename = dumpFile.newFilename(dumpFile.dumpName, dumpFile.fileType, dumpFile.fileExt, 'latest', dumpFile.chunk, dumpFile.checkpoint, dumpFile.temp) + link = os.path.join(self.dumpDir.latestDir(), latestFilename) if exists(link) or os.path.islink(link): if os.path.islink(link): realfile = os.readlink(link) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [96616] trunk/phase3/maintenance/dumpTextPass.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96616 Revision: 96616 Author: ariel Date: 2011-09-08 21:06:15 + (Thu, 08 Sep 2011) Log Message: --- uniform comment style, fix a few space issues, address couple issues from comments on r95272 Modified Paths: -- trunk/phase3/maintenance/dumpTextPass.php Modified: trunk/phase3/maintenance/dumpTextPass.php === --- trunk/phase3/maintenance/dumpTextPass.php 2011-09-08 20:58:48 UTC (rev 96615) +++ trunk/phase3/maintenance/dumpTextPass.php 2011-09-08 21:06:15 UTC (rev 96616) @@ -56,9 +56,9 @@ var $xmlwriterobj = false; - # when we spend more than maxTimeAllowed seconds on this run, we continue - # processing until we write out the next complete page, then save output file(s), - # rename it/them and open new one(s) + // when we spend more than maxTimeAllowed seconds on this run, we continue + // processing until we write out the next complete page, then save output file(s), + // rename it/them and open new one(s) var $maxTimeAllowed = 0; // 0 = no limit var $timeExceeded = false; var $firstPageWritten = false; @@ -72,11 +72,11 @@ } function dump( $history, $text = WikiExporter::TEXT ) { - # This shouldn't happen if on console... ;) + // This shouldn't happen if on console... ;) header( 'Content-type: text/html; charset=UTF-8' ); - # Notice messages will foul up your XML output even if they're - # relatively harmless. + // Notice messages will foul up your XML output even if they're + // relatively harmless. if ( ini_get( 'display_errors' ) ) ini_set( 'display_errors', 'stderr' ); @@ -86,10 +86,10 @@ $this-egress = new ExportProgressFilter( $this-sink, $this ); - # it would be nice to do it in the constructor, oh well. need egress set + // it would be nice to do it in the constructor, oh well. 
need egress set $this-finalOptionCheck(); - # we only want this so we know how to close a stream :-P + // we only want this so we know how to close a stream :-P $this-xmlwriterobj = new XmlDumpWriter(); $input = fopen( $this-input, rt ); @@ -234,23 +234,20 @@ } function finalOptionCheck() { - if (($this-checkpointFiles ! $this-maxTimeAllowed) || - ($this-maxTimeAllowed !$this-checkpointFiles)) { + if ( ( $this-checkpointFiles ! $this-maxTimeAllowed ) || + ( $this-maxTimeAllowed !$this-checkpointFiles ) ) { throw new MWException(Options checkpointfile and maxtime must be specified together.\n); } foreach ($this-checkpointFiles as $checkpointFile) { - $count = substr_count ($checkpointFile,%s); - if (substr_count ($checkpointFile,%s) != 2) { + $count = substr_count ( $checkpointFile,%s ); + if ( $count != 2 ) { throw new MWException(Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, file is $checkpointFile.\n); } } - if ($this-checkpointFiles) { - $filenameList = $this-egress-getFilename(); - if (! is_array($filenameList)) { - $filenameList = array( $filenameList ); - } - if (count($filenameList) != count($this-checkpointFiles)) { + if ( $this-checkpointFiles ) { + $filenameList = (array)$this-egress-getFilename(); + if ( count( $filenameList ) != count( $this-checkpointFiles ) ) { throw new MWException(One checkpointfile must be specified for each output option, if maxtime is used.\n); } } @@ -285,19 +282,16 @@ $offset += strlen( $chunk ); } while ( $chunk !== false !feof( $input ) ); if ($this-maxTimeAllowed) { - $filenameList = $this-egress-getFilename(); - # we wrote some stuff after last checkpoint that needs renamed */ - if (! is_array($filenameList)) { - $filenameList = array( $filenameList ); - } + $filenameList = (array)$this-egress-getFilename(); + // we wrote some stuff after last checkpoint that needs renamed if (file_exists
[MediaWiki-CVS] SVN: [96486] trunk/phase3/includes/Export.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96486 Revision: 96486 Author: ariel Date: 2011-09-07 20:21:52 + (Wed, 07 Sep 2011) Log Message: --- get rid of duplication, remove unused function rename(), add documentation as per comments on r95260 Modified Paths: -- trunk/phase3/includes/Export.php Modified: trunk/phase3/includes/Export.php === --- trunk/phase3/includes/Export.php2011-09-07 20:14:20 UTC (rev 96485) +++ trunk/phase3/includes/Export.php2011-09-07 20:21:52 UTC (rev 96486) @@ -709,17 +709,21 @@ return; } - // TODO: document - function closeAndRename( $newname ) { + /** +* Close the old file, and move it to a specified name. +* Use this for the last piece of a file written out +* at specified checkpoints (e.g. every n hours). +* @param $newname mixed File name. May be a string or an array with one element +* @param $open bool If true, a new file with the old filename will be opened again for writing (default: false) +*/ + function closeAndRename( $newname, $open = false ) { return; } - // TODO: document - function rename( $newname ) { - return; - } - - // TODO: document + /** +* Returns the name of the file or files which are +* being written to, if there are any. +*/ function getFilename() { return NULL; } @@ -742,21 +746,10 @@ } function closeRenameAndReopen( $newname ) { - if ( is_array( $newname ) ) { - if ( count( $newname ) 1 ) { - throw new MWException( __METHOD__ . : passed multiple arguments for rename of single file\n ); - } else { - $newname = $newname[0]; - } - } - if ( $newname ) { - fclose( $this-handle ); - rename( $this-filename, $newname ); - $this-handle = fopen( $this-filename, wt ); - } + $this-closeAndRename( $newname, true ); } - function closeAndRename( $newname ) { + function closeAndRename( $newname, $open = false ) { if ( is_array( $newname ) ) { if ( count( $newname ) 1 ) { throw new MWException( __METHOD__ . 
: passed multiple arguments for rename of single file\n ); @@ -767,20 +760,10 @@ if ( $newname ) { fclose( $this-handle ); rename( $this-filename, $newname ); - } - } - - function rename( $newname ) { - if ( is_array( $newname ) ) { - if ( count( $newname ) 1 ) { - throw new MWException( __METHOD__ . : passed multiple arguments for rename of single file\n ); - } else { - $newname = $newname[0]; + if ( $open ) { + $this-handle = fopen( $this-filename, wt ); } } - if ( $newname ) { - rename( $this-filename, $newname ); - } } function getFilename() { @@ -816,29 +799,11 @@ $this-handle = $pipes[0]; } - /** -* Close the old file, move it to a specified name, -* and reopen new file with the old name. -*/ function closeRenameAndReopen( $newname ) { - if ( is_array( $newname ) ) { - if ( count( $newname ) 1 ) { - throw new MWException( __METHOD__ . : passed multiple arguments for rename of single file\n ); - } else { - $newname = $newname[0]; - } - } - if ( $newname ) { - fclose( $this-handle ); - proc_close( $this-procOpenResource ); - rename( $this-filename, $newname ); - $command = $this-command; - $command .= . wfEscapeShellArg( $this-filename ); - $this-startCommand( $command ); - } + $this-closeAndRename( $newname, true ); } - function closeAndRename( $newname ) { + function closeAndRename( $newname, $open = false ) { if ( is_array( $newname ) ) { if ( count( $newname ) 1 ) { throw new MWException( __METHOD__ . : passed multiple arguments for rename of single file\n ); @@ -847,25 +812,17
[MediaWiki-CVS] SVN: [96314] branches/ariel/xmldumps-backup/create-rsync-list.sh
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96314 Revision: 96314 Author: ariel Date: 2011-09-06 08:04:24 + (Tue, 06 Sep 2011) Log Message: --- script which creates a list of the n most recent XML successful dumps per project for mirroring Added Paths: --- branches/ariel/xmldumps-backup/create-rsync-list.sh Added: branches/ariel/xmldumps-backup/create-rsync-list.sh === --- branches/ariel/xmldumps-backup/create-rsync-list.sh (rev 0) +++ branches/ariel/xmldumps-backup/create-rsync-list.sh 2011-09-06 08:04:24 UTC (rev 96314) @@ -0,0 +1,189 @@ +#!/bin/bash + +# This script generates a list of the last n sets of XML dump files +# per project that were successful, adding failed dumps to the list if there +# are not n successful dumps available. + +# Options: +# dumpsnumber -- number of dumps to list +# outputfile -- path to file in which to write the list +# configfile -- path to config file used to generate dumps + +usage() { +echo Usage: $0 --dumpsnumber n --outputfile filename --configfile filename --rsyncprefix path +echo +echo dumpsnumber number of dumps to list +echo outputfilename of file to which we will write iw action list +echo configfilename of configuration file for dump generation +echo (default value: wikidump.conf) +echo rsyncprefix path to substitute in place of the public path supplied +echo in the configuration file, if needed +echo +echo For example: +echo$0 --dumpsnumber 5 --outputfile /data/dumps/public/dumpsfiles_for_rsync.txt --configfile wikidump.conf.testing +exit 1 +} + +check_args() { +if [ -z $dumpsnumber ]; then + echo $0: dumpsnumber must be an integer greater than 0 + usage +fi +if ! [[ $dumpsnumber =~ ^[0-9]+$ ]] ; then + echo $0: dumpsnumber must be an integer greater than 0 + usage +fi +if [ $dumpsnumber -lt 1 ]; then + echo $0: dumpsnumber must be an integer greater than 0 + usage +fi +if [ -z $outputfile ]; then + echo No value was given for outfile option. 
+ usage +fi +if [ -z $configfile ]; then + echo No value was given for configfile option. + usage +fi +if [ ! -f $configfile ]; then + echo $0: can't open configuration file $configfile, exiting... + exit 1 +fi +} + + +listdumpsforproject() { +# cannot rely on timestamp. sometimes we have rerun a phase in +# some earlier dump and have it completed later than a later dump, +# or we may have two en pedia runs going at once in different +# phases. +dirs=`ls -dr $publicdir/$p/20* 2/dev/null` + +for day in $dirs; do + # tools, mw, static... + if [ -d $day ]; then + complete=`grep Dump complete $day/status.html 2/dev/null | grep -v failed 2/dev/null` + if [ ! -z $complete ]; then + complete_dumps=(${complete_dumps[@]} $day) + fi + failed=`grep Dump complete $day/status.html 2/dev/null | grep failed 2/dev/null` + if [ ! -z $failed ]; then + failed_dumps=(${failed_dumps[@]} $day) + fi + fi +done +} + +list_files_in_dir() { +if [ ! -f $outputfile.tmp ]; then + touch $outputfile.tmp +fi +if [ $rsyncprefix == false ]; then + ls $d/*.gz 2/dev/null $outputfile.tmp + ls $d/*.bz2 2/dev/null $outputfile.tmp + ls $d/*.7z 2/dev/null $outputfile.tmp + ls $d/*.html 2/dev/null $outputfile.tmp + ls $d/*.txt 2/dev/null $outputfile.tmp +else + ls $d/*.gz 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp + ls $d/*.bz2 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp + ls $d/*.7z 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp + ls $d/*.html 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp + ls $d/*.txt 2/dev/null | sed -e s|^$publicdir|$rsyncprefix| $outputfile.tmp +fi +} + +get_list_of_files() { +projectdirs=`ls -d $publicdir/$p/20* 2/dev/null` +declare -a complete_dumps +declare -a failed_dumps +listdumpsforproject +if [ ${#complete_dumps[@]} -ge $dumpsnumber ]; then + dumps_to_copy=${complete_dumps[@]:0:$dumpsnumber} + for d in $dumps_to_copy; do + list_files_in_dir + done +else + for d in ${complete_dumps[@]}; do + 
list_files_in_dir + done + left_to_get=$(( $dumpsnumber - ${#complete_dumps[@]} )) + if [ ${#failed_dumps[@]} -ge $left_to_get ]; then + dumps_to_copy=${failed_dumps[@]:0:$left_to_get} + for d in $dumps_to_copy; do + list_files_in_dir + done + else + for d in ${failed_dumps[@]}; do
[MediaWiki-CVS] SVN: [96353] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96353 Revision: 96353 Author: ariel Date: 2011-09-06 17:53:42 + (Tue, 06 Sep 2011) Log Message: --- missing parens in usage message; add 'latestlinks' option to just redo links and rss feed files in 'latest' directory; don't cleanup/delete for noop Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-09-06 17:12:09 UTC (rev 96352) +++ branches/ariel/xmldumps-backup/worker.py2011-09-06 17:53:42 UTC (rev 96353) @@ -505,6 +505,7 @@ if (self._singleJob[-5:] == 'table' or self._singleJob[-9:] == 'recombine' or self._singleJob == 'noop' or + self._singleJob == 'latestlinks' or self._singleJob == 'xmlpagelogsdump' or self._singleJob == 'pagetitlesdump' or self._singleJob.endswith('recombine')): @@ -514,6 +515,7 @@ if (self._singleJob[-5:] == 'table' or self._singleJob[-9:] == 'recombine' or self._singleJob == 'noop' or + self._singleJob == 'latestlinks' or self._singleJob == 'xmlpagelogsdump' or self._singleJob == 'pagetitlesdump' or self._singleJob == 'abstractsdump' or @@ -651,11 +653,12 @@ if (item.name() == job): item.setToBeRun(True) return True - if job == noop: + if job == noop or job == latestlinks: return True print No job of the name specified exists. 
Choose one of the following: - print noop (runs no job but rewrites md5sums file and resets latest links - print tables (includes all items below that end in 'table' + print noop (runs no job but rewrites md5sums file and resets latest links) + print latestlinks (runs no job but resets latest links) + print tables (includes all items below that end in 'table') for item in self.dumpItems: print %s % item.name() return False @@ -1518,6 +1521,22 @@ self._cleanupOldFilesEnabled = False self.jobRequested = job + + if self.jobRequested == latestlinks: + self._statusEnabled = False + self._checksummerEnabled = False + self._runInfoFileEnabled = False + self._noticeFileEnabled = False + self._makeDirEnabled = False + self._cleanOldDumpsEnabled = False + self._cleanupOldFilesEnabled = False + self._checkForTruncatedFilesEnabled = False + + if self.jobRequested == noop: + self._cleanOldDumpsEnabled = False + self._cleanupOldFilesEnabled = False + self._checkForTruncatedFilesEnabled = False + self.dbServerInfo = DbServerInfo(self.wiki, self.dbName, self.logAndPrint) self.dumpDir = DumpDir(self.wiki, self.dbName) @@ -1877,7 +1896,7 @@ for f in files: if f.endswith(-rss.xml): filename = f[:-8]; - link = os.path.join(latestDir,filename) + link = os.path.join(latestDir,f) if not exists(link): os.remove(os.path.join(latestDir,f)) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [96272] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96272 Revision: 96272 Author: ariel Date: 2011-09-05 08:24:53 + (Mon, 05 Sep 2011) Log Message: --- check actual location of file being linked to before we claim it doesn't exist, rather than some buggy one Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-09-05 06:56:08 UTC (rev 96271) +++ branches/ariel/xmldumps-backup/worker.py2011-09-05 08:24:53 UTC (rev 96272) @@ -1831,7 +1831,7 @@ link = os.path.join(latestDir,f) if os.path.islink(link): realfile = os.readlink(link) - if not exists(realfile): + if not exists(os.path.join(latestDir,realfile)): os.remove(link) class Feeds(object): ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [95720] trunk/phase3/maintenance/dumpTextPass.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95720 Revision: 95720 Author: ariel Date: 2011-08-29 22:48:18 + (Mon, 29 Aug 2011) Log Message: --- check the checkpoint related options only if we specified checkpoints, duh Modified Paths: -- trunk/phase3/maintenance/dumpTextPass.php Modified: trunk/phase3/maintenance/dumpTextPass.php === --- trunk/phase3/maintenance/dumpTextPass.php 2011-08-29 22:42:18 UTC (rev 95719) +++ trunk/phase3/maintenance/dumpTextPass.php 2011-08-29 22:48:18 UTC (rev 95720) @@ -245,13 +245,15 @@ } } - $filenameList = $this-egress-getFilename(); - if (! is_array($filenameList)) { - $filenameList = array( $filenameList ); + if ($this-checkpointFiles) { + $filenameList = $this-egress-getFilename(); + if (! is_array($filenameList)) { + $filenameList = array( $filenameList ); + } + if (count($filenameList) != count($this-checkpointFiles)) { + wfDie(One checkpointfile must be specified for each output option, if maxtime is used.\n); + } } - if (count($filenameList) != count($this-checkpointFiles)) { - wfDie(One checkpointfile must be specified for each output option, if maxtime is used.\n); - } } function readDump( $input ) { ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [95732] branches/ariel/xmldumps-backup
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95732 Revision: 95732 Author: ariel Date: 2011-08-30 00:20:10 + (Tue, 30 Aug 2011) Log Message: --- first take at rerunning a checkpoint file Modified Paths: -- branches/ariel/xmldumps-backup/WikiDump.py branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/WikiDump.py === --- branches/ariel/xmldumps-backup/WikiDump.py 2011-08-30 00:10:32 UTC (rev 95731) +++ branches/ariel/xmldumps-backup/WikiDump.py 2011-08-30 00:20:10 UTC (rev 95732) @@ -191,6 +191,7 @@ cat: /bin/cat, grep: /bin/grep, checkforbz2footer: /usr/local/bin/checkforbz2footer, + writeuptopageid: /usr/local/bin/writeuptopageid, #cleanup: { keep: 3, #chunks: { @@ -271,6 +272,7 @@ self.cat = self.conf.get(tools, cat) self.grep = self.conf.get(tools, grep) self.checkforbz2footer = self.conf.get(tools,checkforbz2footer) + self.writeuptopageid = self.conf.get(tools,writeuptopageid) if not self.conf.has_section('cleanup'): self.conf.add_section('cleanup') Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-08-30 00:10:32 UTC (rev 95731) +++ branches/ariel/xmldumps-backup/worker.py2011-08-30 00:20:10 UTC (rev 95732) @@ -485,12 +485,13 @@ self._toBeRun = toBeRun class DumpItemList(object): - def __init__(self, wiki, prefetch, spawn, chunkToDo, singleJob, chunkInfo, runInfoFile, dumpDir): + def __init__(self, wiki, prefetch, spawn, chunkToDo, checkpointFile, singleJob, chunkInfo, runInfoFile, dumpDir): self.wiki = wiki self._hasFlaggedRevs = self.wiki.hasFlaggedRevs() self._prefetch = prefetch self._spawn = spawn self.chunkInfo = chunkInfo + self.checkpointFile = checkpointFile self._chunkToDo = chunkToDo self._singleJob = singleJob self._runInfoFile = runInfoFile @@ -505,9 +506,21 @@ self._singleJob[-9:] == 'recombine' or self._singleJob == 'noop' or self._singleJob == 'xmlpagelogsdump' or - self._singleJob == 'pagetitlesdump'): + self._singleJob == 'pagetitlesdump' or + 
self._singleJob.endswith('recombine')): raise BackupError(You cannot specify a chunk with the job %s, exiting.\n % self._singleJob) + if (self._singleJob and self.checkpointFile): + if (self._singleJob[-5:] == 'table' or + self._singleJob[-9:] == 'recombine' or + self._singleJob == 'noop' or + self._singleJob == 'xmlpagelogsdump' or + self._singleJob == 'pagetitlesdump' or + self._singleJob == 'abstractsdump' or + self._singleJob == 'xmlstubsdump' or + self._singleJob.endswith('recombine')): + raise BackupError(You cannot specify a checkpoint file with the job %s, exiting.\n % self._singleJob) + self.dumpItems = [PrivateTable(user, usertable, User account data.), PrivateTable(watchlist, watchlisttable, Users' watchlist settings.), PrivateTable(ipblocks, ipblockstable, Data for blocks of IP addresses, ranges, and users.), @@ -555,7 +568,7 @@ XmlDump(articles, articlesdump, bigbArticles, templates, image descriptions, and primary meta-pages./b/big, - This contains current versions of article content, and is the archive most mirror sites will probably want., self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo(articlesdump), self.chunkInfo.getPagesPerChunkHistory(), checkpoints)) + This contains current versions of article content, and is the archive most mirror sites will probably want., self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo(articlesdump), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile)) if (self.chunkInfo.chunksEnabled()): self.dumpItems.append(RecombineXmlDump(articlesdumprecombine, bigbRecombine articles
[MediaWiki-CVS] SVN: [95633] branches/wmf/1.17wmf1/includes/upload/UploadFromStash.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95633 Revision: 95633 Author: ariel Date: 2011-08-28 18:00:02 + (Sun, 28 Aug 2011) Log Message: --- fix fatal Call to a member function getId() on a non-object Modified Paths: -- branches/wmf/1.17wmf1/includes/upload/UploadFromStash.php Modified: branches/wmf/1.17wmf1/includes/upload/UploadFromStash.php === --- branches/wmf/1.17wmf1/includes/upload/UploadFromStash.php 2011-08-28 17:57:11 UTC (rev 95632) +++ branches/wmf/1.17wmf1/includes/upload/UploadFromStash.php 2011-08-28 18:00:02 UTC (rev 95633) @@ -29,7 +29,12 @@ if( $stash ) { $this-stash = $stash; } else { - wfDebug( __METHOD__ . creating new UploadStash instance for . $user-getId() . \n ); + if ($user) { + wfDebug( __METHOD__ . creating new UploadStash instance for . $user-getId() . \n ); + } + else { + wfDebug( __METHOD__ . creating new UploadStash instance, no user\n); + } $this-stash = new UploadStash( $this-repo, $this-user ); } ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [95634] trunk/phase3/maintenance
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95634 Revision: 95634 Author: ariel Date: 2011-08-28 19:06:52 + (Sun, 28 Aug 2011) Log Message: --- move some member vars to parent class since they are needed there now, set lastTime and other vars so checkpoints without prefetch work, update progress reporting in parent class to give rates during interval and from start of run Modified Paths: -- trunk/phase3/maintenance/backup.inc trunk/phase3/maintenance/dumpTextPass.php Modified: trunk/phase3/maintenance/backup.inc === --- trunk/phase3/maintenance/backup.inc 2011-08-28 18:00:02 UTC (rev 95633) +++ trunk/phase3/maintenance/backup.inc 2011-08-28 19:06:52 UTC (rev 95634) @@ -51,6 +51,10 @@ var $stubText = false; // include rev_text_id instead of text; for 2-pass dump var $dumpUploads = false; var $dumpUploadFileContents = false; + var $lastTime = 0; + var $pageCountLast = 0; + var $revCountLast = 0; + var $ID = 0; function BackupDumper( $args ) { $this-stderr = fopen( php://stderr, wt ); @@ -233,6 +237,8 @@ $dbr = wfGetDB( DB_SLAVE ); $this-maxCount = $dbr-selectField( $table, MAX($field), '', __METHOD__ ); $this-startTime = wfTime(); + $this-lastTime = $this-startTime; + $this-ID = getmypid(); } /** @@ -281,21 +287,35 @@ function showReport() { if ( $this-reporting ) { - $delta = wfTime() - $this-startTime; $now = wfTimestamp( TS_DB ); - if ( $delta ) { - $rate = $this-pageCount / $delta; - $revrate = $this-revCount / $delta; + $nowts = wfTime(); + $deltaAll = wfTime() - $this-startTime; + $deltaPart = wfTime() - $this-lastTime; + $this-pageCountPart = $this-pageCount - $this-pageCountLast; + $this-revCountPart = $this-revCount - $this-revCountLast; + + if ( $deltaAll ) { $portion = $this-revCount / $this-maxCount; - $eta = $this-startTime + $delta / $portion; + $eta = $this-startTime + $deltaAll / $portion; $etats = wfTimestamp( TS_DB, intval( $eta ) ); + $pageRate = $this-pageCount / $deltaAll; + $revRate = $this-revCount / $deltaAll; } else { - $rate = '-'; 
- $revrate = '-'; + $pageRate = '-'; + $revRate = '-'; $etats = '-'; } - $this-progress( sprintf( %s: %s %d pages (%0.3f/sec), %d revs (%0.3f/sec), ETA %s [max %d], - $now, wfWikiID(), $this-pageCount, $rate, $this-revCount, $revrate, $etats, $this-maxCount ) ); + if ( $deltaPart ) { + $pageRatePart = $this-pageCountPart / $deltaPart; + $revRatePart = $this-revCountPart / $deltaPart; + } else { + $pageRatePart = '-'; + $revRatePart = '-'; + } + $this-progress( sprintf( %s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d], + $now, wfWikiID(), $this-ID, $this-pageCount, $pageRate, $pageRatePart, $this-revCount, $revRate, $revRatePart, $etats, $this-maxCount ) ); + $this-lastTime = $nowts; + $this-revCountLast = $this-revCount; } } Modified: trunk/phase3/maintenance/dumpTextPass.php === --- trunk/phase3/maintenance/dumpTextPass.php 2011-08-28 18:00:02 UTC (rev 95633) +++ trunk/phase3/maintenance/dumpTextPass.php 2011-08-28 19:06:52 UTC (rev 95634) @@ -38,9 +38,6 @@ var $history = WikiExporter::FULL; var $fetchCount = 0; var $prefetchCount = 0; - var $lastTime = 0; - var $pageCountLast = 0; - var $revCountLast = 0; var $prefetchCountLast = 0; var $fetchCountLast = 0; @@ -57,8 +54,6 @@ var $spawnRead = false; var $spawnErr = false; - var $ID = 0; - var $xmlwriterobj = false; # when we spend more than maxTimeAllowed seconds on this run, we continue @@ -73,8 +68,6 @@ function initProgress( $history
[MediaWiki-CVS] SVN: [95636] branches/ariel/xmldumps-backup/README.config
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95636 Revision: 95636 Author: ariel Date: 2011-08-28 20:40:51 + (Sun, 28 Aug 2011) Log Message: --- descriptions of checkpointTime, temp, checkforbz2footer options Modified Paths: -- branches/ariel/xmldumps-backup/README.config Modified: branches/ariel/xmldumps-backup/README.config === --- branches/ariel/xmldumps-backup/README.config2011-08-28 19:58:59 UTC (rev 95635) +++ branches/ariel/xmldumps-backup/README.config2011-08-28 20:40:51 UTC (rev 95636) @@ -87,10 +87,13 @@ (wikiproject) being dumped, in subdirectories by date Default value: /dumps/public private -- full path to directory under which all dumps of private wikis - and all private tables will be created, in subdirs by project - name and underneath that in subdirs by date, similar to the - public dumps + and all private tables will be created, in subdirs by project + name and underneath that in subdirs by date, similar to the + public dumps Default value: /dumps/private +temp -- full path to directory under which temporary files will be created; + this should not be the same as the public or private directory. + Default value: /dumps/temp index -- name of the top-level index file for all projects that is automatically created by the monitoring process Default value: index.html @@ -103,7 +106,7 @@ are found Default value: home perdumpindex -- name of the index file created for a dump for a given project - on a given date + on a given date Default value: index.html The above options do not have to be specified in the config file, @@ -158,6 +161,11 @@ Default value: /bin/cat grep -- Location of the grep binary Default value:/bin/grep +checkforbz2footer -- Location of the checkforbz2footer binary + This is part of the mwbzutils package. If + the package is not installed, this value will + be ignored. + Default value: /usr/local/bin/checkforbz2footer The above options do not have to be specified in the config file, since default values are provided. 
@@ -197,6 +205,18 @@ pagesPerChunkAbstract -- as pagesPerChunkHistory but for the abstract generation phase Default value: False +checkpointTime -- save checkpoints of files containing revision text + (articles, metacurrent, metahistory dumps) + every checkpointTime minutes. This involves closing + the current output file, renaming it to a filename + which includes in the filename the first and last page + written, and opening a new file for the next portion + of the XML output. This can be useful if you want + to produce a large number of smaller files as input + to XML-crunching scripts, or if you are dumping + a very large wiki which has a tendency to fail + somewhere in the middle (*cough*en wikipedia*cough*). + Default value: 0 (no checkpoints produced) The above options do not have to be specified in the config file, since default values are provided. ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [95639] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95639 Revision: 95639 Author: ariel Date: 2011-08-28 22:15:58 + (Sun, 28 Aug 2011) Log Message: --- lose empty options from dumpTextPass runs Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-08-28 22:04:49 UTC (rev 95638) +++ branches/ariel/xmldumps-backup/worker.py2011-08-28 22:15:58 UTC (rev 95639) @@ -2750,12 +2750,12 @@ prefetch = --prefetch=%s % (source) else: runner.showRunnerState(... building %s %s XML dump, no text prefetch... % (self._subset, chunkinfo)) - prefetch = None + prefetch = if self._spawn: spawn = --spawn=%s % (runner.wiki.config.php) else: - spawn = None + spawn = if (not exists( runner.wiki.config.php ) ): raise BackupError(php command %s not found % runner.wiki.config.php) @@ -2764,8 +2764,8 @@ checkpointTime = --maxtime=%s % (runner.wiki.config.checkpointTime) checkpointFile = --checkpointfile=%s % outputFile.newFilename(outputFile.dumpName, outputFile.fileType, outputFile.fileExt, outputFile.date, outputFile.chunk, p%sp%s, None) else: - checkpointTime = None - checkpointFile = None + checkpointTime = + checkpointFile = dumpCommand = [ %s % runner.wiki.config.php, -q, %s/maintenance/dumpTextPass.php % runner.wiki.config.wikiDir, --wiki=%s % runner.dbName, @@ -2777,6 +2777,7 @@ --report=1000, %s % spawn ] + dumpCommand = filter(None, dumpCommand) command = dumpCommand filters = self.buildFilters(runner, outputFile) eta = self.buildEta(runner) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [95601] trunk/phase3/includes/Export.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95601 Revision: 95601 Author: ariel Date: 2011-08-27 15:50:17 + (Sat, 27 Aug 2011) Log Message: --- replace WfDie() Modified Paths: -- trunk/phase3/includes/Export.php Modified: trunk/phase3/includes/Export.php === --- trunk/phase3/includes/Export.php2011-08-27 15:00:08 UTC (rev 95600) +++ trunk/phase3/includes/Export.php2011-08-27 15:50:17 UTC (rev 95601) @@ -739,7 +739,7 @@ function closeRenameAndReopen( $newname ) { if ( is_array($newname) ) { if (count($newname) 1) { - WfDie(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); + throw new MWException(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); } else { $newname = $newname[0]; @@ -755,7 +755,7 @@ function rename( $newname ) { if ( is_array($newname) ) { if (count($newname) 1) { - WfDie(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); + throw new MWException(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); } else { $newname = $newname[0]; @@ -796,7 +796,7 @@ function closeRenameAndReopen( $newname ) { if ( is_array($newname) ) { if (count($newname) 1) { - WfDie(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); + throw new MWException(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); } else { $newname = $newname[0]; @@ -814,7 +814,7 @@ function rename( $newname ) { if ( is_array($newname) ) { if (count($newname) 1) { - WfDie(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); + throw new MWException(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); } else { $newname = $newname[0]; @@ -865,7 +865,7 @@ function closeRenameAndReopen( $newname ) { if ( is_array($newname) ) { if (count($newname) 1) { - WfDie(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); + 
throw new MWException(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); } else { $newname = $newname[0]; @@ -883,7 +883,7 @@ function rename( $newname ) { if ( is_array($newname) ) { if (count($newname) 1) { - WfDie(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); + throw new MWException(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); } else { $newname = $newname[0]; ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [95604] trunk/phase3
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95604 Revision: 95604 Author: ariel Date: 2011-08-27 18:31:03 + (Sat, 27 Aug 2011) Log Message: --- define and use closeAndRename() after last write of xml dump file; convert from popen (child inherits all open descriptors and there is no workaround) to proc_open (CLOEXEC set on all descriptors), needed so close and rename doesn't hang forever if a child (prefetcher) is forked Modified Paths: -- trunk/phase3/includes/Export.php trunk/phase3/maintenance/dumpTextPass.php Modified: trunk/phase3/includes/Export.php === --- trunk/phase3/includes/Export.php2011-08-27 18:28:04 UTC (rev 95603) +++ trunk/phase3/includes/Export.php2011-08-27 18:31:03 UTC (rev 95604) @@ -704,6 +704,10 @@ return; } + function closeAndRename( $newname ) { + return; + } + function rename( $newname ) { return; } @@ -752,6 +756,21 @@ } } + function closeAndRename( $newname ) { + if ( is_array($newname) ) { + if (count($newname) 1) { + throw new MWException(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); + } + else { + $newname = $newname[0]; + } + } + if ( $newname ) { + fclose( $this-handle ); + rename( $this-filename, $newname ); + } + } + function rename( $newname ) { if ( is_array($newname) ) { if (count($newname) 1) { @@ -784,11 +803,21 @@ if ( !is_null( $file ) ) { $command .= . wfEscapeShellArg( $file ); } - $this-handle = popen( $command, w ); + + $this-startCommand($command); $this-command = $command; $this-filename = $file; } + function startCommand($command) { + $spec = array( + 0 = array( pipe, r ), + ); + $pipes = array(); + $this-procOpenResource = proc_open( $command, $spec, $pipes ); + $this-handle = $pipes[0]; + } + /** * Close the old file, move it to a specified name, * and reopen new file with the old name. 
@@ -803,14 +832,32 @@ } } if ( $newname ) { - pclose( $this-handle ); + fclose( $this-handle ); + proc_close($this-procOpenResource); rename( $this-filename, $newname ); $command = $this-command; $command .= . wfEscapeShellArg( $this-filename ); - $this-handle = popen( $command, w ); + $this-startCommand($command); } } + function closeAndRename( $newname ) { + if ( is_array($newname) ) { + if (count($newname) 1) { + throw new MWException(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); + } + else { + $newname = $newname[0]; + } + } + if ( $newname ) { +# pclose( $this-handle ); + fclose( $this-handle ); + proc_close($this-procOpenResource); + rename( $this-filename, $newname ); + } + } + function rename( $newname ) { if ( is_array($newname) ) { if (count($newname) 1) { @@ -872,14 +919,31 @@ } } if ( $newname ) { - pclose( $this-handle ); + fclose( $this-handle ); + proc_close($this-procOpenResource); rename( $this-filename, $newname ); $command = 7za a -bd -si . wfEscapeShellArg( $file ); $command .= ' ' . wfGetNull() . ' 21'; - $this-handle = popen( $command, w ); + $this-startCommand($command); } } + function closeAndRename( $newname ) { + if ( is_array($newname) ) { + if (count($newname) 1) { + throw new MWException(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n
[MediaWiki-CVS] SVN: [95404] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95404 Revision: 95404 Author: ariel Date: 2011-08-24 17:26:34 + (Wed, 24 Aug 2011) Log Message: --- actually check user's y/n in a fashion that works; write dumpruninfo file in place, no need for temp file first Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-08-24 16:21:00 UTC (rev 95403) +++ branches/ariel/xmldumps-backup/worker.py2011-08-24 17:26:34 UTC (rev 95404) @@ -395,7 +395,7 @@ directory = self._getDumpRunInfoDirName() dumpRunInfoFilename = self._getDumpRunInfoFileName() # FileUtils.writeFile(directory, dumpRunInfoFilename, text, self.wiki.config.fileperms) - FileUtils.writeFile(self.wiki.config.tempDir, dumpRunInfoFilename, text, self.wiki.config.fileperms) + FileUtils.writeFileInPlace(self.wiki.config.tempDir, dumpRunInfoFilename, text, self.wiki.config.fileperms) # format: name:%; updated:%; status:% def _getStatusForJobFromRunInfoFileLine(self, line, jobName): @@ -1646,7 +1646,7 @@ print This means that the status information about the old run will be lost, and print only the information about the current (and future) runs will be kept. reply = raw_input(Continue anyways? [y/N]: ) - if (not reply in y, Y): + if (not reply in [ y, Y ]): raise RuntimeError( No run information available for previous dump, exiting ) if (not self.wiki.existsPerDumpIndex()): # AFAWK this is a new run (not updating or rerunning an old run), ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [95406] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95406 Revision: 95406 Author: ariel Date: 2011-08-24 17:59:55 + (Wed, 24 Aug 2011) Log Message: --- fix dump run info file writing (again), clean up dup code in newFromFilename Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-08-24 17:56:49 UTC (rev 95405) +++ branches/ariel/xmldumps-backup/worker.py2011-08-24 17:59:55 UTC (rev 95406) @@ -395,7 +395,7 @@ directory = self._getDumpRunInfoDirName() dumpRunInfoFilename = self._getDumpRunInfoFileName() # FileUtils.writeFile(directory, dumpRunInfoFilename, text, self.wiki.config.fileperms) - FileUtils.writeFileInPlace(self.wiki.config.tempDir, dumpRunInfoFilename, text, self.wiki.config.fileperms) + FileUtils.writeFileInPlace(dumpRunInfoFilename, text, self.wiki.config.fileperms) # format: name:%; updated:%; status:% def _getStatusForJobFromRunInfoFileLine(self, line, jobName): @@ -973,6 +973,29 @@ Constructor. Arguments: the full file name including the chunk, the extension, etc BUT NOT the dir name. self.filename = filename + self.dbName = None + self.date = None + self.dumpName = None + + self.basename = None + self.fileExt = None + self.fileType = None + + self.filePrefix = + self.filePrefixLength = 0 + + self.isChunkFile = False + self.chunk = None + self.chunkInt = 0 + + self.isCheckpointFile = False + self.checkpoint = None + self.firstPageID = None + self.lastPageID = None + + self.isTempFile = False + self.temp = None + # example filenames: # elwikidb-20110729-all-titles-in-ns0.gz # elwikidb-20110729-abstract.xml @@ -983,32 +1006,14 @@ if self.filename.endswith(-tmp): self.isTempFile = True self.temp = -tmp - else: - self.isTempFile = False - self.temp = None if ('.' 
in self.filename): (fileBase, self.fileExt) = self.filename.rsplit('.',1) if (self.temp): self.fileExt = self.fileExt[:-4]; else: - self.dbName = None - self.date = None - self.dumpName = None - self.filePrefix = - self.filePrefixLength = 0 - self.isChunkFile = False - self.isCheckpointFile = False - self.checkpoint = None - self.firstPageID = None - self.lastPageID = None - self.isTempFile = False - self.fileExt = None - self.fileType = None return False - # FIXME could have -tmp at the end, when do we look for that?? - if not self.isExt(self.fileExt): self.fileType = self.fileExt # self.fileExt = None @@ -1019,35 +1024,17 @@ # some files are not of this form, we skip them if not '-' in fileBase: - self.dbName = None - self.date = None - self.dumpName = None - self.filePrefix = - self.filePrefixLength = 0 - self.isChunkFile = False - self.isCheckpointFile = False - self.checkpoint = None - self.firstPageID = None - self.lastPageID = None - self.isTempFile = False - self.temp = None return False (self.dbName, self.date, self.dumpName) = fileBase.split('-',2) if not self.date or not self.dumpName: - self.dbName = None - self.date = None self.dumpName = fileBase - self.filePrefix = - self.filePrefixLength = 0 else: self.filePrefix = %s-%s- % (self.dbName, self.date) self.filePrefixLength = len(self.filePrefix) if self.filename.startswith(self.filePrefix): self.basename = self.filename[self.filePrefixLength:] - else
[MediaWiki-CVS] SVN: [95443] trunk/phase3/maintenance/dumpTextPass.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95443 Revision: 95443 Author: ariel Date: 2011-08-24 20:43:09 + (Wed, 24 Aug 2011) Log Message: --- remove extraneous hyphen that crept in, grr Modified Paths: -- trunk/phase3/maintenance/dumpTextPass.php Modified: trunk/phase3/maintenance/dumpTextPass.php === --- trunk/phase3/maintenance/dumpTextPass.php 2011-08-24 20:32:00 UTC (rev 95442) +++ trunk/phase3/maintenance/dumpTextPass.php 2011-08-24 20:43:09 UTC (rev 95443) @@ -220,7 +220,7 @@ $pageRatePart = '-'; $revRatePart = '-'; } - $this-progress( sprintf( %s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d],- + $this-progress( sprintf( %s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d], $now, wfWikiID(), $this-ID, $this-pageCount, $pageRate, $pageRatePart, $this-revCount, $revRate, $revRatePart, $fetchRate, $fetchRatePart, $etats, $this-maxCount ) ); $this-lastTime = $nowts; $this-revCountLast = $this-revCount; ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [95455] branches/ariel/xmldumps-backup/README
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95455 Revision: 95455 Author: ariel Date: 2011-08-24 22:52:34 + (Wed, 24 Aug 2011) Log Message: --- bring main README up to date, minor cleanups Modified Paths: -- branches/ariel/xmldumps-backup/README Modified: branches/ariel/xmldumps-backup/README === --- branches/ariel/xmldumps-backup/README 2011-08-24 22:36:09 UTC (rev 95454) +++ branches/ariel/xmldumps-backup/README 2011-08-24 22:52:34 UTC (rev 95455) @@ -6,40 +6,45 @@ === Worker === -Each dump machine runs a worker process which continuously generates dumps. +Each dump machine runs a worker process, a shell script which continuously +calls a python script to generate a dump for the next available wiki. At each iteration, the set of wikis is ordered by last dump date, and the least-recently-touched wiki is selected. -Workers are kept from stomping on each other by creating a lock file in -the private dump directory. To aid in administration, the lock file contains +There are two directory trees used by the dumps processes, one for public +tables and files of public wikis, and one for private wikis or for private +tables and files (such as the user table) of public wikis. + +Workers (the python scripts) are kept from stomping on each other by creating +a lock file in the private dump directory for the specific wiki. The lock file contains the hostname and process ID of the worker process holding the lock. Lock files are touched every 10 seconds while the process runs, and removed at the end. -On each iteration, the script and configuration are reloaded, so additions -to the database list or dump code will be made available without manually -restarting things. +On each iteration, a new copy of the python script is run, which reads its +configuration files from scratch, so additions to the database list files or +changes to the dump script introduced during the middle of one dump will +go into effect at the start of the next dump. 
- === Monitor === -One master machine runs the monitor process, which periodically sweeps all -wikis for their current status. This accomplishes two tasks: +One server runs the monitor process, which periodically sweeps all +public dump directories (one per wiki) for their current status. This accomplishes two tasks: * The index page is updated with a summary of dump states -* Aborted dumps are detected and cleaned up +* Aborted dumps are detected and cleaned up (how complete is this?) A lock file that has not been touched in some time is detected as stale, indicating that the worker process holding the lock has died. The status for that dump can then be updated from running to stopped, and the lock -file is removed so that the wiki will get redumped later. +file is removed so that the wiki will get dumped again later. +== Code == -== Code files == - worker.py -- Runs a dump for the least-recently dumped wiki in the stack. +- Runs a dump for the least-recently dumped wiki in the stack, or the desired wiki + can be specified from the command line monitor.py - Generates the site-wide index summary and removes stale locks. @@ -47,7 +52,16 @@ WikiDump.py - Shared classes and functions +CommandManagement.py +- Classes for running multiple commands at the same time, used for running some phases + of the dumps in multiple pieces at the same time, for speed +mwbzutils/ +- Library of utilities for working with bzip2 files, used for locating + an arbitrary XML page in a dump file, checking that the file was written + out completely without truncation, and other tools. See the README in + the directory for more details. + == Configuration == Configuration is done with an INI-style configuration file wikidump.conf. ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [95260] trunk/phase3/includes/Export.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95260 Revision: 95260 Author: ariel Date: 2011-08-22 22:01:32 + (Mon, 22 Aug 2011) Log Message: --- add functions that support close and rename of output files as they are being written, used to write out checkpoint files at regular intervals during XML dump production Modified Paths: -- trunk/phase3/includes/Export.php Modified: trunk/phase3/includes/Export.php === --- trunk/phase3/includes/Export.php2011-08-22 21:52:07 UTC (rev 95259) +++ trunk/phase3/includes/Export.php2011-08-22 22:01:32 UTC (rev 95260) @@ -354,6 +354,9 @@ * @ingroup Dump */ class XmlDumpWriter { + var $firstPageWritten = 0; + var $lastPageWritten = 0; + var $pageInProgress = 0; /** * Returns the export schema version. @@ -458,6 +461,7 @@ $title = Title::makeTitle( $row-page_namespace, $row-page_title ); $out .= '' . Xml::elementClean( 'title', array(), $title-getPrefixedText() ) . \n; $out .= '' . Xml::element( 'id', array(), strval( $row-page_id ) ) . \n; + $this-pageInProgress = $row-page_id; if ( $row-page_is_redirect ) { $out .= '' . Xml::element( 'redirect', array() ) . \n; } @@ -478,6 +482,10 @@ */ function closePage() { return /page\n; + if (! $this-firstPageWritten) { + $this-firstPageWritten = $this-pageInProgress; + } + $this-lastPageWritten = $this-pageInProgress; } /** @@ -691,6 +699,18 @@ function write( $string ) { print $string; } + + function closeRenameAndReopen( $newname ) { + return; + } + + function rename( $newname ) { + return; + } + + function getFilename() { + return NULL; + } } /** @@ -699,14 +719,56 @@ */ class DumpFileOutput extends DumpOutput { var $handle; + var $filename; function __construct( $file ) { $this-handle = fopen( $file, wt ); + $this-filename = $file; } function write( $string ) { fputs( $this-handle, $string ); } + + /** +* Close the old file, move it to a specified name, +* and reopen new file with the old name. Use this +* for writing out a file in multiple pieces +* at specified checkpoints (e.g. 
every n hours). +*/ + function closeRenameAndReopen( $newname ) { + if ( is_array($newname) ) { + if (count($newname) 1) { + WfDie(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); + } + else { + $newname = $newname[0]; + } + } + if ( $newname ) { + fclose( $this-handle ); + rename( $this-filename, $newname ); + $this-handle = fopen( $this-filename, wt ); + } + } + + function rename( $newname ) { + if ( is_array($newname) ) { + if (count($newname) 1) { + WfDie(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); + } + else { + $newname = $newname[0]; + } + } + if ( $newname ) { + rename( $this-filename, $newname ); + } + } + + function getFilename() { + return $this-filename; + } } /** @@ -716,12 +778,52 @@ * @ingroup Dump */ class DumpPipeOutput extends DumpFileOutput { + var $command; + function __construct( $command, $file = null ) { if ( !is_null( $file ) ) { $command .= . wfEscapeShellArg( $file ); } $this-handle = popen( $command, w ); + $this-command = $command; + $this-filename = $file; } + + /** +* Close the old file, move it to a specified name, +* and reopen new file with the old name. +*/ + function closeRenameAndReopen( $newname ) { + if ( is_array($newname) ) { + if (count($newname) 1) { + WfDie(Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n); + } + else { + $newname = $newname[0
[MediaWiki-CVS] SVN: [95272] trunk/phase3/maintenance/dumpTextPass.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95272 Revision: 95272 Author: ariel Date: 2011-08-22 22:45:22 + (Mon, 22 Aug 2011) Log Message: --- add support for writing out checkpoint files of xml dump at regular intervals (close and rename file based on filename pattern which includes first and last page id written) Modified Paths: -- trunk/phase3/maintenance/dumpTextPass.php Modified: trunk/phase3/maintenance/dumpTextPass.php === --- trunk/phase3/maintenance/dumpTextPass.php 2011-08-22 22:45:21 UTC (rev 95271) +++ trunk/phase3/maintenance/dumpTextPass.php 2011-08-22 22:45:22 UTC (rev 95272) @@ -59,10 +59,23 @@ var $ID = 0; + var $xmlwriterobj = false; + + # when we spend more than maxTimeAllowed seconds on this run, we continue + # processing until we write out the next complete page, then save output file(s), + # rename it/them and open new one(s) + var $maxTimeAllowed = 0; // 0 = no limit + var $timeExceeded = false; + var $firstPageWritten = false; + var $lastPageWritten = false; + var $checkpointJustWritten = false; + var $checkpointFiles = array(); + function initProgress( $history ) { parent::initProgress(); $this-ID = getmypid(); $this-lastTime = $this-startTime; + $this-timeOfCheckpoint = $this-startTime; } function dump( $history, $text = WikiExporter::TEXT ) { @@ -80,6 +93,12 @@ $this-egress = new ExportProgressFilter( $this-sink, $this ); + # it would be nice to do it in the constructor, oh well. 
need egress set + $this-finalOptionCheck(); + + # we only want this so we know how to close a stream :-P + $this-xmlwriterobj = new XmlDumpWriter(); + $input = fopen( $this-input, rt ); $result = $this-readDump( $input ); @@ -106,6 +125,12 @@ case 'stub': $this-input = $url; break; + case 'maxtime': + $this-maxTimeAllowed = intval($val)*60; + break; + case 'checkpointfile': + $this-checkpointFiles[] = $val; + break; case 'current': $this-history = WikiExporter::CURRENT; break; @@ -204,6 +229,39 @@ } } + function setTimeExceeded() { + $this-timeExceeded = True; + } + + function checkIfTimeExceeded() { + if ( $this-maxTimeAllowed ( $this-lastTime - $this-timeOfCheckpoint $this-maxTimeAllowed ) ) { + return True; + } + return False; + } + + function finalOptionCheck() { + if (($this-checkpointFiles ! $this-maxTimeAllowed) || + ($this-maxTimeAllowed !$this-checkpointFiles)) { + wfDie(Options checkpointfile and maxtime must be specified together.\n); + } + foreach ($this-checkpointFiles as $checkpointFile) { + $count = substr_count ($checkpointFile,%s); + if (substr_count ($checkpointFile,%s) != 2) { + wfDie(Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, fil +e is $checkpointFile.\n); + } + } + + $filenameList = $this-egress-getFilename(); + if (! 
is_array($filenameList)) { + $filenameList = array( $filenameList ); + } + if (count($filenameList) != count($this-checkpointFiles)) { + wfDie(One checkpointfile must be specified for each output option, if maxtime is used.\n); + } + } + function readDump( $input ) { $this-buffer = ; $this-openElement = false; @@ -222,6 +280,9 @@ $offset = 0; // for context extraction on error reporting $bufferSize = 512 * 1024; do { + if ($this-checkIfTimeExceeded()) { + $this-setTimeExceeded(); + } $chunk = fread( $input, $bufferSize ); if ( !xml_parse( $parser, $chunk, feof( $input ) ) ) { wfDebug( TextDumpPass::readDump encountered XML parsing error\n ); @@ -229,6 +290,24 @@ } $offset += strlen( $chunk ); } while ( $chunk !== false !feof( $input ) ); + if ($this-maxTimeAllowed) { + $filenameList
[MediaWiki-CVS] SVN: [95288] trunk/phase3/maintenance/dumpTextPass.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95288 Revision: 95288 Author: ariel Date: 2011-08-23 00:04:45 + (Tue, 23 Aug 2011) Log Message: --- fix a couple bad lines in previous commit from bad merge attempt Modified Paths: -- trunk/phase3/maintenance/dumpTextPass.php Modified: trunk/phase3/maintenance/dumpTextPass.php === --- trunk/phase3/maintenance/dumpTextPass.php 2011-08-22 23:56:23 UTC (rev 95287) +++ trunk/phase3/maintenance/dumpTextPass.php 2011-08-23 00:04:45 UTC (rev 95288) @@ -222,7 +222,6 @@ $this-progress( sprintf( %s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d],- $now, wfWikiID(), $this-ID, $this-pageCount, $pageRate, $pageRatePart, $this-revCount, $revRate, $revRatePart, $fetchRate, $fetchRatePart, $etats, $this-maxCount ) ); $this-lastTime = $now; - $this-partCountLast = $this-partCount; $this-revCountLast = $this-revCount; $this-prefetchCountLast = $this-prefetchCount; $this-fetchCountLast = $this-fetchCount; @@ -248,8 +247,7 @@ foreach ($this-checkpointFiles as $checkpointFile) { $count = substr_count ($checkpointFile,%s); if (substr_count ($checkpointFile,%s) != 2) { - wfDie(Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, fil -e is $checkpointFile.\n); + wfDie(Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, file is $checkpointFile.\n); } } ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [95290] trunk/phase3/maintenance/dumpTextPass.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95290 Revision: 95290 Author: ariel Date: 2011-08-23 00:36:15 + (Tue, 23 Aug 2011) Log Message: --- fix timestamp stuff, more fallout from bad merge attempt Modified Paths: -- trunk/phase3/maintenance/dumpTextPass.php Modified: trunk/phase3/maintenance/dumpTextPass.php === --- trunk/phase3/maintenance/dumpTextPass.php 2011-08-23 00:11:13 UTC (rev 95289) +++ trunk/phase3/maintenance/dumpTextPass.php 2011-08-23 00:36:15 UTC (rev 95290) @@ -181,6 +181,7 @@ if ( $this-reporting ) { $now = wfTimestamp( TS_DB ); + $nowts = wfTime(); $deltaAll = wfTime() - $this-startTime; $deltaPart = wfTime() - $this-lastTime; $this-pageCountPart = $this-pageCount - $this-pageCountLast; @@ -221,7 +222,7 @@ } $this-progress( sprintf( %s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d],- $now, wfWikiID(), $this-ID, $this-pageCount, $pageRate, $pageRatePart, $this-revCount, $revRate, $revRatePart, $fetchRate, $fetchRatePart, $etats, $this-maxCount ) ); - $this-lastTime = $now; + $this-lastTime = $nowts; $this-revCountLast = $this-revCount; $this-prefetchCountLast = $this-prefetchCount; $this-fetchCountLast = $this-fetchCount; @@ -233,6 +234,10 @@ } function checkIfTimeExceeded() { + $m1 = $this-maxTimeAllowed; + $m2 = $this-lastTime; + $m3 = $this-timeOfCheckpoint; + $m4 = $this-lastTime - $this-timeOfCheckpoint; if ( $this-maxTimeAllowed ( $this-lastTime - $this-timeOfCheckpoint $this-maxTimeAllowed ) ) { return True; } ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [93307] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/93307 Revision: 93307 Author: ariel Date: 2011-07-27 20:19:51 + (Wed, 27 Jul 2011) Log Message: --- get rid of last renameFile call (oops) Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-07-27 20:06:27 UTC (rev 93306) +++ branches/ariel/xmldumps-backup/worker.py2011-07-27 20:19:51 UTC (rev 93307) @@ -1908,7 +1908,7 @@ p.runPipelineAndGetOutput() if not p.exitedSuccessfully(): runner.logAndPrint(file %s is truncated, moving out of the way %f ) - os.renameFile( f, f + .truncated ) + os.rename( f, f + .truncated ) return 1 return 0 ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [92610] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92610 Revision: 92610 Author: ariel Date: 2011-07-20 07:23:56 + (Wed, 20 Jul 2011) Log Message: --- add a few more enabled flags, get rid of a few more checks for dryrun etc Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-07-20 04:28:48 UTC (rev 92609) +++ branches/ariel/xmldumps-backup/worker.py2011-07-20 07:23:56 UTC (rev 92610) @@ -1021,7 +1021,6 @@ return os.path.join(self.wiki.publicDir(), self.date); class Runner(object): - def __init__(self, wiki, date=None, prefetch=True, spawn=True, job=None, restart=False, notice=, dryrun = False, loggingEnabled=False, chunkToDo = False): self.wiki = wiki self.dbName = wiki.dbName @@ -1029,17 +1028,22 @@ self.spawn = spawn self.chunkInfo = Chunk(wiki, self.dbName, self.logAndPrint) self.restart = restart - self.loggingEnabled = loggingEnabled self.htmlNoticeFile = None self.log = None self.dryrun = dryrun self._chunkToDo = chunkToDo + + self._loggingEnabled = loggingEnabled self._statusEnabled = True self._checksummerEnabled = True self._runInfoFileEnabled = True self._symLinksEnabled = True self._feedsEnabled = True self._noticeFileEnabled = True + self._makeDirEnabled = True + self._cleanOldDumpsEnabled = True + self._cleanupOldFilesEnabled = False + self._checkForTruncatedFilesEnabled = True if self.dryrun or self._chunkToDo: self._statusEnabled = False @@ -1048,8 +1052,13 @@ self._symLinksEnabled = False self._feedsEnabled = False self._noticeFileEnabled = False + self._makeDirEnabled = False + self._cleanOldDumpsEnabled = False + self._cleanupOldFilesEnables = False + if self.dryrun: - self.loggingEnabled = False + self._loggingEnabled = False + self._checkForTruncatedFilesEnabled = False if date: # Override, continuing a past dump? 
@@ -1065,7 +1074,7 @@ self.lastFailed = False # these must come after the dumpdir setup so we know which directory we are in - if (loggingEnabled): + if (self._loggingEnabled and self._makeDirEnabled): self.logFileName = self.dumpDir.publicPath(self.wiki.config.logFile) self.makeDir(join(self.wiki.publicDir(), self.date)) self.log = Logger(self.logFileName) @@ -1088,7 +1097,7 @@ done = log.doJobOnLogQueue() def logAndPrint(self, message): - if hasattr(self,'log') and self.log and not self.dryrun: + if hasattr(self,'log') and self.log and self._loggingEnabled: self.log.addToLogQueue(%s\n % message) print message @@ -1098,9 +1107,8 @@ else: return - def remove(self, filename): - if not self.dryrun: - os.remove(filename) + def removeFile(self, filename): + os.remove(filename) # returns 0 on success, 1 on error def saveTable(self, table, outfile): @@ -1224,9 +1232,8 @@ # mark all the following jobs to run as well self.dumpItemList.markFollowingJobsToRun() - if not self.dryrun: - self.makeDir(join(self.wiki.publicDir(), self.date)) - self.makeDir(join(self.wiki.privateDir(), self.date)) + self.makeDir(join(self.wiki.publicDir(), self.date)) + self.makeDir(join(self.wiki.privateDir(), self.date)) if (self.restart): self.logAndPrint(Preparing for restart from job %s of %s % (self.jobRequested, self.dbName)) @@ -1250,12 +1257,12 @@ except Exception, ex: self.debug(*** exception! + str(ex)) item.setStatus(failed) - if item.status() == failed and not self.dryrun and not self._chunkToDo: + if item.status() == failed
[MediaWiki-CVS] SVN: [92524] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92524 Revision: 92524 Author: ariel Date: 2011-07-19 11:36:21 + (Tue, 19 Jul 2011) Log Message: --- use hashlib instead of deprecated md5 module Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-07-19 11:30:38 UTC (rev 92523) +++ branches/ariel/xmldumps-backup/worker.py2011-07-19 11:36:21 UTC (rev 92524) @@ -1,7 +1,7 @@ # Worker process, does the actual dumping import getopt -import md5 +import hashlib import os import popen2 import re @@ -760,7 +760,7 @@ return (self.dumpDir.publicPath(self.getChecksumFileNameBasename() + . + self.timestamp + .tmp)) def _md5File(self, filename): - summer = md5.new() + summer = hashlib.md5() infile = file(filename, rb) bufsize = 4192 * 32 buffer = infile.read(bufsize) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [92536] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92536 Revision: 92536 Author: ariel Date: 2011-07-19 14:37:05 + (Tue, 19 Jul 2011) Log Message: --- copy over partial md5 results after every job, so users who download files before the entire dump is done can check file integrity Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-07-19 14:30:50 UTC (rev 92535) +++ branches/ariel/xmldumps-backup/worker.py2011-07-19 14:37:05 UTC (rev 92536) @@ -747,6 +747,13 @@ realFileName = self._getChecksumFileName() os.rename(tmpFileName, realFileName) + def cpMd5TmpFileToPermFile(self): + if (self._enabled): + tmpFileName = self._getChecksumFileNameTmp() + realFileName = self._getChecksumFileName() + text = FileUtils.readFile(tmpFileName) + FileUtils.writeFile(self._getMd5FileDirName(), realFileName, text, self.wiki.config.fileperms) + def getChecksumFileNameBasename(self): return (md5sums.txt) @@ -780,6 +787,9 @@ checksum = self._md5FileLine(path) output.write(checksum) + def _getMd5FileDirName(self): + return os.path.join(self.wiki.publicDir(), self.wiki.date); + class DumpDir(object): def __init__(self, wiki, dbName, date): self._wiki = wiki @@ -1281,6 +1291,7 @@ else: if not self.dryrun and not self._chunkToDo: self.runUpdateItemFileInfo(item) + self.checksums.cpMd5TmpFileToPermFile() self.lastFailed = False self.status.updateStatusFiles(done) ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [92543] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92543 Revision: 92543 Author: ariel Date: 2011-07-19 17:25:01 + (Tue, 19 Jul 2011) Log Message: --- clean up use of the config var Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-07-19 17:13:26 UTC (rev 92542) +++ branches/ariel/xmldumps-backup/worker.py2011-07-19 17:25:01 UTC (rev 92543) @@ -69,19 +69,20 @@ def __init__(self, wiki, dbName, errorCallback = None): self._dbName = dbName - self._chunksEnabled = wiki.config.chunksEnabled + self.wiki = wiki + self._chunksEnabled = self.wiki.config.chunksEnabled if (self._chunksEnabled): - self._pagesPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkHistory) - self._revsPerChunkHistory = self.convertCommaSepLineToNumbers(wiki.config.revsPerChunkHistory) - self._pagesPerChunkAbstract = self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkAbstract) - self._recombineHistory = wiki.config.recombineHistory + self._pagesPerChunkHistory = self.convertCommaSepLineToNumbers(self.wiki.config.pagesPerChunkHistory) + self._revsPerChunkHistory = self.convertCommaSepLineToNumbers(self.wiki.config.revsPerChunkHistory) + self._pagesPerChunkAbstract = self.convertCommaSepLineToNumbers(self.wiki.config.pagesPerChunkAbstract) + self._recombineHistory = self.wiki.config.recombineHistory else: self._pagesPerChunkHistory = False self._revsPerChunkHistory = False self._pagesPerChunkAbstract = False self._recombineHistory = False if (self._chunksEnabled): - self.Stats = PageAndEditStats(wiki,dbName, errorCallback) + self.Stats = PageAndEditStats(self.wiki,dbName, errorCallback) if (not self.Stats.totalEdits or not self.Stats.totalPages): raise BackupError(Failed to get DB stats, exiting) if (self._revsPerChunkHistory): @@ -155,17 +156,17 @@ class DbServerInfo(object): def __init__(self, wiki, dbName, errorCallback = None): - self.config = 
wiki.config + self.wiki = wiki self.dbName = dbName self.errorCallback = errorCallback self.selectDatabaseServer() def defaultServer(self): # if this fails what do we do about it? Not a bleeping thing. *ugh* FIXME!! - if (not exists( self.config.php ) ): - raise BackupError(php command %s not found % self.config.php); + if (not exists( self.wiki.config.php ) ): + raise BackupError(php command %s not found % self.wiki.config.php); command = %s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump % MiscUtils.shellEscape(( - self.config.php, self.config.wikiDir, self.dbName)) + self.wiki.config.php, self.wiki.config.wikiDir, self.dbName)) return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip() def selectDatabaseServer(self): @@ -173,12 +174,12 @@ def buildSqlCommand(self, query, pipeto = None): Put together a command to execute an sql query to the server for this DB. - if (not exists( self.config.mysql ) ): - raise BackupError(mysql command %s not found % self.config.mysql); + if (not exists( self.wiki.config.mysql ) ): + raise BackupError(mysql command %s not found % self.wiki.config.mysql); command = [ [ /bin/echo, %s % query ], - [ %s % self.config.mysql, -h, + [ %s % self.wiki.config.mysql, -h, %s % self.dbServer, - -u, %s % self.config.dbUser, + -u, %s % self.wiki.config.dbUser, %s % self.passwordOption(), %s % self.dbName, -r ] ] @@ -189,11 +190,11 @@ def buildSqlDumpCommand(self, table, pipeto = None): Put together a command to dump a table from the current DB with mysqldump and save to a gzipped sql file. - if (not exists( self.config.mysqldump ) ): - raise BackupError(mysqldump command %s not found
[MediaWiki-CVS] SVN: [92427] branches/ariel/xmldumps-backup/WikiDump.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92427 Revision: 92427 Author: ariel Date: 2011-07-18 12:48:58 + (Mon, 18 Jul 2011) Log Message: --- class name before method invocation Modified Paths: -- branches/ariel/xmldumps-backup/WikiDump.py Modified: branches/ariel/xmldumps-backup/WikiDump.py === --- branches/ariel/xmldumps-backup/WikiDump.py 2011-07-18 12:29:53 UTC (rev 92426) +++ branches/ariel/xmldumps-backup/WikiDump.py 2011-07-18 12:48:58 UTC (rev 92427) @@ -326,7 +326,7 @@ try: # tack on the file mtime so that if we have multiple wikis # dumped on the same day, they get ordered properly - date = int(today()) - int(last) + date = int(TimeUtils.today()) - int(last) age = FileUtils.fileAge(dumpStatus) status = FileUtils.readFile(dumpStatus) except: ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [92370] branches/ariel/xmldumps-backup
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92370 Revision: 92370 Author: ariel Date: 2011-07-16 17:15:10 + (Sat, 16 Jul 2011) Log Message: --- allow per project items in conf file; formatting cleanup; bug from previous commit, wrong indentation Modified Paths: -- branches/ariel/xmldumps-backup/WikiDump.py branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/WikiDump.py === --- branches/ariel/xmldumps-backup/WikiDump.py 2011-07-16 16:56:09 UTC (rev 92369) +++ branches/ariel/xmldumps-backup/WikiDump.py 2011-07-16 17:15:10 UTC (rev 92370) @@ -127,10 +127,12 @@ class Config(object): def __init__(self, configFile=False): + self.projectName = False + home = os.path.dirname(sys.argv[0]) if (not configFile): configFile = wikidump.conf - files = [ + self.files = [ os.path.join(home,configFile), /etc/wikidump.conf, os.path.join(os.getenv(HOME), .wikidump.conf)] @@ -193,97 +195,123 @@ # whether or not to recombine the history pieces recombineHistory : 1, } - conf = ConfigParser.SafeConfigParser(defaults) - conf.read(files) + self.conf = ConfigParser.SafeConfigParser(defaults) + self.conf.read(self.files) - if not conf.has_section(wiki): + if not self.conf.has_section(wiki): print The mandatory configuration section 'wiki' was not defined. raise ConfigParser.NoSectionError('wiki') - if not conf.has_option(wiki,dir): + if not self.conf.has_option(wiki,dir): print The mandatory setting 'dir' in the section 'wiki' was not defined. 
raise ConfigParser.NoOptionError('wiki','dir') - self.dbList = MiscUtils.dbList(conf.get(wiki, dblist)) - self.skipDbList = MiscUtils.dbList(conf.get(wiki, skipdblist)) - self.privateList = MiscUtils.dbList(conf.get(wiki, privatelist)) - self.bigList = MiscUtils.dbList(conf.get(wiki, biglist)) - self.flaggedRevsList = MiscUtils.dbList(conf.get(wiki, flaggedrevslist)) - self.wikiDir = conf.get(wiki, dir) - self.forceNormal = conf.getint(wiki, forceNormal) - self.halt = conf.getint(wiki, halt) + self.parseConfFileGlobally() + self.parseConfFilePerProject() + def parseConfFileGlobally(self): + self.dbList = MiscUtils.dbList(self.conf.get(wiki, dblist)) + self.skipDbList = MiscUtils.dbList(self.conf.get(wiki, skipdblist)) + self.privateList = MiscUtils.dbList(self.conf.get(wiki, privatelist)) + self.bigList = MiscUtils.dbList(self.conf.get(wiki, biglist)) + self.flaggedRevsList = MiscUtils.dbList(self.conf.get(wiki, flaggedrevslist)) + self.wikiDir = self.conf.get(wiki, dir) + self.forceNormal = self.conf.getint(wiki, forceNormal) + self.halt = self.conf.getint(wiki, halt) + self.dbList = list(set(self.dbList) - set(self.skipDbList)) - if not conf.has_section('output'): - conf.add_section('output') - self.publicDir = conf.get(output, public) - self.privateDir = conf.get(output, private) - self.webRoot = conf.get(output, webroot) - self.index = conf.get(output, index) - self.templateDir = conf.get(output, templateDir) - self.perDumpIndex = conf.get(output, perdumpindex) - self.logFile = conf.get(output, logfile) - self.fileperms = conf.get(output, fileperms) + if not self.conf.has_section('output'): + self.conf.add_section('output') + self.publicDir = self.conf.get(output, public) + self.privateDir = self.conf.get(output, private) + self.webRoot = self.conf.get(output, webroot) + self.index = self.conf.get(output, index) + self.templateDir = self.conf.get(output, templateDir) + self.perDumpIndex = self.conf.get(output, perdumpindex) + self.logFile = 
self.conf.get(output, logfile) + self.fileperms = self.conf.get(output, fileperms) self.fileperms = int(self.fileperms,0) - if not conf.has_section('reporting'): - conf.add_section('reporting') - self.adminMail = conf.get(reporting, adminmail) - self.mailFrom = conf.get(reporting
[MediaWiki-CVS] SVN: [92230] branches/ariel/xmldumps-backup/worker.py
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92230 Revision: 92230 Author: ariel Date: 2011-07-15 07:53:31 + (Fri, 15 Jul 2011) Log Message: --- check for existence of various commands, exception if missing (thanks to reeve of irc for noticing the issue) Modified Paths: -- branches/ariel/xmldumps-backup/worker.py Modified: branches/ariel/xmldumps-backup/worker.py === --- branches/ariel/xmldumps-backup/worker.py2011-07-15 07:42:20 UTC (rev 92229) +++ branches/ariel/xmldumps-backup/worker.py2011-07-15 07:53:31 UTC (rev 92230) @@ -160,6 +160,8 @@ def defaultServer(self): # if this fails what do we do about it? Not a bleeping thing. *ugh* FIXME!! + if (not exists( self.config.php ) ): + raise BackupError(php command %s not found % self.config.php); command = %s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump % MiscUtils.shellEscape(( self.config.php, self.config.wikiDir, self.dbName)) return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip() @@ -169,6 +171,8 @@ def buildSqlCommand(self, query, pipeto = None): Put together a command to execute an sql query to the server for this DB. + if (not exists( self.config.mysql ) ): + raise BackupError(mysql command %s not found % self.config.mysql); command = [ [ /bin/echo, %s % query ], [ %s % self.config.mysql, -h, %s % self.dbServer, @@ -183,6 +187,8 @@ def buildSqlDumpCommand(self, table, pipeto = None): Put together a command to dump a table from the current DB with mysqldump and save to a gzipped sql file. 
+ if (not exists( self.config.mysqldump ) ): + raise BackupError(mysqldump command %s not found % self.config.mysqldump); command = [ [ %s % self.config.mysqldump, -h, %s % self.dbServer, -u, %s % self.config.dbUser, @@ -215,6 +221,8 @@ def getDBTablePrefix(self): Get the prefix for all tables for the specific wiki ($wgDBprefix) # FIXME later full path + if (not exists( self.config.php ) ): + raise BackupError(php command %s not found % self.config.php); command = echo 'print $wgDBprefix; ' | %s -q %s/maintenance/eval.php --wiki=%s % MiscUtils.shellEscape(( self.config.php, self.config.wikiDir, self.dbName)) return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip() @@ -953,11 +961,15 @@ # returns 0 on success, 1 on error def saveTable(self, table, outfile): Dump a table from the current DB with mysqldump, save to a gzipped sql file. + if (not exists( self.config.gzip ) ): + raise BackupError(gzip command %s not found % self.config.gzip); commands = self.dbServerInfo.buildSqlDumpCommand(table, self.config.gzip) return self.saveCommand(commands, outfile) def saveSql(self, query, outfile): Pass some SQL commands to the server for this DB and save output to a gzipped file. 
+ if (not exists( self.config.gzip ) ): + raise BackupError(gzip command %s not found % self.config.gzip); command = self.dbServerInfo.buildSqlCommand(query, self.config.gzip) return self.saveCommand(command, outfile) @@ -1354,8 +1366,14 @@ outputFilename = runner.dumpDir.publicPath(outputFileBasename) chunkNum = 0 recombines = [] + if (not exists( runner.config.head ) ): + raise BackupError(head command %s not found % runner.config.head); head = runner.config.head + if (not exists( runner.config.tail ) ): + raise BackupError(tail command %s not found % runner.config.tail); tail = runner.config.tail + if (not exists( runner.config.grep ) ): + raise BackupError(grep command %s not found % runner.config.grep); grep = runner.config.grep # we assume the result is always going to be run in a subshell. @@ -1486,6 +1504,8 @@ current = self.buildCurrentOutputFilename(runner, chunk) articles = self.buildArticlesOutputFilename(runner, chunk) + if (not exists( runner.config.php ) ): + raise BackupError(php command %s not found % runner.config.php); command = [ %s % runner.config.php
[MediaWiki-CVS] SVN: [92144] branches/ariel/xmldumps-backup/mwbzutils
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92144 Revision: 92144 Author: ariel Date: 2011-07-14 08:35:29 + (Thu, 14 Jul 2011) Log Message: --- version bump; for finding pageid in xml file, workaround for pages with giant cumulative rev text (*cough en pedia pageid 3976790), uses api (relatively fast) with fallback to stub file (much slower but not nearly as slow as a straight decompress and read) Modified Paths: -- branches/ariel/xmldumps-backup/mwbzutils/Makefile branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c branches/ariel/xmldumps-backup/mwbzutils/mwbzutils.h Added Paths: --- branches/ariel/xmldumps-backup/mwbzutils/httptiny.c Modified: branches/ariel/xmldumps-backup/mwbzutils/Makefile === --- branches/ariel/xmldumps-backup/mwbzutils/Makefile 2011-07-14 07:00:25 UTC (rev 92143) +++ branches/ariel/xmldumps-backup/mwbzutils/Makefile 2011-07-14 08:35:29 UTC (rev 92144) @@ -34,8 +34,8 @@ dumplastbz2block: $(OBJSBZ) mwbzlib.o dumplastbz2block.o $(CC) $(CFLAGS) $(LDFLAGS) -o dumplastbz2block dumplastbz2block.o mwbzlib.o $(OBJSBZ) -lbz2 -findpageidinbz2xml: $(OBJSBZ) mwbzlib.o findpageidinbz2xml.o - $(CC) $(CFLAGS) $(LDFLAGS) -o findpageidinbz2xml findpageidinbz2xml.o mwbzlib.o $(OBJSBZ) -lbz2 +findpageidinbz2xml: $(OBJSBZ) mwbzlib.o httptiny.o findpageidinbz2xml.o + $(CC) $(CFLAGS) $(LDFLAGS) -o findpageidinbz2xml findpageidinbz2xml.o httptiny.o mwbzlib.o $(OBJSBZ) -lbz2 -lz checkforbz2footer: $(OBJSBZ) mwbzlib.o checkforbz2footer.o $(CC) $(CFLAGS) $(LDFLAGS) -o checkforbz2footer checkforbz2footer.o mwbzlib.o $(OBJSBZ) -lbz2 @@ -62,6 +62,8 @@ $(CC) $(CFLAGS) -c bzlibfuncs.c mwbzlib.o: mwbzlib.c bzlib.h bzlib_private.h mwbzutils.h $(CC) $(CFLAGS) -c mwbzlib.c +httptiny.o: httptiny.c + $(CC) $(CFLAGS) -c httptiny.c dumplastbz2block.o: dumplastbz2block.c $(CC) $(CFLAGS) -c dumplastbz2block.c findpageidinbz2xml.o: findpageidinbz2xml.c @@ -73,7 +75,7 @@ distclean: clean -DISTNAME=mwbzutils-0.0.1 
+DISTNAME=mwbzutils-0.0.2 dist: rm -f $(DISTNAME) ln -s -f . $(DISTNAME) @@ -82,6 +84,7 @@ $(DISTNAME)/findpageidinbz2xml.c \ $(DISTNAME)/checkforbz2footer.c \ $(DISTNAME)/dumpbz2filefromoffset.c \ + $(DISTNAME)/httptiny.c \ $(DISTNAME)/mwbzlib.c \ $(DISTNAME)/mwbzutils.h \ $(DISTNAME)/bzlibfuncs.c \ Modified: branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c === --- branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c 2011-07-14 07:00:25 UTC (rev 92143) +++ branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c 2011-07-14 08:35:29 UTC (rev 92144) @@ -1,4 +1,5 @@ #include unistd.h +#include getopt.h #include stdio.h #include string.h #include sys/types.h @@ -9,9 +10,9 @@ #include sys/types.h #include regex.h #include inttypes.h +#include zlib.h #include mwbzutils.h - /* find the first bz2 block marker in the file, from its current position, @@ -23,6 +24,13 @@ int init_and_read_first_buffer_bz2_file(bz_info_t *bfile, int fin) { int res; + bfile-bufin_size = BUFINSIZE; + bfile-marker = init_marker(); + bfile-bytes_read = 0; + bfile-bytes_written = 0; + bfile-eof = 0; + bfile-file_size = get_file_size(fin); + bfile-initialized++; res = find_next_bz2_block_marker(fin, bfile, FORWARD); @@ -32,35 +40,244 @@ setup_first_buffer_to_decompress(fin, bfile); return(0); } + else { +fprintf(stderr,failed to find the next frigging block marker\n); +return(-1); + } +} + +extern char * geturl(char *hostname, int port, char *url); + +char *get_hostname_from_xml_header(int fin) { + int res; + regmatch_t *match_base_expr; + regex_t compiled_base_expr; + /*basehttp://el.wiktionary.org/wiki/.../base */ + /* basehttp://trouble.localdomain/wiki/ */ + char *base_expr = basehttp://([^/]+)/; + int length=5000; /* output buffer size */ + + buf_info_t *b; + bz_info_t bfile; + + int hostname_length = 0; + + off_t old_position, seek_result; + static char hostname[256]; + + bfile.initialized = 0; + + res = regcomp(compiled_base_expr, base_expr, REG_EXTENDED); + 
match_base_expr = (regmatch_t *)malloc(sizeof(regmatch_t)*2); + + b = init_buffer(length); + bfile.bytes_read = 0; + + bfile.position = (off_t)0; + old_position = lseek(fin,(off_t)0,SEEK_CUR); + seek_result = lseek(fin,(off_t)0,SEEK_SET); + + while ((get_buffer_of_uncompressed_data(b, fin, bfile, FORWARD)=0) (! bfile.eof)) { +/* so someday the header might grow enough that base isn't in the first 1000 characters but we'll ignore that for now */ +if (bfile.bytes_read b-bytes_avail 1000) { + /* get project name
[MediaWiki-CVS] SVN: [92149] trunk/extensions/Renameuser/renameUserCleanup.php
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92149 Revision: 92149 Author: ariel Date: 2011-07-14 11:07:41 +0000 (Thu, 14 Jul 2011) Log Message: --- search for older format log entries in case the new format ones aren't there, clean up check for existence Modified Paths: -- trunk/extensions/Renameuser/renameUserCleanup.php Modified: trunk/extensions/Renameuser/renameUserCleanup.php === --- trunk/extensions/Renameuser/renameUserCleanup.php 2011-07-14 10:47:56 UTC (rev 92148) +++ trunk/extensions/Renameuser/renameUserCleanup.php 2011-07-14 11:07:41 UTC (rev 92149) @@ -67,12 +67,29 @@ ), __METHOD__ ); - if (! $result ) { - print("No log entry found for a rename of ".$olduser->getName()." to ".$newuser->getName().", giving up\n"); - exit(1); + if (! $result || ! $result->numRows() ) { + // try the old format + $result = $dbr->select( 'logging', '*', + array( 'log_type' => 'renameuser', + 'log_action' => 'renameuser', + 'log_title' => $olduser->getName(), + ), + __METHOD__ + ); + if (! $result || ! $result->numRows() ) { + print("No log entry found for a rename of ".$olduser->getName()." to ".$newuser->getName().", giving up\n"); + exit(1); + } + else { + foreach ( $result as $row ) { + print("Found possible log entry of the rename, please check: ".$row->log_title." with comment ".$row->log_comment." on $row->log_timestamp\n"); + } + } } - foreach ( $result as $row ) { - print("Found log entry of the rename: ".$olduser->getName()." to ".$newuser->getName()." on $row->log_timestamp\n"); + else { + foreach ( $result as $row ) { + print("Found log entry of the rename: ".$olduser->getName()." to ".$newuser->getName()." on $row->log_timestamp\n"); + } } if ($result->numRows() > 1) { print("More than one rename entry found in the log, not sure what to do. Continue anyways? [N/y] "); @@ -91,7 +108,7 @@ $this->updateTable('logging', 'log_user_text', 'log_user', 'log_timestamp', $olduser, $newuser, $dbw); $this->updateTable('image', 'img_user_text', 'img_user', 'img_timestamp', $olduser, $newuser, $dbw); $this->updateTable('oldimage', 'oi_user_text', 'oi_user', 'oi_timestamp', $olduser, $newuser, $dbw); -# FIXME: updateTable('filearchive', 'fa_user_text','fa_user', 'fa_timestamp', $olduser, $newuser, $dbw); (not indexed yet) + $this->updateTable('filearchive', 'fa_user_text','fa_user', 'fa_timestamp', $olduser, $newuser, $dbw); print "Done!\n"; exit(0); } ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [92154] branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92154 Revision: 92154 Author: ariel Date: 2011-07-14 11:48:42 +0000 (Thu, 14 Jul 2011) Log Message: --- this time without the debugging values for frequency of api calls Modified Paths: -- branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c Modified: branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c === --- branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c 2011-07-14 11:42:14 UTC (rev 92153) +++ branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c 2011-07-14 11:48:42 UTC (rev 92154) @@ -336,8 +336,7 @@ at least one rev id in there. 20 million / 5000 or whatever it is, is 4000 buffers full of crap hopefully that doesn't take forever. */ - /* if (buffer_count > (2000/BUFINSIZE) && rev_id) { */ - if (buffer_count > 3 && rev_id) { + if (buffer_count > (2000/BUFINSIZE) && rev_id) { if (use_api) { page_id_found = get_page_id_from_rev_id_via_api(rev_id, fin); } ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
[MediaWiki-CVS] SVN: [91967] trunk/phase3
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/91967 Revision: 91967 Author: ariel Date: 2011-07-12 15:01:58 +0000 (Tue, 12 Jul 2011) Log Message: --- libxml >= 2.7.3 has a 10mb cap on the size of a text node and the LIBXML_PARSEHUGE lets us override that, needed for lucid since there are a few revs in the db larger than that limit Modified Paths: -- trunk/phase3/includes/Import.php trunk/phase3/maintenance/backupPrefetch.inc Modified: trunk/phase3/includes/Import.php === --- trunk/phase3/includes/Import.php 2011-07-12 14:58:58 UTC (rev 91966) +++ trunk/phase3/includes/Import.php 2011-07-12 15:01:58 UTC (rev 91967) @@ -45,7 +45,12 @@ stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' ); $id = UploadSourceAdapter::registerSource( $source ); - $this->reader->open( "uploadsource://$id" ); + if (defined( 'LIBXML_PARSEHUGE' ) ) { + $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE ); + } + else { + $this->reader->open( "uploadsource://$id" ); + } // Default callbacks $this->setRevisionCallback( array( $this, "importRevision" ) ); Modified: trunk/phase3/maintenance/backupPrefetch.inc === --- trunk/phase3/maintenance/backupPrefetch.inc 2011-07-12 14:58:58 UTC (rev 91966) +++ trunk/phase3/maintenance/backupPrefetch.inc 2011-07-12 15:01:58 UTC (rev 91967) @@ -51,7 +51,12 @@ $this->infiles = explode(';',$infile); $this->reader = new XMLReader(); $infile = array_shift($this->infiles); - $this->reader->open( $infile ); + if (defined( 'LIBXML_PARSEHUGE' ) ) { + $this->reader->open( $infile, null, LIBXML_PARSEHUGE ); + } + else { + $this->reader->open( $infile ); + } } /** ___ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs