[MediaWiki-CVS] SVN: [111156] branches/ariel/xmldumps-phase3/

2012-02-10 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/111156

Revision: 111156
Author:   ariel
Date: 2012-02-10 14:09:34 +0000 (Fri, 10 Feb 2012)
Log Message:
---
remove phase3 dir unused since its creation

Removed Paths:
-
branches/ariel/xmldumps-phase3/


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [110757] branches/ariel/xmldumps-backup/create-rsync-list.sh

2012-02-06 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/110757

Revision: 110757
Author:   ariel
Date: 2012-02-06 16:39:21 +0000 (Mon, 06 Feb 2012)
Log Message:
---
pick up the abstract files for rsync too

Modified Paths:
--
branches/ariel/xmldumps-backup/create-rsync-list.sh

Modified: branches/ariel/xmldumps-backup/create-rsync-list.sh
===
--- branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-02-06 16:29:58 UTC 
(rev 110756)
+++ branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-02-06 16:39:21 UTC 
(rev 110757)
@@ -96,12 +96,14 @@
 	ls $d/*.7z 2>/dev/null >> $outputfile.tmp
 	ls $d/*.html 2>/dev/null >> $outputfile.tmp
 	ls $d/*.txt 2>/dev/null >> $outputfile.tmp
+	ls $d/*.xml 2>/dev/null >> $outputfile.tmp
     else
 	ls $d/*.gz 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
 	ls $d/*.bz2 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
 	ls $d/*.7z 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
 	ls $d/*.html 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
 	ls $d/*.txt 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
+	ls $d/*.xml 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
     fi
 else
     list_dir_only
 else
list_dir_only


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [110561] branches/ariel/xmldumps-backup/create-rsync-list.sh

2012-02-02 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/110561

Revision: 110561
Author:   ariel
Date: 2012-02-02 11:26:23 +0000 (Thu, 02 Feb 2012)
Log Message:
---
option to list dump dirs only w/o file contents

Modified Paths:
--
branches/ariel/xmldumps-backup/create-rsync-list.sh

Modified: branches/ariel/xmldumps-backup/create-rsync-list.sh
===
--- branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-02-02 11:15:10 UTC 
(rev 110560)
+++ branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-02-02 11:26:23 UTC 
(rev 110561)
@@ -5,6 +5,7 @@
 # are not n successful dumps available.  
 
 # Options:
+# dirsonly    -- list only the directories to include
 # dumpsnumber -- number of dumps to list
 # outputfile  -- path to file in which to write the list
 # configfile  -- path to config file used to generate dumps
@@ -12,6 +13,7 @@
 usage() {
     echo "Usage: $0 --dumpsnumber n --outputfile filename --configfile 
filename --rsyncprefix path"
     echo 
+    echo "  dirsonly      list only directories to include"
     echo "  dumpsnumber   number of dumps to list"
     echo "  outputfile    name of file to which we will write iw action 
list"
     echo "  configfile    name of configuration file for dump generation"
@@ -21,6 +23,7 @@
     echo 
     echo "For example:"
     echo "   $0 --dumpsnumber 5 --outputfile 
/data/dumps/public/dumpsfiles_for_rsync.txt --configfile wikidump.conf.testing"
+
     exit 1
 }
 
@@ -74,22 +77,34 @@
     done
 }
 
+list_dir_only() {
+    if [ "$rsyncprefix" == "false" ]; then
+	ls -d $d 2>/dev/null >> $outputfile.tmp
+    else
+	ls -d $d 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
+    fi
+}
+
 list_files_in_dir() {
     if [ ! -f $outputfile.tmp ]; then
	touch $outputfile.tmp
     fi
-    if [ "$rsyncprefix" == "false" ]; then
-	ls $d/*.gz 2>/dev/null >> $outputfile.tmp
-	ls $d/*.bz2 2>/dev/null >> $outputfile.tmp
-	ls $d/*.7z 2>/dev/null >> $outputfile.tmp
-	ls $d/*.html 2>/dev/null >> $outputfile.tmp
-	ls $d/*.txt 2>/dev/null >> $outputfile.tmp
+    if [ "$dirsonly" == "false" ]; then
+	if [ "$rsyncprefix" == "false" ]; then
+	    ls $d/*.gz 2>/dev/null >> $outputfile.tmp
+	    ls $d/*.bz2 2>/dev/null >> $outputfile.tmp
+	    ls $d/*.7z 2>/dev/null >> $outputfile.tmp
+	    ls $d/*.html 2>/dev/null >> $outputfile.tmp
+	    ls $d/*.txt 2>/dev/null >> $outputfile.tmp
+	else
+	    ls $d/*.gz 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
+	    ls $d/*.bz2 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
+	    ls $d/*.7z 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
+	    ls $d/*.html 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
+	    ls $d/*.txt 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
+	fi
     else
-	ls $d/*.gz 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
-	ls $d/*.bz2 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
-	ls $d/*.7z 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
-	ls $d/*.html 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
-	ls $d/*.txt 2>/dev/null | sed -e "s|^$publicdir|$rsyncprefix|" >> 
$outputfile.tmp
+	list_dir_only
     fi
 }
 
@@ -121,7 +136,7 @@
 fi
 }
 
-if [ $# -lt 4 -o $# -gt 8 ]; then
+if [ $# -lt 4 -o $# -gt 9 ]; then
 usage
 fi
 
@@ -129,21 +144,28 @@
 outputfile=
 configfile=wikidump.conf
 rsyncprefix=false
+dirsonly=false
 
 while [ $# -gt 0 ]; do
-    if [ "$1" == "--dumpsnumber" ]; then
+    if [ "$1" == "--dirsonly" ]; then
+	dirsonly=true
+	shift
+    elif [ "$1" == "--dumpsnumber" ]; then
	dumpsnumber="$2"
+	shift; shift
     elif [ "$1" == "--outputfile" ]; then
	outputfile="$2"
+	shift; shift
     elif [ "$1" == "--configfile" ]; then
	configfile="$2"
+	shift; shift
     elif [ "$1" == "--rsyncprefix" ]; then
	rsyncprefix="$2"
+	shift; shift
     else
	echo "$0: Unknown option $1"
	usage
     fi
-    shift; shift
 done
 
 check_args
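
For context, the new flag slots into the existing option syntax like so (the
paths here are just the ones from the usage text above, not from a real run):

    bash ./create-rsync-list.sh --dumpsnumber 5 \
         --outputfile /data/dumps/public/dumpsfiles_for_rsync.txt \
         --configfile wikidump.conf.testing --dirsonly

With --dirsonly set, list_files_in_dir defers to list_dir_only, so the tmp
file collects one line per dump directory rather than one line per dump file;
the rsyncprefix rewriting and the final rename to the real output file are
unchanged.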


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [109918] branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py

2012-01-24 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109918

Revision: 109918
Author:   ariel
Date: 2012-01-24 10:34:43 +0000 (Tue, 24 Jan 2012)
Log Message:
---
make getlatestdir actually get latest instead of first :-P

Modified Paths:
--
branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py

Modified: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
===
--- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py  2012-01-24 
10:30:05 UTC (rev 109917)
+++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py  2012-01-24 
10:34:43 UTC (rev 109918)
@@ -406,7 +406,7 @@
 dirs = self.getIncDumpDirs()
 if dirs:
 if ok:
-for dump in dirs:
+for dump in reversed(dirs):
 statusInfo = StatusInfo(self._config, dump, self.wikiName)
                 if statusInfo.getStatus(dump) == "done":
 return dump
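
A toy illustration of the fix, assuming getIncDumpDirs() returns run dates
sorted oldest-first (hypothetical data, not from the commit):

    dirs = ["20120101", "20120110", "20120124"]
    status = {"20120101": "done", "20120110": "done", "20120124": "failed"}
    # before: scanning forward returned the *oldest* finished run
    first = next(d for d in dirs if status[d] == "done")            # "20120101"
    # after: scanning in reverse returns the latest finished run
    latest = next(d for d in reversed(dirs) if status[d] == "done")  # "20120110"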


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [109919] branches/ariel/xmldumps-backup/create-rsync-list.sh

2012-01-24 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109919

Revision: 109919
Author:   ariel
Date: 2012-01-24 10:53:26 +0000 (Tue, 24 Jan 2012)
Log Message:
---
eh, don't exit before creating the actual useful rsync file

Modified Paths:
--
branches/ariel/xmldumps-backup/create-rsync-list.sh

Modified: branches/ariel/xmldumps-backup/create-rsync-list.sh
===
--- branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-01-24 10:34:43 UTC 
(rev 109918)
+++ branches/ariel/xmldumps-backup/create-rsync-list.sh 2012-01-24 10:53:26 UTC 
(rev 109919)
@@ -180,7 +180,6 @@
 fi
 if [ -f $outputfile.tmp ]; then
 mv $outputfile.tmp $outputfile
-exit 0
 else
     echo "$0: no output file created. Something is wrong."
 exit 1


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [109922] branches/ariel/xmldumps-backup/worker.py

2012-01-24 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109922

Revision: 109922
Author:   ariel
Date: 2012-01-24 11:17:27 +0000 (Tue, 24 Jan 2012)
Log Message:
---
don't send email on failure for runs of isolated jobs

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2012-01-24 11:14:18 UTC (rev 
109921)
+++ branches/ariel/xmldumps-backup/worker.py2012-01-24 11:17:27 UTC (rev 
109922)
@@ -1383,7 +1383,7 @@
 # everything that has to do with reporting the status of a piece
 # of a dump is collected here
 class Status(object):
-   def __init__(self, wiki, dumpDir, items, checksums, enabled, noticeFile 
= None, errorCallback=None, verbose = False):
+   def __init__(self, wiki, dumpDir, items, checksums, enabled, email = 
True, noticeFile = None, errorCallback=None, verbose = False):
self.wiki = wiki
self.dbName = wiki.dbName
self.dumpDir = dumpDir
@@ -1394,13 +1394,14 @@
self.failCount = 0
self.verbose = verbose
self._enabled = enabled
+   self.email = email
 
def updateStatusFiles(self, done=False):
if self._enabled:
self._saveStatusSummaryAndDetail(done)

def reportFailure(self):
-   if self._enabled:
+   if self._enabled and self.email:
if self.wiki.config.adminMail:
 		subject = "Dump failure for " + self.dbName
 		message = 
self.wiki.config.readTemplate("errormail.txt") % {
@@ -1698,7 +1699,12 @@
 
# some or all of these dumpItems will be marked to run
self.dumpItemList = DumpItemList(self.wiki, self.prefetch, 
self.spawn, self._chunkToDo, self.checkpointFile, self.jobRequested, 
self.chunkInfo, self.pageIDRange, self.runInfoFile, self.dumpDir)
-   self.status = Status(self.wiki, self.dumpDir, 
self.dumpItemList.dumpItems, self.checksums, self._statusEnabled, 
self.htmlNoticeFile, self.logAndPrint, self.verbose)
+   # only send email failure notices for full runs
+   if (self.jobRequested):
+   email = False
+   else:
+   email = True
+   self.status = Status(self.wiki, self.dumpDir, 
self.dumpItemList.dumpItems, self.checksums, self._statusEnabled, email, 
self.htmlNoticeFile, self.logAndPrint, self.verbose)
 
def logQueueReader(self,log):
if not log:


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [109614] branches/ariel/xmldumps-backup/worker.py

2012-01-21 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109614

Revision: 109614
Author:   ariel
Date: 2012-01-20 13:22:02 +0000 (Fri, 20 Jan 2012)
Log Message:
---
typo in log file initialization

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2012-01-20 13:05:44 UTC (rev 
109613)
+++ branches/ariel/xmldumps-backup/worker.py2012-01-20 13:22:02 UTC (rev 
109614)
@@ -1685,7 +1685,7 @@
# these must come after the dumpdir setup so we know which 
directory we are in 
if (self._loggingEnabled and self._makeDirEnabled):
fileObj = DumpFilename(self.wiki)
-   fileObj.newFromfilename(self.wiki.config.logFile)
+   fileObj.newFromFilename(self.wiki.config.logFile)
self.logFileName = 
self.dumpDir.filenamePublicPath(fileObj)
self.makeDir(os.path.join(self.wiki.publicDir(), 
self.wiki.date))
self.log = Logger(self.logFileName)


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [109616] branches/ariel/xmldumps-backup/worker

2012-01-21 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109616

Revision: 109616
Author:   ariel
Date: 2012-01-20 13:47:57 +0000 (Fri, 20 Jan 2012)
Log Message:
---
make failout actually check fails in a row, not total; logging option; standard 
syntax for options now

Modified Paths:
--
branches/ariel/xmldumps-backup/worker

Modified: branches/ariel/xmldumps-backup/worker
===
--- branches/ariel/xmldumps-backup/worker   2012-01-20 13:44:13 UTC (rev 
109615)
+++ branches/ariel/xmldumps-backup/worker   2012-01-20 13:47:57 UTC (rev 
109616)
@@ -1,32 +1,60 @@
 #!/bin/bash
 
-# number of failures of worker.py in a row before we decide
+# default number of failures of worker.py in a row before we decide
 # something serious is broken and we refuse to run
 MAXFAILS=3
+# default: don't pass special config file
+CONFIGFILE=
+# default: no logging to file
+LOG=
+
 failures=0
-
 WIKIDUMP_BASE=`dirname $0`
 
-if [ ! -z "$1" ]; then
-    configFile="$1"
+while [ $# -gt 0 ]; do
+    if [ "$1" == "--configfile" ]; then
+	CONFIGFILE="$2"
+	shift; shift
+    elif [ "$1" == "--maxfails" ]; then
+	MAXFAILS="$2"
+	shift; shift
+    elif [ "$1" == "--log" ]; then
+	LOG=true
+	shift;
+    else
+	echo "$0: Unknown option $1"
+	echo "Usage: $0 [--configfile filename] [--log] [--maxfails num]"
+	echo "--configfile   use specified file for config file (default: 
wikidump.conf)"
+	echo "--log          write log of (almost) everything written to stderr 
(default: no logging)"
+	echo "--maxfails     if more than this many dumps fail in a row, exit 
(default: 3)"
+	exit 1
+    fi
+done
+
+# set up the command
+pythonargs=( "$WIKIDUMP_BASE/worker.py" )
+if [ ! -z "$CONFIGFILE" ]; then
+    pythonargs=( "${pythonargs[@]}" --configfile "$CONFIGFILE" )
 fi
+if [ ! -z "$LOG" ]; then
+    pythonargs=( "${pythonargs[@]}" --log )
+fi
 
 while true; do
     if [ -e "$WIKIDUMP_BASE/maintenance.txt" ]; then
	echo "in maintenance mode, sleeping 5 minutes"
	sleep 300
     else
-	if [ ! -z "$configFile" ]; then
-	    python $WIKIDUMP_BASE/worker.py --configfile "$configFile"
-	else
-	    python $WIKIDUMP_BASE/worker.py
-	fi
+	echo python "${pythonargs[@]}"
+	python "${pythonargs[@]}"
	if [ $? -ne 0 ]; then
	    failures=$(($failures+1))
	    if [ $failures -gt $MAXFAILS ]; then
		echo "more than $MAXFAILS failures in a row, halting."
		exit 1
	    fi
+	else
+	    failures=0
	fi
	echo "sleeping"
	sleep 30
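
Hypothetical invocation with the new standard option syntax (values are
examples only):

    ./worker --configfile wikidump.conf.testing --log --maxfails 5

Note the other behavioral fix in the last hunk: the new "else: failures=0"
branch resets the counter after any successful worker.py run, so MAXFAILS now
really bounds failures in a row rather than failures in total.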


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [109202] trunk/extensions/CongressLookup/CongressLookup.db.php

2012-01-17 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/109202

Revision: 109202
Author:   ariel
Date: 2012-01-17 20:03:50 +0000 (Tue, 17 Jan 2012)
Log Message:
---
make the friggin dash in the zip work

Modified Paths:
--
trunk/extensions/CongressLookup/CongressLookup.db.php

Modified: trunk/extensions/CongressLookup/CongressLookup.db.php
===
--- trunk/extensions/CongressLookup/CongressLookup.db.php   2012-01-17 
20:02:44 UTC (rev 109201)
+++ trunk/extensions/CongressLookup/CongressLookup.db.php   2012-01-17 
20:03:50 UTC (rev 109202)
@@ -21,7 +21,7 @@
 	   table or to have NULL for the rep id. */
 	if ( ( !$row ) || ( !$row->clz5_rep_id ) ) {
 		/* if we got the extra 4 digits, use them */
-		$zip9 = intval( $zip );
+		$zip9 = intval( self::trimZip( $zip, 9 ) ); // remove 
the dash and pad if needed
 		if ( $zip9 >= 1 ) {
 			$row = $dbr->selectRow( 'cl_zip9', 
'clz9_rep_id', array( 'clz9_zip' => $zip9 ) );
 			if ( $row ) {
@@ -131,14 +131,22 @@
 	 */ 
 	public static function trimZip( $zip, $length ) {
 		$zip = trim( $zip );
-		if ( strlen( $zip ) < 5 ) {
-			$zip = sprintf( "%05d", $zip );
+		if ( strpos( $zip, '-' ) === False ) {
+			if ( strlen( $zip ) < 5 ) {
+				$zip = sprintf( "%05d", $zip );
+			}
+			elseif ( strlen( $zip ) > 5 ) {
+				$zip = sprintf( "%09d", $zip );
+			}
 		}
-		elseif ( strlen( $zip ) > 5 ) {
-			$zip = sprintf( "%09d", $zip );
+		else {
+			$zipPieces = explode( '-', $zip, 2 );
+			if (! $zipPieces[1]) {
+				$zipPieces[1] = 0;
+			}
+			$zip = sprintf( "%05d%04d", $zipPieces[0], $zipPieces[1] );
 		}
-		$zipPieces = explode( '-', $zip, 2 );
-		$zip = substr( $zipPieces[0], 0, $length );
+		$zip = substr( $zip, 0, $length );
 		return $zip;
 	}
 }
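
A rough Python rendering of the revised trimZip(), for illustration only (the
canonical logic is the PHP above):

    def trim_zip(zip_code, length):
        zip_code = zip_code.strip()
        if "-" not in zip_code:
            if len(zip_code) < 5:
                zip_code = "%05d" % int(zip_code)   # pad short 5-digit zips
            elif len(zip_code) > 5:
                zip_code = "%09d" % int(zip_code)   # pad short zip+4 values
        else:
            base, _, plus4 = zip_code.partition("-")
            # drop the dash, pad both halves
            zip_code = "%05d%04d" % (int(base), int(plus4 or "0"))
        return zip_code[:length]

    # trim_zip("1234-56", 9) -> "012340056"; the old code's intval($zip)
    # stopped at the dash, so a dashed zip+4 never matched the cl_zip9 table.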


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [108383] branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c

2012-01-09 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/108383

Revision: 108383
Author:   ariel
Date: 2012-01-09 08:32:59 +0000 (Mon, 09 Jan 2012)
Log Message:
---
tweak a couple error messages

Modified Paths:
--
branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c

Modified: branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c
===
--- branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c  2012-01-09 06:36:40 UTC 
(rev 108382)
+++ branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c  2012-01-09 08:32:59 UTC 
(rev 108383)
@@ -313,7 +313,7 @@
   }
   seekresult = lseek(fin, bfile->position, SEEK_SET);
   if (seekresult == (off_t)-1) {
-    fprintf(stderr,"lseek of file to %"PRId64" failed (7)\n",bfile->position);
+    fprintf(stderr,"lseek of file to %"PRId64" failed (9)\n",bfile->position);
     return(-1);
   }
 
@@ -682,7 +682,7 @@
 	/* leave the file at the right position */
 	seekresult = lseek(fin, bfile->block_start, SEEK_SET);
 	if (seekresult == (off_t)-1) {
-	  fprintf(stderr,"lseek of file to %"PRId64" failed 
(7)\n",bfile->position);
+	  fprintf(stderr,"lseek of file to %"PRId64" failed 
(8)\n",bfile->position);
 	  return(-1);
 	}
 	bfile->position = seekresult;


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [108384] branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c

2012-01-09 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/108384

Revision: 108384
Author:   ariel
Date: 2012-01-09 08:36:00 +0000 (Mon, 09 Jan 2012)
Log Message:
---
catch the case where the page id requested is less than first page id in file

Modified Paths:
--
branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c

Modified: branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c
===
--- branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c   
2012-01-09 08:32:59 UTC (rev 108383)
+++ branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c   
2012-01-09 08:36:00 UTC (rev 108384)
@@ -41,7 +41,7 @@
     return(0);
   }
   else {
-    fprintf(stderr,"failed to find the next frigging block marker\n");
+    fprintf(stderr,"Failed to find the next block marker\n");
     return(-1);
   }
 }
@@ -91,7 +91,7 @@
 	if (match_base_expr[1].rm_so >=0) {
 	  hostname_length = match_base_expr[1].rm_eo - match_base_expr[1].rm_so;
 	  if (hostname_length > sizeof(hostname)) {
-	    fprintf(stderr,"very long hostname, giving up\n");
+	    fprintf(stderr,"Very long hostname, giving up\n");
 	    break;
 	  }
 	  else {
@@ -339,7 +339,7 @@
 	   hopefully that doesn't take forever. 
 	*/
 	if (buffer_count>(2000/BUFINSIZE) && rev_id) {
-	  if (verbose) fprintf(stderr, "passed cutoff for using api\n");
+	  if (verbose) fprintf(stderr, "passed retries cutoff for using api\n");
 	  if (use_api) {
 	    page_id_found = get_page_id_from_rev_id_via_api(rev_id, fin);
 	  }
@@ -442,19 +442,23 @@
   /* if we're this close, we'll check this value and be done with it */
   if (iinfo->right_end - iinfo->left_end < (off_t)2) {
     new_position = iinfo->left_end;
+    if (verbose >= 2) fprintf(stderr, "choosing new position (1) 
%"PRId64"\n",new_position);
     iinfo->right_end = iinfo->left_end;
   }
   else {
     if (iinfo->last_value < iinfo->value_wanted) {
-      if (verbose >=2) fprintf(stderr,"resetting left end\n");
+      if (verbose >= 2) fprintf(stderr,"resetting left end\n");
       iinfo->left_end = iinfo->last_position;
       new_position = iinfo->last_position + interval;
+      if (verbose >= 2) fprintf(stderr, "choosing new position (2) 
%"PRId64"\n",new_position);
     }
     /* iinfo->last_value > iinfo->value_wanted */
     else {
       if (verbose >=2) fprintf(stderr,"resetting right end\n");
       iinfo->right_end = iinfo->last_position;
       new_position = iinfo->last_position - interval;
+      if (new_position < 0) new_position = 0;
+      if (verbose >= 2) fprintf(stderr, "choosing new position (3) 
%"PRId64"\n",new_position);
     }
   }
   res = get_first_page_id_after_offset(fin, new_position, pinfo, use_api, 
use_stub, stubfilename, verbose);
@@ -550,7 +554,7 @@
     else if (optc=='v') 
       verbose++;
     else if (optc==-1) break;
-    else usage(argv[0],"unknown option or other error\n");
+    else usage(argv[0],"Unknown option or other error\n");
   }
 
   if (! filename || ! page_id) {
   }
 
   if (page_id < 1) {
-    usage(argv[0], "please specify a page_id >= 1.\n");
+    usage(argv[0], "Please specify a page_id >= 1.\n");
   }
 
   fin = open (filename, O_RDONLY);
   if (fin < 0) {
-    fprintf(stderr,"failed to open file %s for read\n", argv[1]);
+    fprintf(stderr,"Failed to open file %s for read\n", argv[1]);
     exit(1);
   }
 
@@ -585,7 +589,7 @@
     iinfo.last_position = (off_t)0;
   }
   else {
-    fprintf(stderr,"failed to get anything useful from the beginning of the 
file even, bailing.\n");
+    fprintf(stderr,"Failed to find any page from start of file, exiting\n");
     exit(1);
   }
   if (pinfo.page_id == page_id) {
@@ -593,18 +597,26 @@
     fprintf(stdout,"position:%"PRId64" page_id:%d\n",pinfo.position, 
pinfo.page_id);
     exit(0);
   }
-
+  if (pinfo.page_id > page_id) {
+    fprintf(stderr,"Page requested is less than first page id in file\n");
+    exit(-1);
+  }
   while (1) {
     res = do_iteration(&iinfo, fin, &pinfo, use_api, use_stub, stubfile, 
verbose);
-    /* things to check: bad return? interval is 0 bytes long? */
-    if (iinfo.left_end == iinfo.right_end) {
-      fprintf(stdout,"position:%"PRId64" page_id:%d\n",pinfo.position, 
pinfo.page_id);
-      exit(0);
-    }
-    else if (res < 0) {
-      fprintf(stderr,"broken and quitting\n");
+    if (res < 0) {
+      fprintf(stderr,"Error encountered during search\n");
       exit(-1);
     }
+    else if (iinfo.left_end == iinfo.right_end) {
+      if ( pinfo.page_id <= page_id) {
+	fprintf(stdout,"position:%"PRId64" page_id:%d\n",pinfo.position, 
pinfo.page_id);
+	exit(0);
+      }
+      else {
+	fprintf(stderr,"File does not contain requested page id\n");
+	exit(-1);
+      }
+    }
   }
   exit(0);
 }
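
The main loop above is a bisection over byte offsets; stripped of the bz2
block-alignment details the C code handles, the idea is roughly (illustrative
Python, not the actual implementation):

    def find_page_offset(first_id_after, file_size, wanted):
        # first_id_after(pos): id of the first page at or after pos, i.e.
        # what get_first_page_id_after_offset() does for the bz2 file
        left, right = 0, file_size
        while right - left >= 2:
            mid = (left + right) // 2
            if first_id_after(mid) <= wanted:
                left = mid      # wanted page starts at or after mid
            else:
                right = mid     # overshot: wanted page starts before mid
        return left             # offset of a block at or before the page

The two new exits correspond to the ends of that interval: bail out up front
if the first page id in the whole file already exceeds the one requested, and
after convergence report "File does not contain requested page id" if the id
found is still larger than the one asked for.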


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

[MediaWiki-CVS] SVN: [108204] branches/ariel/xmldumps-backup/writeuptopageid.c

2012-01-06 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/108204

Revision: 108204
Author:   ariel
Date: 2012-01-06 09:16:53 +0000 (Fri, 06 Jan 2012)
Log Message:
---
skip potential garbage after siteinfo header and before page tag

Modified Paths:
--
branches/ariel/xmldumps-backup/writeuptopageid.c

Modified: branches/ariel/xmldumps-backup/writeuptopageid.c
===
--- branches/ariel/xmldumps-backup/writeuptopageid.c2012-01-06 09:14:45 UTC 
(rev 108203)
+++ branches/ariel/xmldumps-backup/writeuptopageid.c2012-01-06 09:16:53 UTC 
(rev 108204)
@@ -4,7 +4,7 @@
 #include <errno.h>
 #include <string.h>
 
-typedef enum { None, StartHeader, StartPage, AtPageID, WriteMem, Write, 
EndPage, AtLastPageID } States;
+typedef enum { None, StartHeader, EndHeader, StartPage, AtPageID, WriteMem, 
Write, EndPage, AtLastPageID } States;
 
 /* assume the header is never going to be longer than 1000 x 80 4-byte 
characters... how many
    namespaces will one project want? */
@@ -29,9 +29,20 @@
 States setState (char *line, States currentState, int startPageID, int 
endPageID) {
   int pageID = 0;
 
+  if (currentState == EndHeader) {
+    /* if we have junk after the header we don't write it.
+       commands like dumpbz2filefromoffset can produce such streams. */
+    if (strncmp(line,"<page>",6)) {
+      return(None);
+    }
+  }
+
   if (!strncmp(line,"<mediawiki",10)) {
     return(StartHeader);
   }
+  else if (!strncmp(line,"</siteinfo>",11)) {
+    return(EndHeader);
+  }
   else if (!strncmp(line,"<page>",6)) {
     return(StartPage);
   }
@@ -87,7 +98,7 @@
 
 /* returns 1 on success, 0 on error */
 int writeIfNeeded(char *line, States state) {
-  if (state == StartHeader || state == WriteMem || state == Write || state == 
EndPage) {
+  if (state == StartHeader || state == EndHeader || state == WriteMem || state 
== Write || state == EndPage) {
     return(fwrite(line,strlen(line),1,stdout));
   }
 }
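
In other words, once </siteinfo> has been seen, anything before the next
<page> line is dropped. A small Python sketch of the same filtering idea (not
the C program itself):

    def skip_junk_after_header(lines):
        in_gap = False
        for line in lines:
            if line.lstrip().startswith("</siteinfo>"):
                in_gap = True    # header is done; junk may follow
                yield line
                continue
            if in_gap:
                if not line.lstrip().startswith("<page>"):
                    continue     # drop garbage between header and first page
                in_gap = False   # first real page reached; resume output
            yield line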


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [108011] branches/ariel/xmldumps-backup/mwbzutils/Makefile

2012-01-04 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/108011

Revision: 108011
Author:   ariel
Date: 2012-01-04 10:58:29 +0000 (Wed, 04 Jan 2012)
Log Message:
---
bump version number

Modified Paths:
--
branches/ariel/xmldumps-backup/mwbzutils/Makefile

Modified: branches/ariel/xmldumps-backup/mwbzutils/Makefile
===
--- branches/ariel/xmldumps-backup/mwbzutils/Makefile   2012-01-04 10:51:57 UTC 
(rev 108010)
+++ branches/ariel/xmldumps-backup/mwbzutils/Makefile   2012-01-04 10:58:29 UTC 
(rev 108011)
@@ -84,7 +84,7 @@
 
 distclean: clean
 
-DISTNAME=mwbzutils-0.0.2
+DISTNAME=mwbzutils-0.0.3
 dist: 
rm -f $(DISTNAME)
ln -s -f . $(DISTNAME)


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [107870] branches/ariel/xmldumps-backup

2012-01-03 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/107870

Revision: 107870
Author:   ariel
Date: 2012-01-03 09:06:41 +0000 (Tue, 03 Jan 2012)
Log Message:
---
add support for creation of a multiple bz2 stream dump of articles

Modified Paths:
--
branches/ariel/xmldumps-backup/WikiDump.py
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/WikiDump.py
===
--- branches/ariel/xmldumps-backup/WikiDump.py  2012-01-03 09:05:11 UTC (rev 
107869)
+++ branches/ariel/xmldumps-backup/WikiDump.py  2012-01-03 09:06:41 UTC (rev 
107870)
@@ -203,6 +203,7 @@
 	    "grep": "/bin/grep",
 	    "checkforbz2footer": "/usr/local/bin/checkforbz2footer",
 	    "writeuptopageid": "/usr/local/bin/writeuptopageid",
+	    "recompressxml": "/usr/local/bin/recompressxml",
 	    #"cleanup": {
 	    "keep": 3,
 	    #"chunks": {
@@ -284,6 +285,7 @@
 	self.grep = self.conf.get("tools", "grep")
 	self.checkforbz2footer = 
self.conf.get("tools","checkforbz2footer")
 	self.writeuptopageid = self.conf.get("tools","writeuptopageid")
+	self.recompressxml = self.conf.get("tools","recompressxml")
 
if not self.conf.has_section('cleanup'):
self.conf.add_section('cleanup')

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2012-01-03 09:05:11 UTC (rev 
107869)
+++ branches/ariel/xmldumps-backup/worker.py2012-01-03 09:06:41 UTC (rev 
107870)
@@ -688,6 +688,19 @@
 			  "Recombine all pages 
with complete edit history (.7z)",
 			  "These dumps can be 
*very* large, uncompressing up to 100 times the archive download size. " +
 			  "Suitable for 
archival and statistical use, most mirror sites won't want or need this.", 
self.findItemByName('metahistory7zdump'), self.wiki))
+	# doing this only for recombined/full articles dump
+	if (self.chunkInfo.chunksEnabled()):
+	    inputForMultistream = "articlesdumprecombine"
+	else:
+	    inputForMultistream = "articlesdump"
+	self.dumpItems.append(
+	    XmlMultiStreamDump("articles",
+			       "articlesmultistreamdump",
+			       "Articles, templates, media/file 
descriptions, and primary meta-pages, in multiple bz2 streams, 100 pages per 
stream",
+			       "This contains current versions of 
article content, in concatenated bz2 streams, 100 pages per stream, plus a 
separate " +
+			       "index of page titles/ids and 
offsets into the file.  Useful for offline readers, or for parallel processing 
of pages.",
+			       
self.findItemByName(inputForMultistream), self.wiki, None))
+
 	results = self._runInfoFile.getOldRunInfoFromFile()
 	if (results):
 	    for runInfoObj in results:
@@ -3326,6 +3339,161 @@
 	if (error):
 	    raise BackupError("error recombining xml bz2 files")
 
+class XmlMultiStreamDump(XmlDump):
+#class XmlRecompressDump(Dump):
+	"""Take a .bz2 and recompress it as multistream bz2, 100 pages per 
stream."""
+
+	def __init__(self, subset, name, desc, detail, itemForRecompression, 
wiki, chunkToDo, chunks = False, checkpoints = False, checkpointFile = None):
+		self._subset = subset
+		self._detail = detail
+		self._chunks = chunks
+		if self._chunks:
+			self._chunksEnabled = True
+		self._chunkToDo = chunkToDo
+		self.wiki = wiki
+		self.itemForRecompression = itemForRecompression
+		if checkpoints:
+			self._checkpointsEnabled = True
+		self.checkpointFile = checkpointFile
+		Dump.__init__(self, name, desc)
+
+	def getDumpName(self):
+		return "pages-" + self._subset
+
+	def getFileType(self):
+		return "xml"
+
+	def getFileExt(self):
+		return "bz2"
+
+	def getDumpNameMultistream(self, name):
+		return name + "-multistream"
+
+	def getDumpNameMultistreamIndex(self, name):
+		return self.getDumpNameMultistream(name) + "-index"
+
+	def getFileMultistreamName(self, f):
+		"""assuming that f is the name of an input file,
+		return the name of the associated multistream output file

[MediaWiki-CVS] SVN: [107871] branches/ariel/xmldumps-backup/worker.py

2012-01-03 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/107871

Revision: 107871
Author:   ariel
Date: 2012-01-03 09:22:24 +0000 (Tue, 03 Jan 2012)
Log Message:
---
check job dependencies for article bz2 multistream

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2012-01-03 09:06:41 UTC (rev 
107870)
+++ branches/ariel/xmldumps-backup/worker.py2012-01-03 09:22:24 UTC (rev 
107871)
@@ -801,6 +801,14 @@
 	if ((job == "metahistorybz2dump") or (job == "metacurrentdump") 
or (job == "articlesdump")):
 	    if (not self.jobDoneSuccessfully("xmlstubsdump")):
 		return False
+	if (job == "articlesmultistreamdump"):
+	    if (self.chunkInfo.chunksEnabled()):
+		if (not 
self.jobDoneSuccessfully("articlesdumprecombine")):
+		    return False
+	    else:
+		if (not 
self.jobDoneSuccessfully("articlesdump")):
+		    return False
+
return True
  
def _getChunkToDo(self, jobName):


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [107839] branches/ariel/xmldumps-backup/mwbzutils

2012-01-02 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/107839

Revision: 107839
Author:   ariel
Date: 2012-01-02 17:20:24 +0000 (Mon, 02 Jan 2012)
Log Message:
---
utility to compress an input stream into multiple bz2 streams on output, with 
index of pages and offsets

Modified Paths:
--
branches/ariel/xmldumps-backup/mwbzutils/Makefile
branches/ariel/xmldumps-backup/mwbzutils/README

Added Paths:
---
branches/ariel/xmldumps-backup/mwbzutils/recompressxml.c

Modified: branches/ariel/xmldumps-backup/mwbzutils/Makefile
===
--- branches/ariel/xmldumps-backup/mwbzutils/Makefile   2012-01-02 17:16:10 UTC 
(rev 107838)
+++ branches/ariel/xmldumps-backup/mwbzutils/Makefile   2012-01-02 17:20:24 UTC 
(rev 107839)
@@ -29,7 +29,8 @@
 all: checkforbz2footer \
dumpbz2filefromoffset \
dumplastbz2block \
-   findpageidinbz2xml
+   findpageidinbz2xml \
+   recompressxml
 
 dumplastbz2block: $(OBJSBZ) mwbzlib.o dumplastbz2block.o
$(CC) $(CFLAGS) $(LDFLAGS) -o dumplastbz2block dumplastbz2block.o 
mwbzlib.o  $(OBJSBZ) -lbz2
@@ -43,20 +44,26 @@
 dumpbz2filefromoffset: $(OBJSBZ) mwbzlib.o dumpbz2filefromoffset.o
$(CC) $(CFLAGS) $(LDFLAGS) -o dumpbz2filefromoffset 
dumpbz2filefromoffset.o mwbzlib.o $(OBJSBZ) -lbz2
 
-install: dumplastbz2block findpageidinbz2xml checkforbz2footer 
dumpbz2filefromoffset
+recompressxml: $(OBJSBZ) recompressxml.o
+   $(CC) $(CFLAGS) $(LDFLAGS) -o recompressxml recompressxml.o -lbz2
+
+install: dumplastbz2block findpageidinbz2xml checkforbz2footer 
dumpbz2filefromoffset recompressxml
if ( test ! -d $(PREFIX)/bin ) ; then mkdir -p $(PREFIX)/bin ; fi
cp -f dumplastbz2block $(PREFIX)/bin/dumplastbz2block
cp -f findpageidinbz2xml $(PREFIX)/bin/findpageidinbz2xml
cp -f checkforbz2footer $(PREFIX)/bin/checkforbz2footer
cp -f dumpbz2filefromoffset $(PREFIX)/bin/dumpbz2filefromoffset
+   cp -f recompressxml $(PREFIX)/bin/recompressxml
chmod a+x $(PREFIX)/bin/dumplastbz2block
chmod a+x $(PREFIX)/bin/findpageidinbz2xml
chmod a+x $(PREFIX)/bin/checkforbz2footer
chmod a+x $(PREFIX)/bin/dumpbz2filefromoffset
+   chmod a+x $(PREFIX)/bin/recompressxml
 
 clean: 
rm -f *.o *.a dumplastbz2block findpageidinbz2xml \
-   checkforbz2footer dumpbz2filefromoffset
+   checkforbz2footer dumpbz2filefromoffset \
+   recompressxml
 
 bzlibfuncs.o: bzlibfuncs.c bzlib.h bzlib_private.h
$(CC) $(CFLAGS) -c bzlibfuncs.c
@@ -72,6 +79,8 @@
$(CC) $(CFLAGS) -c checkforbz2footer.c
 dumpbz2filefromoffset.o: dumpbz2filefromoffset.c
$(CC) $(CFLAGS) -c dumpbz2filefromoffset.c
+recompressxml.o: recompressxml.c
+   $(CC) $(CFLAGS) -c recompressxml.c
 
 distclean: clean
 
@@ -80,6 +89,7 @@
rm -f $(DISTNAME)
ln -s -f . $(DISTNAME)
tar cvf $(DISTNAME).tar \
+  $(DISTNAME)/recompressxml.c \
   $(DISTNAME)/dumplastbz2block.c \
   $(DISTNAME)/findpageidinbz2xml.c \
   $(DISTNAME)/checkforbz2footer.c \

Modified: branches/ariel/xmldumps-backup/mwbzutils/README
===
--- branches/ariel/xmldumps-backup/mwbzutils/README 2012-01-02 17:16:10 UTC 
(rev 107838)
+++ branches/ariel/xmldumps-backup/mwbzutils/README 2012-01-02 17:20:24 UTC 
(rev 107839)
@@ -42,6 +42,18 @@
 position:x pageid:nnn
It exits with 0 on success, -1 on error.
 
+recompressxml - Reads an xml stream of pages and writes multiple bz2 
compressed
+		streams, concatenated, to stdout, with the specified 
number of 
+		pages per stream. The mediawiki site info header is in 
its
+		own bz2 stream.  Each stream can be extracted as a 
separate file
+		by an appropriate tool, checking for the byte-aligned 
string "BZh91AY&SY"
+		and a following <page> tag (after uncompressing the 
first chunk
+		of data after that string).  Alternatively, a tool can 
seek to
+		the location of one of the streams in order to find a 
particular
+		page.  An index of file-offset:page-id:page-title lines
+		is written to a specified file if desired; the index 
file will be
+		bz2 compressed if the filename given ends with .bz2.
+
 Library routines:
 
 mwbzlib.c	- various utility functions (bitmasks, shifting and 
comparing bytes,

Added: branches/ariel/xmldumps-backup/mwbzutils/recompressxml.c
===
--- branches/ariel/xmldumps-backup/mwbzutils/recompressxml.c
(rev 0)
+++ branches/ariel/xmldumps-backup/mwbzutils
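
As a reader-side illustration of the format described in the README hunk
above: a minimal Python sketch that extracts one stream from a multistream
file, given a byte offset taken from the offset:page-id:page-title index
(illustration only, not part of the commit):

    import bz2

    def read_stream(dump_path, offset):
        decomp = bz2.BZ2Decompressor()
        parts = []
        with open(dump_path, "rb") as f:
            f.seek(offset)       # jump straight to one bz2 stream
            while True:
                chunk = f.read(65536)
                if not chunk:
                    break
                parts.append(decomp.decompress(chunk))
                if decomp.unused_data:
                    break        # start of the next stream reached
        return b"".join(parts)   # XML for up to 100 <page> elements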

[MediaWiki-CVS] SVN: [107841] branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c

2012-01-02 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/107841

Revision: 107841
Author:   ariel
Date: 2012-01-02 17:24:54 +0000 (Mon, 02 Jan 2012)
Log Message:
---
add verbose option

Modified Paths:
--
branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c

Modified: branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c
===
--- branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c   
2012-01-02 17:22:25 UTC (rev 107840)
+++ branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c   
2012-01-02 17:24:54 UTC (rev 107841)
@@ -226,12 +226,12 @@
        format: 
        <?xml version="1.0"?><api><query><pages><page pageid="6215" ns="0" 
title="hystérique" /></pages></query></api>
     */
-    match_page_id_expr = (regmatch_t *)malloc(sizeof(regmatch_t)*2);
+    match_page_id_expr = (regmatch_t *)malloc(sizeof(regmatch_t)*3);
     res = regcomp(&compiled_page_id_expr, page_id_expr, REG_EXTENDED);
 
-    if (regexec(&compiled_page_id_expr, buffer,  2,  match_page_id_expr, 0 ) 
== 0) {
-      if (match_page_id_expr[1].rm_so >=0) {
-	page_id = atol(buffer + match_page_id_expr[1].rm_so);
+    if (regexec(&compiled_page_id_expr, buffer,  3,  match_page_id_expr, 0 ) 
== 0) {
+      if (match_page_id_expr[2].rm_so >=0) {
+	page_id = atol(buffer + match_page_id_expr[2].rm_so);
       }
     }
     return(page_id);
@@ -250,13 +250,13 @@
    0 if no pageid found,
    -1 on error
 */
-int get_first_page_id_after_offset(int fin, off_t position, page_info_t 
*pinfo, int use_api, int use_stub, char *stubfilename) {
+int get_first_page_id_after_offset(int fin, off_t position, page_info_t 
*pinfo, int use_api, int use_stub, char *stubfilename, int verbose) {
   int res;
   regmatch_t *match_page, *match_page_id, *match_rev, *match_rev_id;
   regex_t compiled_page, compiled_page_id, compiled_rev, compiled_rev_id;
   int length=5000; /* output buffer size */
   char *page = "<page>";
-  char *page_id = "<page>\n[ ]+<title>[^<]+</title>\n[ ]+<id>([0-9]+)</id>\n"; 
+  char *page_id = "<page>\n[ ]+<title>[^<]+</title>\n([ ]+<ns>[0-9]+</ns>\n)?[ 
]+<id>([0-9]+)</id>\n"; 
   char *rev = "<revision>";
   char *rev_id_expr = "<revision>\n[ ]+<id>([0-9]+)</id>\n";
 
@@ -275,7 +275,7 @@
   res = regcomp(&compiled_rev_id, rev_id_expr, REG_EXTENDED);
 
   match_page = (regmatch_t *)malloc(sizeof(regmatch_t)*1);
-  match_page_id = (regmatch_t *)malloc(sizeof(regmatch_t)*2);
+  match_page_id = (regmatch_t *)malloc(sizeof(regmatch_t)*3);
   match_rev = (regmatch_t *)malloc(sizeof(regmatch_t)*1);
   match_rev_id = (regmatch_t *)malloc(sizeof(regmatch_t)*2);
@@ -288,21 +288,23 @@
   bfile.bytes_read = 0;
 
   if (find_first_bz2_block_from_offset(&bfile, fin, position, FORWARD) <= 
(off_t)0) {
-    /* fprintf(stderr,"failed to find block in bz2file (1)\n"); */
+    if (verbose) fprintf(stderr,"failed to find block in bz2file after offset 
%"PRId64" (1)\n", position);
     return(-1);
   }
 
+  if (verbose) fprintf(stderr,"found first block in bz2file after offset 
%"PRId64"\n", position);
+
   while (!get_buffer_of_uncompressed_data(b, fin, &bfile, FORWARD) && (! 
bfile.eof)) {
     buffer_count++;
+    if (verbose >=2) fprintf(stderr,"buffers read: %d\n", buffer_count);
     if (bfile.bytes_written) {
-      while (regexec(&compiled_page_id, (char *)b->next_to_read,  2,  
match_page_id, 0 ) == 0) {
-	if (match_page_id[1].rm_so >=0) {
-	  /* write page_id to stderr */
-	  /*
-	    fwrite(b->next_to_read+match_page_id[1].rm_so, sizeof(unsigned 
char), match_page_id[1].rm_eo - match_page_id[1].rm_so, stderr);
+      while (regexec(&compiled_page_id, (char *)b->next_to_read,  3,  
match_page_id, 0 ) == 0) {
+	if (match_page_id[2].rm_so >=0) {
+	  if (verbose){
+	    fwrite(b->next_to_read+match_page_id[2].rm_so, sizeof(unsigned 
char), match_page_id[2].rm_eo - match_page_id[2].rm_so, stderr);
 	    fwrite("\n",1,1,stderr);
-	  */
-	  pinfo->page_id = atoi((char 
*)(b->next_to_read+match_page_id[1].rm_so));
+	  }
+	  pinfo->page_id = atoi((char 
*)(b->next_to_read+match_page_id[2].rm_so));
 	  pinfo->position = bfile.block_start;
 	  pinfo->bits_shifted = bfile.bits_shifted;
 	  return(1);
@@ -337,6 +339,7 @@
   hopefully that doesn't take forever. 
*/
 	if (buffer_count>(2000/BUFINSIZE) && rev_id) {
+	  if (verbose) fprintf(stderr, "passed cutoff for using api\n");
  if (use_api) {
page_id_found = get_page_id_from_rev_id_via_api(rev_id, fin);
  }
@@ -420,7 +423,7 @@
 
return value from guess, or -1 on error. 
  */
-int do_iteration(iter_info_t *iinfo, int fin, page_info_t *pinfo, int use_api, 
int use_stub, char *stubfilename) {
+int do_iteration(iter_info_t *iinfo, int fin, page_info_t *pinfo, int use_api, 
int use_stub, char *stubfilename, int verbose) {
   int res;
   off_t new_position;
   off_t interval;
@@ -434,7 +437,8 @@
   if (interval == (off_t)0

[MediaWiki-CVS] SVN: [106443] branches/ariel/xmldumps-backup/worker.py

2011-12-16 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/106443

Revision: 106443
Author:   ariel
Date: 2011-12-16 15:57:06 +0000 (Fri, 16 Dec 2011)
Log Message:
---
* Add aftercheckpoint option which will restart a job step from immediately
after the specified checkpoint file
* When doing dump using pageidrange, delete old files from same date covering
that same range of pages; also true for aftercheckpoint option
* Clean up old symlinks and rss feed entries when running latestlinks job
* verbose option for more ... verbosity (debugging)
* In human-readable description of various dump files, change image to
media/files (thanks to Danny_B for that)

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-12-16 15:45:55 UTC (rev 
106442)
+++ branches/ariel/xmldumps-backup/worker.py2011-12-16 15:57:06 UTC (rev 
106443)
@@ -18,6 +18,7 @@
 import CommandManagement
 import Queue
 import thread
+import traceback
 
 from os.path import exists
 from subprocess import Popen, PIPE
@@ -369,9 +370,10 @@
pass
 
 class RunInfoFile(object):
-   def __init__(self, wiki, enabled):
+   def __init__(self, wiki, enabled, verbose = False):
self.wiki = wiki
self._enabled = enabled
+   self.verbose = verbose
 
def saveDumpRunInfoFile(self, text):
Write out a simple text file with the status for this wiki's 
dump.
@@ -379,6 +381,9 @@
try:
self._writeDumpRunInfoFile(text)
except:
+   if (self.verbose):
+   exc_type, exc_value, exc_traceback = 
sys.exc_info()
+   print 
repr(traceback.format_exception(exc_type, exc_value, exc_traceback))
 				print "Couldn't save dump run info file. 
Continuing anyways"
 
def statusOfOldDumpIsDone(self, runner, date, jobName, jobDesc):
@@ -410,6 +415,9 @@
infile.close
return results
except:
+   if (self.verbose):
+   exc_type, exc_value, exc_traceback = 
sys.exc_info()
+   print repr(traceback.format_exception(exc_type, 
exc_value, exc_traceback))
return False
 
#
@@ -481,6 +489,9 @@
infile.close
return None
except:
+   if (self.verbose):
+   exc_type, exc_value, exc_traceback = 
sys.exc_info()
+   print repr(traceback.format_exception(exc_type, 
exc_value, exc_traceback))
return None
 
# find desc in there, look for class='done'
@@ -506,6 +517,9 @@
infile.close
return None
except:
+   if (self.verbose):
+   exc_type, exc_value, exc_traceback = 
sys.exc_info()
+   print repr(traceback.format_exception(exc_type, 
exc_value, exc_traceback))
return None
 
 
@@ -591,11 +605,11 @@
 	    #PrivateTable("filearchive", "filearchivetable", 
"Deleted image data"),
 
 	    PublicTable("site_stats", "sitestatstable", "A few 
statistics such as the page count."),
-	    PublicTable("image", "imagetable", "Metadata on current 
versions of uploaded images."),
-	    PublicTable("oldimage", "oldimagetable", "Metadata on 
prior versions of uploaded images."),
+	    PublicTable("image", "imagetable", "Metadata on current 
versions of uploaded media/files."),
+	    PublicTable("oldimage", "oldimagetable", "Metadata on 
prior versions of uploaded media/files."),
 	    PublicTable("pagelinks", "pagelinkstable", "Wiki 
page-to-page link records."),
 	    PublicTable("categorylinks", "categorylinkstable", 
"Wiki category membership link records."),
-	    PublicTable("imagelinks", "imagelinkstable", "Wiki 
image usage records."),
+	    PublicTable("imagelinks", "imagelinkstable", "Wiki 
media/files usage records."),
 	    PublicTable("templatelinks", "templatelinkstable", 
"Wiki template inclusion link records."),
 	    PublicTable("externallinks", "externallinkstable", 
"Wiki external URL link records."),
 	    PublicTable("langlinks", "langlinkstable", "Wiki 
interlanguage link records."),
@@ -627,10 +641,10 @@
 	self.dumpItems.append(
 	    XmlDump("articles",
 		    "articlesdump"

[MediaWiki-CVS] SVN: [106020] branches/ariel

2011-12-13 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/106020

Revision: 106020
Author:   ariel
Date: 2011-12-13 13:25:12 +0000 (Tue, 13 Dec 2011)
Log Message:
---
some scripts we used for looking at thumbs counts, sizes, etc... in case we 
need 'em again

Added Paths:
---
branches/ariel/tools/
branches/ariel/tools/thumbs/
branches/ariel/tools/thumbs/crunchinglogs/
branches/ariel/tools/thumbs/crunchinglogs/README
branches/ariel/tools/thumbs/crunchinglogs/datascripts/
branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py

branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbFilesSizesCounts.py
branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbPxSize.py
branches/ariel/tools/thumbs/crunchinglogs/otherscripts/

branches/ariel/tools/thumbs/crunchinglogs/otherscripts/checkExistingThumbDirs.py
branches/ariel/tools/thumbs/crunchinglogs/otherscripts/listFileNames.py

branches/ariel/tools/thumbs/crunchinglogs/otherscripts/listThumbFilesByDir.py
branches/ariel/tools/thumbs/crunchinglogs/otherscripts/removeThumbDirs.py
branches/ariel/tools/thumbs/crunchinglogs/samples/

branches/ariel/tools/thumbs/crunchinglogs/samples/do-dateanal-dates-created.sh
branches/ariel/tools/thumbs/crunchinglogs/samples/do-dateanal-dates.sh
branches/ariel/tools/thumbs/crunchinglogs/samples/do-pixel-sizes.sh

Added: branches/ariel/tools/thumbs/crunchinglogs/README
===
--- branches/ariel/tools/thumbs/crunchinglogs/README
(rev 0)
+++ branches/ariel/tools/thumbs/crunchinglogs/README2011-12-13 13:25:12 UTC 
(rev 106020)
@@ -0,0 +1,16 @@
+These scripts were written so we could get some notion of what was going on 
with thumbs, given that
+we don't keep logs and the host is i/o-bound so we can't just do a pile of 
finds.
+
+I can't imagine they will be useful to someone else but they might be useful 
to us sometime, who knows
+
+stats on the thumbs files on the filesystem:
+* go to (for example) commons/thumb/0/00, run an ls --sort=none, capture 
results into some file
+* cat the input of that to python listThumbFilesByDir.py and save the output 
of that to a file
+* filter it as needed for crap names, results into 0-00-files.txt.nobad
+* now you can run the following: do-dateanal-dates-created.sh  
do-dateanal-dates.sh  do-pixel-sizes.sh
+  they will create a little pile of files aug*txt sept*txt etc.  
+
+stats on googlebot requests:
+* go to locke, zcat sample*log*gz | grep Googlebot-Image and gzip the output 
into googlebot-image-requests.gz
+* now you can run check-all-dates.sh and it produces a small pile of output 
files 
+

Added: 
branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py
===
--- branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py  
(rev 0)
+++ branches/ariel/tools/thumbs/crunchinglogs/datascripts/thumbDateAnalysis.py  
2011-12-13 13:25:12 UTC (rev 106020)
@@ -0,0 +1,111 @@
+# -*- coding: utf-8  -*-
+
+import os
+import re
+import sys
+import time
+import getopt
+
+def usage(message=None):
+    print "Usage: %s [--sdate=date] [--edate=date] [--created] [filename]" % 
sys.argv[0]
+    print "sdate: start date for which to print stats, default: earliest date 
in file "
+    print "edate: end date for which to print stats, default: latest date in 
file"
+    print "created: show only the number of files and sizes on the date the 
first thumb"
+    print "was created (presumably the date the image itself was first 
uploaded)"
+    print ""
+    print "Date format for sdate and edate: yyyy-mm-dd"
+    print ""
+    print "If no filename is specified, input is read from stdin"
+    print
+    print "Format of input file: (sample line)"
+    print "2011-10-29  01:57:51   100311   
Festiwal_Słowian_i_Wikingów_2009_121.jpg/640px-Festiwal_Słowian_i_Wikingów_2009_121.jpg"
+    print "date in yyyy-mm-dd format, time in hh:mm:ss format, size in bytes, 
thumb directory/thumb filename"
+    sys.exit(1)
+
+if __name__ == "__main__":
+    sdate = None
+    edate = None
+    created = False
+    try:
+        (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
+                                                 [ 'sdate=', 'edate=', 
'created' ])
+    except:
+        usage("Unknown option specified")
+
+    for (opt, val) in options:
+        if opt == "--sdate":
+            sdate = val
+        elif opt == "--edate":
+            edate = val
+        elif opt == "--created":
+            created = True
+
+    dateexp = re.compile(r"^\d{4}-\d{2}-\d{2}$")
+    for d in filter(None, [ sdate, edate ]):
+        if not dateexp.match(d):
+            usage("Bad date format.")
+
+    if len(remainder) == 1:
+        inputFile = remainder[0]
+        fHandle = open(inputFile,"r")
+    elif len(remainder) == 0:
+        fHandle = sys.stdin

[MediaWiki-CVS] SVN: [105887] branches/ariel/xmldumps-backup

2011-12-12 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/105887

Revision: 105887
Author:   ariel
Date: 2011-12-12 15:23:04 +0000 (Mon, 12 Dec 2011)
Log Message:
---
run specified query on list of wikis, one gzipped output file for each, files 
named by date and project

Added Paths:
---
branches/ariel/xmldumps-backup/wikiqueries/
branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample
branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py

Added: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample
===
--- branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample  
(rev 0)
+++ branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.conf.sample  
2011-12-12 15:23:04 UTC (rev 105887)
@@ -0,0 +1,23 @@
+[wiki]
+mediawiki=/home/wmf/mediawiki/1.18
+allwikislist=/home/wmf/conf/all.dblist
+privatewikislist=/home/wmf/conf/private.dblist
+closedwikislist=/home/wmf/conf/closed.dblist
+
+[output]
+wikiqueriesdir=/home/wmf/output/files
+temp=/var/tmp
+fileperms=0644
+
+[database]
+user=dbadmin
+password=X
+
+[tools]
+php=/usr/bin/php
+mysql=/usr/bin/mysql
+gzip=/usr/bin/gzip
+bzip2=/usr/bin/bzip2
+
+[query]
+queryfile=/home/wmf/scripts/query.sql

Added: branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py
===
--- branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py   
(rev 0)
+++ branches/ariel/xmldumps-backup/wikiqueries/wikiqueries.py   2011-12-12 
15:23:04 UTC (rev 105887)
@@ -0,0 +1,352 @@
+# for every wiki, run a specified query, gzipping the output.
+# there's a config file which needs to be set up.
+
+import getopt
+import os
+import re
+import sys
+import ConfigParser
+import subprocess
+import socket
+import time
+from subprocess import Popen, PIPE
+from os.path import exists
+import hashlib
+import traceback
+import shutil
+
+class ContentFile(object):
+    def __init__(self, config, date, wikiName):
+        self._config = config
+        self.date = date
+        self.queryDir = QueryDir(self._config)
+        self.wikiName = wikiName
+
+    # override this.
+    def getFileName(self):
+        return "content.txt"
+
+    def getPath(self):
+        return os.path.join(self.queryDir.getQueryDir(),self.getFileName())
+
+class OutputFile(ContentFile):
+    def getFileName(self):
+        return "%s-%s-wikiquery.gz" % ( self.wikiName, self.date )
+
+class Config(object):
+    def __init__(self, configFile=False):
+        self.projectName = False
+
+        home = os.path.dirname(sys.argv[0])
+        if (not configFile):
+            configFile = "wikiqueries.conf"
+        self.files = [
+            os.path.join(home,configFile),
+            "/etc/wikiqueries.conf",
+            os.path.join(os.getenv("HOME"), ".wikiqueries.conf")]
+        defaults = {
+            #"wiki": {
+            "allwikislist": "",
+            "privatewikislist": "",
+            "closedwikislist": "",
+            #"output": {
+            "wikiqueriesdir": "/wikiqueries",
+            "temp": "/wikiqueries/temp",
+            "fileperms": "0640",
+            #"database": {
+            "user": "root",
+            "password": "",
+            #"tools": {
+            "php": "/bin/php",
+            "gzip": "/usr/bin/gzip",
+            "bzip2": "/usr/bin/bzip2",
+            "mysql": "/usr/bin/mysql",
+            "multiversion": "",
+            #"query": {
+            "queryfile": "wikiquery.sql"
+            }
+
+        self.conf = ConfigParser.SafeConfigParser(defaults)
+        self.conf.read(self.files)
+
+        if not self.conf.has_section("wiki"):
+            print "The mandatory configuration section 'wiki' was not defined."
+            raise ConfigParser.NoSectionError('wiki')
+
+        if not self.conf.has_option("wiki","mediawiki"):
+            print "The mandatory setting 'mediawiki' in the section 'wiki' was 
not defined."
+            raise ConfigParser.NoOptionError('wiki','mediawiki')
+
+        self.parseConfFile()
+
+    def parseConfFile(self):
+        self.mediawiki = self.conf.get("wiki", "mediawiki")
+        self.allWikisList = MiscUtils.dbList(self.conf.get("wiki", 
"allwikislist"))
+        self.privateWikisList = MiscUtils.dbList(self.conf.get("wiki", 
"privatewikislist"))
+        self.closedWikisList = MiscUtils.dbList(self.conf.get("wiki", 
"closedwikislist"))
+
+        if not self.conf.has_section('output'):
+            self.conf.add_section('output')
+        self.wikiQueriesDir = self.conf.get("output", "wikiqueriesdir")
+        self.tempDir = self.conf.get("output", "temp")
+        self.fileperms = self.conf.get("output", "fileperms")
+        self.fileperms = int(self.fileperms,0)
+
+        if not self.conf.has_section('database'):
+            self.conf.add_section('database')
+        self.dbUser = self.conf.get("database", "user")
+        self.dbPassword = self.conf.get("database", "password")
+
+        if not self.conf.has_section('tools

[MediaWiki-CVS] SVN: [104422] branches/ariel/xmldumps-backup/incrementals

2011-11-28 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104422

Revision: 104422
Author:   ariel
Date: 2011-11-28 11:53:49 +0000 (Mon, 28 Nov 2011)
Log Message:
---
config option determines when locks for maxrevid phase are stale; cmd line 
option to clean up stale locks encountered during run

Modified Paths:
--
branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py

Modified: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
===
--- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py  2011-11-28 
11:41:47 UTC (rev 104421)
+++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py  2011-11-28 
11:53:49 UTC (rev 104422)
@@ -10,6 +10,7 @@
 import subprocess
 from subprocess import Popen, PIPE
 import shutil
+import time
 
 class ContentFile(object):
 def __init__(self, config, date, wikiName):
@@ -114,6 +115,18 @@
 except:
 return False
 
+def isStaleLock(self):
+if not self.isLocked():
+return False
+try:
+timestamp = os.stat(self.lockFile.getPath()).st_mtime
+except:
+return False
+        if (time.time() - timestamp) > self._config.staleInterval:
+return True
+else:
+return False
+
 def unlock(self):
 os.remove(self.lockFile.getPath())
 
@@ -161,6 +174,7 @@
             "webroot": "http://localhost/dumps/incr",
             "fileperms": "0640",
             "delay": "43200",
+            "maxrevidstaleinterval": "3600",
             #"database": {
             "user": "root",
             "password": "",
@@ -206,6 +220,8 @@
 self.fileperms = int(self.fileperms,0)
 self.delay = self.conf.get(output, delay)
 self.delay = int(self.delay,0)
+self.staleInterval = self.conf.get(output, maxrevidstaleinterval)
+self.staleInterval = int(self.staleInterval,0)
 
 if not self.conf.has_section('tools'):
 self.conf.add_section('tools')

Modified: branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
===
--- branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
2011-11-28 11:41:47 UTC (rev 104421)
+++ branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
2011-11-28 11:53:49 UTC (rev 104422)
@@ -40,11 +40,12 @@
 return exists(self.maxRevIdFile.getPath())
 
 class MaxIDDump(object):
-def __init__(self,config, date, verbose):
+def __init__(self,config, date, verbose, cleanupStale):
 self._config = config
 self.date = date
 self.incrDir = IncrementDir(self._config, self.date)
 self.verbose = verbose
+self.cleanupStale = cleanupStale
 
 def doOneWiki(self, w):
 success = True
@@ -52,7 +53,23 @@
 if not exists(self.incrDir.getIncDir(w)):
 os.makedirs(self.incrDir.getIncDir(w))
 lock = MaxRevIDLock(self._config, self.date, w)
-        if lock.getLock():
+        lockResult = lock.getLock()
+        if not lockResult:
+            if (self.verbose):
+                print "failed to get lock for wiki", w
+            if lock.isStaleLock():
+                if (self.verbose):
+                    print "lock is stale for wiki", w
+                # this option should be given to one process only, or you 
could have trouble.
+                if (self.cleanupStale):
+                    lock.unlock()
+                    lockResult = lock.getLock()
+                    if (self.verbose):
+                        print "stale lock removed and trying again to get 
for wiki", w
+        if lockResult:
+            if (self.verbose):
+                print "got lock ",lock.lockFile.getFileName()
+                print "checking max rev id for wiki", w
             try:
                 maxRevID = MaxRevID(self._config, w, self.date)
                 if not maxRevID.exists():
@@ -66,10 +83,10 @@
             else:
                 if (self.verbose):
                     print "Wiki ", w, "failed to get lock."
-            traceback.print_exc(file=sys.stdout)
+            success = False
         if success:
             if (self.verbose):
-                print "Success!  Wiki", w, "adds/changes dump complete."
+                print "Success!  Wiki", w, "rev max id for adds/changes dump 
complete."
             return success
 return success
 
 def doRunOnAllWikis(self):
@@ -96,11 +113,13 @@
 print message
     print "Usage: python generateincrementals.py [options] [wikidbname]"
     print "Options: --configfile, --date, --verbose"
-    print "--configfile:  Specify an alternate config file to read. 
Default file is 'dumpincr.conf' in the current directory."
-    print "--date:        (Re)run

[MediaWiki-CVS] SVN: [104139] branches/ariel/xmldumps-backup/incrementals/incrmonitor.py

2011-11-24 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104139

Revision: 104139
Author:   ariel
Date: 2011-11-24 08:45:18 +0000 (Thu, 24 Nov 2011)
Log Message:
---
be able to run with cutoff of specific date; add link to all runs for a wiki; 
fix error returns so they print; more verbosity for verbose option

Modified Paths:
--
branches/ariel/xmldumps-backup/incrementals/incrmonitor.py

Modified: branches/ariel/xmldumps-backup/incrementals/incrmonitor.py
===
--- branches/ariel/xmldumps-backup/incrementals/incrmonitor.py  2011-11-24 
08:44:49 UTC (rev 104138)
+++ branches/ariel/xmldumps-backup/incrementals/incrmonitor.py  2011-11-24 
08:45:18 UTC (rev 104139)
@@ -26,8 +26,9 @@
 makeLink = staticmethod(makeLink)
 
 class Index(object):
-def __init__(self, config, verbose):
+def __init__(self, config, date, verbose):
 self._config = config
+self.date = date
 self.indexFile = IndexFile(self._config)
 self.incrDir = IncrementDir(self._config)
 self.verbose = verbose
@@ -37,6 +38,8 @@
 	for w in self._config.allWikisList:
 	    result = self.doOneWiki(w)
 	    if result:
+		if (self.verbose):
+		    print "result for wiki ", w, " is ", result
 		text = text + "<li>"+ result + "</li>\n"
 	indexText = self._config.readTemplate("incrs-index.html") %  { "items" 
: text }
 	FileUtils.writeFileInPlace(self.indexFile.getPath(), indexText, 
self._config.fileperms)
@@ -48,8 +51,10 @@
 	if (self.verbose):
 	    print "No dump for wiki ", w
 	next
-
-	incrDate = self.incrDumpsDirs.getLatestIncrDate()
+	if date:
+	    incrDate = date
+	else:
+	    incrDate = self.incrDumpsDirs.getLatestIncrDate()
 	if not incrDate:
 	    if (self.verbose):
 		print "No dump for wiki ", w
@@ -69,7 +74,8 @@
         except:
             if (self.verbose):
                 traceback.print_exc(file=sys.stdout)
-            return "Error encountered, no information available for wiki", w
+                print "Error encountered, no information available for wiki", w
+            return "Error encountered, no information available for wiki " + w
 
         try:
             wikinameText = "<strong>%s</strong>" % w
@@ -85,17 +91,19 @@
                 revsText = "revs: %s (size %s)" % (Link.makeLink(os.path.join(w, incrDate, revs.getFileName()), revsDate), revsSize)
             else:
                 revsText = None
+            otherRunsText = "other runs: %s" % Link.makeLink(w, w)
             if statContents:
                 statText = "(%s)" % (statContents)
             else:
                 statText = None

             wikiInfo = " ".join( filter( None, [ wikinameText, lockText, statText ] ) ) + "<br />"
-            wikiInfo = wikiInfo + " &nbsp;&nbsp; " + " | ".join( filter( None, [ stubText, revsText ] ))
+            wikiInfo = wikiInfo + " &nbsp;&nbsp; " + " | ".join( filter( None, [ stubText, revsText, otherRunsText ] ))
         except:
             if (self.verbose):
                 traceback.print_exc(file=sys.stdout)
-            return "Error encountered formatting information for wiki", w
+                print "Error encountered formatting information for wiki", w
+            return "Error encountered formatting information for wiki " + w
 
 return wikiInfo
 
@@ -103,25 +111,29 @@
     if message:
         print message
     print "Usage: python monitor.py [options] [wikidbname]"
-    print "Options: --configfile, --verbose"
+    print "Options: --configfile, --date, --verbose"
     print "--configfile:  Specify an alternate config file to read. Default file is 'dumpincr.conf' in the current directory."
+    print "--date:        Look at runs starting on specified date or earlier"
     print "--verbose:     Print error messages and other informative messages (normally the"
     print "               script runs silently)."
 sys.exit(1)
 
 if __name__ == "__main__":
     configFile = False
+    date = False
     verbose = False

     try:
         (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
-                                                 ['configfile=', 'verbose' ])
+                                                 ['configfile=', 'date=', 'verbose' ])
     except:
         usage("Unknown option specified")

     for (opt, val) in options:
         if opt == "--configfile":
             configFile = val
+        elif opt == "--date":
+            date = val
         elif opt == '--verbose':
             verbose = True
 
@@ -130,5 +142,5 @@
     else:
         config = Config()

-    index = Index(config, verbose)
+    index = Index(config, date, verbose)

[MediaWiki-CVS] SVN: [104140] branches/ariel/xmldumps-backup/incrementals/incrs-index.html

2011-11-24 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104140

Revision: 104140
Author:   ariel
Date: 2011-11-24 08:46:33 + (Thu, 24 Nov 2011)
Log Message:
---
more disclaimers, bolder disclaimers, info on the other files in each dir

Modified Paths:
--
branches/ariel/xmldumps-backup/incrementals/incrs-index.html

Modified: branches/ariel/xmldumps-backup/incrementals/incrs-index.html
===
--- branches/ariel/xmldumps-backup/incrementals/incrs-index.html
2011-11-24 08:45:18 UTC (rev 104139)
+++ branches/ariel/xmldumps-backup/incrementals/incrs-index.html
2011-11-24 08:46:33 UTC (rev 104140)
@@ -65,18 +65,20 @@
 	for documentation on the provided data formats.
 	</p>
 	<p>
-	  Here's the big fat disclaimer.
+	  <strong>Here's the big fat disclaimer.</strong>
 	</p>
 	<p>
-	  This service is experimental.  At any time it may not be working, for a day, a week or a month.
+	  <em>This service is experimental.</em>  At any time it may not be working, for a day, a week or a month.
 	  It is not intended to replace the full XML dumps.  We don't expect users to be able to construct
-	  full dumps of a given date from the incrementals and an older dump.
+	  full dumps of a given date from the incrementals and an older dump.  We don't guarantee that the data
+	  included in these dumps is complete, or correct, or won't break your Xbox.  In short: don't blame us (but
+	  do get on the email list and send mail: see <a href="https://lists.wikimedia.org/mailman/listinfo/xmldatadumps-l">xmldatadumps-l</a>).
 	</p>
 	<p>
 	  The data provided in these files is ''partial data''.  To be precise: 
 	  <ul>
 	    <li>* Revisions included in these dumps are not up to the minute.  We write out those that were
-	      created up to 18 hours ago; this gives local editing communities time to delete revisions 
+	      created up to 12 hours ago; this gives local editing communities time to delete revisions 
 	      with sensitive information, vulgarities and other vandalism, etc.</li>
 	    <li>* New pages entered for the first time during the time interval are included</li>
 	    <li>* Revisions of undeleted pages will be included only if new revision IDs need to be assigned to 
@@ -85,7 +87,8 @@
 	    <li>* Imported revisions will be included if they were imported during the time interval, since they
 	      will have new revisions IDs.</li>
 	    <li>* As with all dumps, hidden revisions or more generally revisions not readable by the general public
-	      are not provided.</li>
+	      are not provided.</li>
+	    <li>* When a wiki is closed, it no longer shows up in this list.</li>
 	  </ul>
 	</p>
 	<p>
@@ -104,7 +107,19 @@
 	  you get articles, user pages, discussion pages, etc.  If you want articles only, you will need to write a
 	  filter to grab just those entries.
 	</p>
-	<h2>Adds/changes dump listing</h2>
+	<p>
+	  The md5sums.txt file contains the md5 hash of the stubs file and the revs file, so that downloaders can verify
+	  the integrity of the files after download.
+	</p>
+	<p>
+	  The file maxrevid.txt contains the largest revision ID on the project at the time we checked, which should be the 
+	  same as the timestamp of that file. 
+	</p>
+	<p>
+	  The file status.txt, if it exists, will contain the value "done" in cases where the run is complete and was 
+	  successful.
+	</p>
+	<h2>Adds/changes dump listing (links to latest complete run)</h2>
 	<ul>
 	%(items)s
 	</ul>

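Since the page now points downloaders at md5sums.txt, a sketch of the corresponding check may help; it assumes the conventional two-column "hash  filename" layout that md5sum emits, which the diff itself does not spell out:

    # Sketch: verify a downloaded dump file against md5sums.txt.
    import hashlib

    def file_md5(path):
        h = hashlib.md5()
        f = open(path, "rb")
        for chunk in iter(lambda: f.read(1 << 20), ""):
            h.update(chunk)
        f.close()
        return h.hexdigest()

    def verify(md5sums_path, filename):
        for line in open(md5sums_path):
            parts = line.split()
            if len(parts) == 2 and parts[1].endswith(filename):
                return parts[0] == file_md5(filename)
        return False  # file not listed at all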

___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [104142] branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py

2011-11-24 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104142

Revision: 104142
Author:   ariel
Date: 2011-11-24 08:58:58 + (Thu, 24 Nov 2011)
Log Message:
---
flexibility in retrieving previous dump date ('must be successful run' is now a 
parameter)

Modified Paths:
--
branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py

Modified: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
===
--- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py  2011-11-24 
08:48:14 UTC (rev 104141)
+++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py  2011-11-24 
08:58:58 UTC (rev 104142)
@@ -366,9 +366,10 @@
                 toRemove = os.path.join(self.incrDir.getIncDirNoDate(self.wikiName), dump)
                 shutil.rmtree("%s" % toRemove)

-    def getPrevIncrDate(self, date):
+    def getPrevIncrDate(self, date, ok = False):
         # find the most recent incr dump before the
-        # specified date that completed successfully
+        # specified date
+        # if ok is True, find most recent dump that completed successfully
         previous = None
         old = self.getIncDumpDirs()
         if old:
@@ -376,8 +377,11 @@
                 if dump == date:
                     return previous
                 else:
-                    statusInfo = StatusInfo(self._config, dump, self.wikiName)
-                    if statusInfo.getStatus(dump) == "done":
+                    if ok:
+                        statusInfo = StatusInfo(self._config, dump, self.wikiName)
+                        if statusInfo.getStatus(dump) == "done":
+                            previous = dump
+                    else:
+                        previous = dump
         return previous
 

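A small self-contained illustration of what the new parameter changes, with made-up dates and statuses:

    # Hypothetical on-disk state, oldest first; dates are invented.
    dirs = ["20111101", "20111110", "20111120"]
    done = {"20111101": True, "20111110": False, "20111120": False}

    def prev_incr_date(date, ok=False):
        previous = None
        for dump in dirs:
            if dump == date:
                return previous
            if not ok or done[dump]:
                previous = dump
        return previous

    assert prev_incr_date("20111120") == "20111110"           # any earlier dump
    assert prev_incr_date("20111120", ok=True) == "20111101"  # earlier *successful* dump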

___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [104143] branches/ariel/xmldumps-backup/incrementals/generateincrementals.py

2011-11-24 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104143

Revision: 104143
Author:   ariel
Date: 2011-11-24 09:01:52 + (Thu, 24 Nov 2011)
Log Message:
---
more verbosity

Modified Paths:
--
branches/ariel/xmldumps-backup/incrementals/generateincrementals.py

Modified: branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
===
--- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 
2011-11-24 08:58:58 UTC (rev 104142)
+++ branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 
2011-11-24 09:01:52 UTC (rev 104143)
@@ -74,6 +74,7 @@
             self.incrDumpsDirs.cleanupOldIncrDumps(self.date)
         maxRevID = self.getMaxRevIdFromFile()
         if (self.verbose):
+            print "Doing run for wiki: ", self.wikiName
             if maxRevID:
                 print "maxRevID is ", maxRevID
             else:


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [104144] branches/ariel/xmldumps-backup/incrementals

2011-11-24 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104144

Revision: 104144
Author:   ariel
Date: 2011-11-24 09:29:16 + (Thu, 24 Nov 2011)
Log Message:
---
get latest incr date now has optional check for successful run

Modified Paths:
--
branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
branches/ariel/xmldumps-backup/incrementals/incrmonitor.py

Modified: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
===
--- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py  2011-11-24 
09:01:52 UTC (rev 104143)
+++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py  2011-11-24 
09:29:16 UTC (rev 104144)
@@ -385,10 +385,16 @@
                         previous = dump
         return previous

-    def getLatestIncrDate(self):
+    def getLatestIncrDate(self, ok = False):
         # find the most recent incr dump 
         dirs = self.getIncDumpDirs()
         if dirs:
-            return(dirs[-1])
+            if ok:
+                for dump in dirs:
+                    statusInfo = StatusInfo(self._config, dump, self.wikiName)
+                    if statusInfo.getStatus(dump) == "done":
+                        return dump
+            else:
+                return(dirs[-1])
         else:
             return(None)

Modified: branches/ariel/xmldumps-backup/incrementals/incrmonitor.py
===
--- branches/ariel/xmldumps-backup/incrementals/incrmonitor.py  2011-11-24 
09:01:52 UTC (rev 104143)
+++ branches/ariel/xmldumps-backup/incrementals/incrmonitor.py  2011-11-24 
09:29:16 UTC (rev 104144)
@@ -54,7 +54,7 @@
         if date:
             incrDate = date
         else:
-            incrDate = self.incrDumpsDirs.getLatestIncrDate()
+            incrDate = self.incrDumpsDirs.getLatestIncrDate(True)
         if not incrDate:
             if (self.verbose):
                 print "No dump for wiki ", w


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [104026] branches/ariel/xmldumps-backup/incrementals/generateincrementals.py

2011-11-23 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104026

Revision: 104026
Author:   ariel
Date: 2011-11-23 12:10:29 + (Wed, 23 Nov 2011)
Log Message:
---
missing arg for IncrDump() call, add some verbosity generally

Modified Paths:
--
branches/ariel/xmldumps-backup/incrementals/generateincrementals.py

Modified: branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
===
--- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 
2011-11-23 11:42:04 UTC (rev 104025)
+++ branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 
2011-11-23 12:10:29 UTC (rev 104026)
@@ -73,10 +73,25 @@
         if not dryrun:
             self.incrDumpsDirs.cleanupOldIncrDumps(self.date)
         maxRevID = self.getMaxRevIdFromFile()
+        if (self.verbose):
+            if maxRevID:
+                print "maxRevID is ", maxRevID
+            else:
+                print "no maxRevID found"
         prevDate = self.incrDumpsDirs.getPrevIncrDate(self.date)
+        if (self.verbose):
+            if prevDate:
+                print "prevDate is", prevDate
+            else:
+                print "no prevDate found"
         prevRevID = None
         if prevDate:
-            prevRevID = self.getMaxRevIdFromFile(prevDate)
+            prevRevID = self.getMaxRevIdFromFile(prevDate)
+            if (self.verbose):
+                if prevRevID:
+                    print "prevRevId is ", prevRevID
+                else:
+                    print "no prevRevID found"
         if not prevRevID:
             prevRevID = str(int(maxRevID) - 10)
             if int(prevRevID) < 1:
@@ -266,7 +281,7 @@
 date = TimeUtils.today()
 
 if len(remainder) > 0:
-    dump = IncrDump(config, date, remainder[0], doStubs, doRevs, dryrun, verbose)
+    dump = IncrDump(config, date, remainder[0], doStubs, doRevs, dryrun, verbose, forcerun)
     dump.doOneWiki()
 else:
     dump = IncrDumpLoop(config, date, doStubs, doRevs, dryrun, verbose, forcerun)


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [104054] branches/ariel/xmldumps-backup/worker.py

2011-11-23 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/104054

Revision: 104054
Author:   ariel
Date: 2011-11-23 17:36:55 + (Wed, 23 Nov 2011)
Log Message:
---
check for maintenance mode for regular run, not just specific job

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py	2011-11-23 17:27:08 UTC (rev 104053)
+++ branches/ariel/xmldumps-backup/worker.py	2011-11-23 17:36:55 UTC (rev 104054)
@@ -1834,6 +1834,7 @@
 			self.checksums.prepareChecksums()

 			for item in self.dumpItemList.dumpItems:
+				Maintenance.exitIfInMaintenanceMode("In maintenance mode, exiting dump of %s at step %s" % ( self.dbName, item.name() ) )
 				item.start(self)
 				self.status.updateStatusFiles()
 				self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo())

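The added guard re-checks a kill switch between dump steps, so a long multi-step run stops promptly instead of only at startup. A rough stand-alone equivalent, assuming the flag is simply a file whose presence means "stop" (the real Maintenance class may differ):

    # Sketch: file-based maintenance flag polled between job steps.
    import os
    import sys

    MAINT_FLAG = "/etc/dumps/maintenance.txt"  # assumed path, for illustration

    def exit_if_in_maintenance_mode(message):
        if os.path.exists(MAINT_FLAG):
            print message
            sys.exit(1)

    # for item in items:
    #     exit_if_in_maintenance_mode("In maintenance mode, exiting at step %s" % item)
    #     item.start()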

___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [103821] branches/ariel/xmldumps-backup/incrementals/incrs-index.html

2011-11-21 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/103821

Revision: 103821
Author:   ariel
Date: 2011-11-21 16:37:23 + (Mon, 21 Nov 2011)
Log Message:
---
relative paths for links in index.html

Modified Paths:
--
branches/ariel/xmldumps-backup/incrementals/incrs-index.html

Modified: branches/ariel/xmldumps-backup/incrementals/incrs-index.html
===
--- branches/ariel/xmldumps-backup/incrementals/incrs-index.html
2011-11-21 16:28:12 UTC (rev 103820)
+++ branches/ariel/xmldumps-backup/incrementals/incrs-index.html
2011-11-21 16:37:23 UTC (rev 103821)
@@ -110,9 +110,9 @@
/ul
hr
  p
-   Return to a href=http://dumps.wikimedia.org/other/;our other 
datasets/a, the
-   a href=http://dumps.wikimedia.org/backup-index.html;XML data 
dumps/a, or
-   a href=http://dumps.wikimedia.org/index.html;the main index/a.
+   Return to a href=/other/our other datasets/a, the
+   a href=/backup-index.htmlXML data dumps/a, or
+   a href=/index.htmlthe main index/a.
  p/
 /body
 /html


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [103582] trunk/phase3/includes/Export.php

2011-11-18 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/103582

Revision: 103582
Author:   ariel
Date: 2011-11-18 09:08:29 + (Fri, 18 Nov 2011)
Log Message:
---
followup to r103448, tighten up code

Modified Paths:
--
trunk/phase3/includes/Export.php

Modified: trunk/phase3/includes/Export.php
===
--- trunk/phase3/includes/Export.php	2011-11-18 08:10:56 UTC (rev 103581)
+++ trunk/phase3/includes/Export.php	2011-11-18 09:08:29 UTC (rev 103582)
@@ -614,17 +614,11 @@
 
 	function writeContributor( $id, $text ) {
 		$out = "      <contributor>\n";
-		if ( $id ) {
+		if ( $id || !IP::isValid( $text ) ) {
 			$out .= "        " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
 			$out .= "        " . Xml::element( 'id', null, strval( $id ) ) . "\n";
 		} else {
-			if ( IP::isValid( $text ) ) {
-				$out .= "        " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
-			}
-			else {
-				$out .= "        " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
-				$out .= "        " . Xml::element( 'id', null, strval( $id ) ) . "\n";
-			}
+			$out .= "        " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
 		}
 		$out .= "      </contributor>\n";
 		return $out;


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [103448] trunk/phase3/includes/Export.php

2011-11-17 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/103448

Revision: 103448
Author:   ariel
Date: 2011-11-17 09:20:51 + (Thu, 17 Nov 2011)
Log Message:
---
if user id is 0 and username is actually an IP, write it as ip, not username

Modified Paths:
--
trunk/phase3/includes/Export.php

Modified: trunk/phase3/includes/Export.php
===
--- trunk/phase3/includes/Export.php	2011-11-17 08:03:14 UTC (rev 103447)
+++ trunk/phase3/includes/Export.php	2011-11-17 09:20:51 UTC (rev 103448)
@@ -618,7 +618,13 @@
 			$out .= "        " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
 			$out .= "        " . Xml::element( 'id', null, strval( $id ) ) . "\n";
 		} else {
-			$out .= "        " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
+			if ( IP::isValid( $text ) ) {
+				$out .= "        " . Xml::elementClean( 'ip', null, strval( $text ) ) . "\n";
+			}
+			else {
+				$out .= "        " . Xml::elementClean( 'username', null, strval( $text ) ) . "\n";
+				$out .= "        " . Xml::element( 'id', null, strval( $id ) ) . "\n";
+			}
 		}
 		$out .= "      </contributor>\n";
 		return $out;

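The dispatch is easier to see on concrete inputs. A hand-written sketch of the resulting records (illustration only, not MediaWiki output):

    # Mirrors the corrected condition: a nonzero id, or a name that is not
    # a valid IP, is written as username/id; an anonymous IP edit as ip.
    def contributor_xml(uid, text, is_valid_ip):
        if uid or not is_valid_ip:
            return "<contributor><username>%s</username><id>%d</id></contributor>" % (text, uid)
        return "<contributor><ip>%s</ip></contributor>" % text

    print contributor_xml(0, "127.0.0.1", True)      # anon edit -> <ip>
    print contributor_xml(42, "SomeUser", False)     # account -> <username>/<id>
    print contributor_xml(0, "ImportedName", False)  # uid 0, not an IP -> <username>/<id>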

___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [103314] trunk/phase3

2011-11-16 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/103314

Revision: 103314
Author:   ariel
Date: 2011-11-16 12:01:58 + (Wed, 16 Nov 2011)
Log Message:
---
add checkbox for listauthors on export form if wgExportAllowListContributors set

Modified Paths:
--
trunk/phase3/includes/specials/SpecialExport.php
trunk/phase3/languages/messages/MessagesEn.php

Modified: trunk/phase3/includes/specials/SpecialExport.php
===
--- trunk/phase3/includes/specials/SpecialExport.php2011-11-16 11:45:22 UTC 
(rev 103313)
+++ trunk/phase3/includes/specials/SpecialExport.php2011-11-16 12:01:58 UTC 
(rev 103314)
@@ -216,6 +216,15 @@
 			$request->wasPosted() ? $request->getCheck( 'wpDownload' ) : true
 		) . '<br />';

+		if ( $wgExportAllowListContributors ) {
+			$form .= Xml::checkLabel(
+				wfMsg( 'exportlistauthors' ),
+				'listauthors',
+				'listauthors',
+				$request->wasPosted() ? $request->getCheck( 'listauthors' ) : false
+			) . '<br />';
+		}
+
 		$form .= Xml::submitButton( wfMsg( 'export-submit' ), Linker::tooltipAndAccesskeyAttribs( 'export' ) );
$form .= Xml::closeElement( 'form' );
 

Modified: trunk/phase3/languages/messages/MessagesEn.php
===
--- trunk/phase3/languages/messages/MessagesEn.php  2011-11-16 11:45:22 UTC 
(rev 103313)
+++ trunk/phase3/languages/messages/MessagesEn.php  2011-11-16 12:01:58 UTC 
(rev 103314)
@@ -3278,6 +3278,7 @@
 'exportcuronly'     => 'Include only the current revision, not the full history',
 'exportnohistory'   => "'''Note:''' Exporting the full history of pages through this form has been disabled due to performance reasons.",
+'exportlistauthors' => 'Include a full list of contributors for each page',
 'export-submit'     => 'Export',
 'export-addcattext' => 'Add pages from category:',
 'export-addcat'     => 'Add',


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [101971] branches/ariel/xmldumps-backup/mysql2txt.py

2011-11-04 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101971

Revision: 101971
Author:   ariel
Date: 2011-11-04 09:46:30 + (Fri, 04 Nov 2011)
Log Message:
---
possibly useful, likely buggy script for grabbing specific columns from desired rows of a mysql table dump

Added Paths:
---
branches/ariel/xmldumps-backup/mysql2txt.py

Added: branches/ariel/xmldumps-backup/mysql2txt.py
===
--- branches/ariel/xmldumps-backup/mysql2txt.py (rev 0)
+++ branches/ariel/xmldumps-backup/mysql2txt.py 2011-11-04 09:46:30 UTC (rev 
101971)
@@ -0,0 +1,405 @@
+# this script reads from stdin a sql file created by mysqldump, grabs the requested columns from
+# the requested table from each tuple, and writes them out one tuple per line 
+# with a comma between columns, keeping the original escaping of values as done by mysql.
+
+import getopt
+import os
+import re
+import sys
+
+class ConverterError(Exception):
+    pass
+
+class MysqlFile:
+    def __init__(self, f, tableRequested, columnsRequested, valuesRequestedCols, valuesRequestedVals, fieldSeparator):
+        self.file = f
+        self.tableRequested = tableRequested
+        self.columnsRequested = columnsRequested
+        self.valuesRequestedCols = valuesRequestedCols
+        self.valuesRequestedVals = valuesRequestedVals
+        self.fieldSeparator = fieldSeparator
+
+        self.buffer = ""
+        self.bufferInd = 0
+        self.eof = False
+        self.rowsDone = False
+        self.GET = 1
+        self.CHECK = 2
+        self.SKIP = 0
+
+    def findCreateStatement(self):
+        tableFound = False
+        toFind = "CREATE TABLE `%s` (\n" % self.tableRequested
+        line = self.getLine(len(toFind))
+        if (not line.endswith("\n")):
+            self.skipLineRemainder()
+        while line != "":
+            if line == toFind:
+                tableFound = True
+                break
+            line = self.getLine(len(toFind))
+            if (not line.endswith("\n")):
+                self.skipLineRemainder()
+        if not tableFound:
+            raise ConverterError("create statement for requested table not found in file")
+
+    def getLine(self, maxbytes = 0):
+        """returns line including the \n, up to maxbytes"""
+        line = ""
+        length = 0
+        if self.eof:
+            return False
+        while self.buffer[self.bufferInd] != '\n':
+            line = line + self.buffer[self.bufferInd]
+            if not self.incrementBufferPtr():
+                return False
+            length = length + 1
+            if maxbytes and length == maxbytes:
+                return line
+
+        if not self.skipChar('\n'):
+            return False
+        return line + "\n"
+
+    def skipLineRemainder(self):
+        # skip up to the newline...
+        while self.buffer[self.bufferInd] != '\n':
+            if not self.incrementBufferPtr():
+                return False
+        # and now the newline.
+        return self.incrementBufferPtr()
+
+    def findInsertStatement(self):
+        """leave the file contents at the line immediately following
+        an INSERT statement"""
+        if m.eof:
+            return False
+        insertFound = False
+        toFind = "INSERT INTO `%s` VALUES " % self.tableRequested
+        line = self.getLine(len(toFind))
+        while line and not self.eof:
+            if line.startswith(toFind):
+                insertFound = True
+                break
+            if (not line.endswith("\n")):
+                self.skipLineRemainder()
+            line = self.getLine(len(toFind))
+        return insertFound
+
+    def setupColumnRetrieval(self):
+        self.columnsInTable = []
+        columnNameExpr = re.compile('\s+`([^`]+)`')
+        line = self.getLine()
+        while (line and not self.eof and line[0] != ')' ):
+            columnNameMatch = columnNameExpr.match(line)
+            if (columnNameMatch):
+                self.columnsInTable.append(columnNameMatch.group(1))
+            line = self.getLine()
+
+        for c in self.columnsRequested:
+            if not c in self.columnsInTable:
+                raise ConverterError("requested column %s not found in table" % c)
+
+        #print "columns in table: ", self.columnsInTable
+        #print "columnsRequested: ", self.columnsRequested
+
+        self.columnsToGet = []
+        for c in self.columnsInTable:
+            v = self.SKIP
+            if c in self.columnsRequested:
+                v = v | self.GET
+            if c in self.valuesRequestedCols:
+                v = v | self.CHECK
+            self.columnsToGet.append( v )
+
+        #print "columns to get: ", self.columnsToGet
+
+        self.columnOrder = []
+        # we want here a list which tells us to
+        # write the ith column we read from tuple first,
+        # the jth one second, the kth one third etc

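For readers skimming the archive, the structure this script walks is small: mysqldump emits one CREATE TABLE block with column names in backticks, then INSERT INTO ... VALUES (...),(...),... lines. A stripped-down, stand-alone sketch of that walk (it yields whole tuples rather than selected columns, and its tuple split is naive where the real script scans character by character):

    import re
    import sys

    def tuples_from_dump(stream, table):
        cols, in_create = [], False
        prefix = "INSERT INTO `%s` VALUES " % table
        for line in stream:
            if line.startswith("CREATE TABLE `%s` (" % table):
                in_create = True
            elif in_create:
                m = re.match(r'\s+`([^`]+)`', line)
                if m:
                    cols.append(m.group(1))
                elif line.startswith(')'):
                    in_create = False
            elif line.startswith(prefix):
                # breaks on "),(" inside quoted values; illustration only
                body = line[len(prefix):].rstrip(";\n").strip("()")
                for row in body.split("),("):
                    yield cols, row

    for cols, row in tuples_from_dump(sys.stdin, "page"):
        print row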
[MediaWiki-CVS] SVN: [101975] trunk/extensions/Renameuser/renameUserCleanup.php

2011-11-04 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101975

Revision: 101975
Author:   ariel
Date: 2011-11-04 11:10:54 + (Fri, 04 Nov 2011)
Log Message:
---
typo in email; ability to override result of check for rename of user in logs

Modified Paths:
--
trunk/extensions/Renameuser/renameUserCleanup.php

Modified: trunk/extensions/Renameuser/renameUserCleanup.php
===
--- trunk/extensions/Renameuser/renameUserCleanup.php   2011-11-04 11:08:11 UTC 
(rev 101974)
+++ trunk/extensions/Renameuser/renameUserCleanup.php   2011-11-04 11:10:54 UTC 
(rev 101975)
@@ -20,7 +20,7 @@
  * http://www.gnu.org/copyleft/gpl.html
  *
  * @ingroup Maintenance
- * @author Ariel Glenn ar...@wikimedia.orf
+ * @author Ariel Glenn ar...@wikimedia.org
  */
 
 $IP = getenv( 'MW_INSTALL_PATH' );
@@ -42,6 +42,7 @@
 		$this->output( "Rename User Cleanup starting...\n\n" );
 		$olduser = User::newFromName( $this->getOption( 'olduser' ) );
 		$newuser = User::newFromName( $this->getOption( 'newuser' ) );
+
 		if ( !$newuser->getId() ) {
 			$this->error( "No such user: " . $this->getOption( 'newuser' ), true );
 			exit(1);
@@ -79,8 +80,14 @@
__METHOD__
);
 		if (! $result ||  ! $result->numRows() ) {
-			print("No log entry found for a rename of ".$olduser->getName()." to ".$newuser->getName().", giving up\n");
-			exit(1);
+			print("No log entry found for a rename of ".$olduser->getName()." to ".$newuser->getName().", proceed anyways??? [N/y] ");
+			$stdin = fopen ("php://stdin","rt");
+			$line = fgets($stdin);
+			fclose($stdin);
+			if ( $line[0] != "Y" && $line[0] != "y" ) {
+				print("Exiting at user's request\n");
+				exit(1);
+			}
 		}
 		else {
 			foreach ( $result as $row ) {
@@ -93,7 +100,7 @@
 				print("Found log entry of the rename: ".$olduser->getName()." to ".$newuser->getName()." on $row->log_timestamp\n");
 			}
}
-   if ($result-numRows()  1) {
+   if ($result  $result-numRows()  1) {
print(More than one rename entry found in the log, not 
sure what to do. Continue anyways? [N/y]  );
$stdin = fopen (php://stdin,rt);
$line = fgets($stdin);


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [101992] trunk/extensions/Renameuser/renameUserCleanup.php

2011-11-04 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101992

Revision: 101992
Author:   ariel
Date: 2011-11-04 14:19:52 + (Fri, 04 Nov 2011)
Log Message:
---
allow renames for user with specific uid set in row; minor formatting cleanup; 
refactor (one large function - several smaller ones)

Modified Paths:
--
trunk/extensions/Renameuser/renameUserCleanup.php

Modified: trunk/extensions/Renameuser/renameUserCleanup.php
===
--- trunk/extensions/Renameuser/renameUserCleanup.php   2011-11-04 13:59:02 UTC 
(rev 101991)
+++ trunk/extensions/Renameuser/renameUserCleanup.php   2011-11-04 14:19:52 UTC 
(rev 101992)
@@ -35,6 +35,7 @@
 		$this->mDescription = "Maintenance script to finish incomplete rename user, in particular to reassign edits that were missed";
 		$this->addOption( 'olduser', 'Old user name', true, true );
 		$this->addOption( 'newuser', 'New user name', true, true );
+		$this->addOption( 'olduid', 'Old user id in revision records (DANGEROUS)', false, true );
 		$this->mBatchSize = 1000;
 	}
 
@@ -42,23 +43,42 @@
 		$this->output( "Rename User Cleanup starting...\n\n" );
 		$olduser = User::newFromName( $this->getOption( 'olduser' ) );
 		$newuser = User::newFromName( $this->getOption( 'newuser' ) );
+		$olduid = $this->getOption( 'olduid' );

+		$this->checkUserExistence( $olduser, $newuser );
+		$this->checkRenameLog( $olduser, $newuser );
+
+		if ( $olduid ) {
+			$this->doUpdates( $olduser, $newuser, $olduid, $dbw );
+		}
+		$this->doUpdates( $olduser, $newuser, $newuser->getId(), $dbw );
+		$this->doUpdates( $olduser, $newuser, 0, $dbw );
+		
+		print "Done!\n";
+		exit(0);
+	}
+
+
+	public function checkUserExistence( $olduser, $newuser ) {
 		if ( !$newuser->getId() ) {
 			$this->error( "No such user: " . $this->getOption( 'newuser' ), true );
 			exit(1);
 		}
 		if ($olduser->getId() ) {
-			print( "WARNING!!: Old user still exists: " . $this->getOption( 'olduser' ) . "\n");
+			print "WARNING!!: Old user still exists: " . $this->getOption( 'olduser' ) . "\n";
+			print "proceed anyways? We'll only re-attribute edits that have the new user uid (or 0)";
+			print " or the uid specified by the caller, and the old user name.  [N/y]  ";
 			$stdin = fopen ("php://stdin","rt");
 			$line = fgets($stdin);
 			fclose($stdin);
 			if ( $line[0] != "Y" && $line[0] != "y" ) {
-				print("Exiting at user's request\n");
+				print "Exiting at user's request\n";
 				exit(0);
 			}
 		}
+	}

+	public function checkRenameLog( $olduser, $newuser ) {
 		$dbr = wfGetDB( DB_SLAVE );
 		$result = $dbr->select( 'logging', '*',
 			array( 'log_type' => 'renameuser',
 			       'log_action'    => 'renameuser',
 			       'log_namespace' => NS_USER,
@@ -80,130 +100,111 @@
 			__METHOD__
 		);
 		if (! $result ||  ! $result->numRows() ) {
-			print("No log entry found for a rename of ".$olduser->getName()." to ".$newuser->getName().", proceed anyways??? [N/y] ");
+			print "No log entry found for a rename of ".$olduser->getName()." to ".$newuser->getName().", proceed anyways??? [N/y] ";
 			$stdin = fopen ("php://stdin","rt");
 			$line = fgets($stdin);
 			fclose($stdin);
 			if ( $line[0] != "Y" && $line[0] != "y" ) {
-				print("Exiting at user's request\n");
+				print "Exiting at user's request\n";
 				exit(1);
 			}
 		}
 		else {
 			foreach ( $result as $row ) {
-				print("Found possible log entry of the rename, please check: ".$row->log_title." with comment ".$row->log_comment." on $row->log_timestamp\n");
+				print "Found possible log entry of the rename, please check: ".$row->log_title." with comment ".$row->log_comment." on $row->log_timestamp\n";
 			}
 		}
 	}
 	else

[MediaWiki-CVS] SVN: [101998] trunk/extensions/Renameuser/renameUserCleanup.php

2011-11-04 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101998

Revision: 101998
Author:   ariel
Date: 2011-11-04 15:08:05 + (Fri, 04 Nov 2011)
Log Message:
---
use getdbkey() form of user name for log check

Modified Paths:
--
trunk/extensions/Renameuser/renameUserCleanup.php

Modified: trunk/extensions/Renameuser/renameUserCleanup.php
===
--- trunk/extensions/Renameuser/renameUserCleanup.php   2011-11-04 15:05:00 UTC 
(rev 101997)
+++ trunk/extensions/Renameuser/renameUserCleanup.php   2011-11-04 15:08:05 UTC 
(rev 101998)
@@ -80,12 +80,16 @@
 
 	public function checkRenameLog( $olduser, $newuser ) {
 		$dbr = wfGetDB( DB_SLAVE );
+
+		$oldTitle = Title::makeTitle( NS_USER, $olduser->getName() );
+		$newTitle = Title::makeTitle( NS_USER, $newuser->getName() );
+
 		$result = $dbr->select( 'logging', '*',
 			array( 'log_type' => 'renameuser',
 			       'log_action'    => 'renameuser',
 			       'log_namespace' => NS_USER,
-			       'log_title'     => $olduser->getName(),
-			       'log_params'    => $newuser->getName()
+			       'log_title'     => $oldTitle->getDBkey(),
+			       'log_params'    => $newTitle->getDBkey()
 			),
 			__METHOD__
 		);


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [102000] trunk/extensions/Renameuser/renameUserCleanup.php

2011-11-04 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/102000

Revision: 102000
Author:   ariel
Date: 2011-11-04 15:28:14 + (Fri, 04 Nov 2011)
Log Message:
---
bah, only the log_title needs the dbkey format; make 'no edits' message similar 
to 'found edits' message

Modified Paths:
--
trunk/extensions/Renameuser/renameUserCleanup.php

Modified: trunk/extensions/Renameuser/renameUserCleanup.php
===
--- trunk/extensions/Renameuser/renameUserCleanup.php   2011-11-04 15:16:54 UTC 
(rev 101999)
+++ trunk/extensions/Renameuser/renameUserCleanup.php   2011-11-04 15:28:14 UTC 
(rev 102000)
@@ -82,14 +82,13 @@
 		$dbr = wfGetDB( DB_SLAVE );

 		$oldTitle = Title::makeTitle( NS_USER, $olduser->getName() );
-		$newTitle = Title::makeTitle( NS_USER, $newuser->getName() );

 		$result = $dbr->select( 'logging', '*',
 			array( 'log_type' => 'renameuser',
 			       'log_action'    => 'renameuser',
 			       'log_namespace' => NS_USER,
 			       'log_title'     => $oldTitle->getDBkey(),
-			       'log_params'    => $newTitle->getDBkey()
+			       'log_params'    => $newuser->getName()
 			),
 			__METHOD__
 		);
@@ -153,7 +152,7 @@
 			array( $usernamefield => $olduser->getName(), $useridfield => $uid ), __METHOD__ );

 		if ( $contribs == 0 ) {
-			print "No edits to be re-attributed from table $table\n" ;
+			print "No edits to be re-attributed from table $table for uid $uid\n" ;
 			return(0);
 		}
 


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [101591] trunk/phase3

2011-11-02 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101591

Revision: 101591
Author:   ariel
Date: 2011-11-02 07:58:43 + (Wed, 02 Nov 2011)
Log Message:
---
export specified range of revisions (as stubs)

Modified Paths:
--
trunk/phase3/includes/Export.php
trunk/phase3/maintenance/backup.inc
trunk/phase3/maintenance/dumpBackup.php

Modified: trunk/phase3/includes/Export.php
===
--- trunk/phase3/includes/Export.php	2011-11-02 07:47:00 UTC (rev 101590)
+++ trunk/phase3/includes/Export.php	2011-11-02 07:58:43 UTC (rev 101591)
@@ -41,6 +41,7 @@
 	const CURRENT = 2;
 	const STABLE = 4; // extension defined
 	const LOGS = 8;
+	const RANGE = 16;

 	const BUFFER = 0;
 	const STREAM = 1;
@@ -56,7 +57,8 @@
 	 * main query is still running.
 	 *
 	 * @param $db Database
-	 * @param $history Mixed: one of WikiExporter::FULL or WikiExporter::CURRENT,
+	 * @param $history Mixed: one of WikiExporter::FULL, WikiExporter::CURRENT,
+	 *                 WikiExporter::RANGE or WikiExporter::STABLE,
 	 * or an associative array:
 	 *   offset: non-inclusive offset at which to start the query
 	 *   limit: maximum number of rows to return
@@ -120,6 +122,21 @@
 	}

 	/**
+	 * Dumps a series of page and revision records for those pages
+	 * in the database with revisions falling within the rev_id range given.
+	 * @param $start Int: inclusive lower limit (this id is included)
+	 * @param $end   Int: Exclusive upper limit (this id is not included)
+	 *               If 0, no upper limit.
+	 */
+	public function revsByRange( $start, $end ) {
+		$condition = 'rev_id >= ' . intval( $start );
+		if ( $end ) {
+			$condition .= ' AND rev_id < ' . intval( $end );
+		}
+		return $this->dumpFrom( $condition );
+	}
+
+	/**
 	 * @param $title Title
 	 */
 	public function pageByTitle( $title ) {
@@ -259,6 +276,10 @@
 				wfProfileOut( __METHOD__ );
 				throw new MWException( __METHOD__ . " given invalid history dump type." );
 			}
+		} elseif ( $this->history & WikiExporter::RANGE ) {
+			# Dump of revisions within a specified range
+			$join['revision'] = array( 'INNER JOIN', 'page_id=rev_page' );
+			$opts['ORDER BY'] = 'rev_page ASC, rev_id ASC';
 		} else {
 			# Uknown history specification parameter?
 			wfProfileOut( __METHOD__ );

Modified: trunk/phase3/maintenance/backup.inc
===
--- trunk/phase3/maintenance/backup.inc 2011-11-02 07:47:00 UTC (rev 101590)
+++ trunk/phase3/maintenance/backup.inc 2011-11-02 07:58:43 UTC (rev 101591)
@@ -217,6 +217,8 @@
 		} else if ( is_null( $this->pages ) ) {
 			if ( $this->startId || $this->endId ) {
 				$exporter->pagesByRange( $this->startId, $this->endId );
+			} elseif ( $this->revStartId || $this->revEndId ) {
+				$exporter->revsByRange( $this->revStartId, $this->revEndId );
 			} else {
 				$exporter->allPages();
 			}

Modified: trunk/phase3/maintenance/dumpBackup.php
===
--- trunk/phase3/maintenance/dumpBackup.php 2011-11-02 07:47:00 UTC (rev 
101590)
+++ trunk/phase3/maintenance/dumpBackup.php 2011-11-02 07:58:43 UTC (rev 
101591)
@@ -27,7 +27,7 @@
 
 $originalDir = getcwd();

-$optionsWithArgs = array( 'pagelist', 'start', 'end' );
+$optionsWithArgs = array( 'pagelist', 'start', 'end', 'revstart', 'revend');

 require_once( dirname( __FILE__ ) . '/commandLine.inc' );
 require_once( 'backup.inc' );
@@ -57,6 +57,13 @@
 if ( isset( $options['end'] ) ) {
 	$dumper->endId = intval( $options['end'] );
 }
+
+if ( isset( $options['revstart'] ) ) {
+	$dumper->revStartId = intval( $options['revstart'] );
+}
+if ( isset( $options['revend'] ) ) {
+	$dumper->revEndId = intval( $options['revend'] );
+}
 $dumper->skipHeader = isset( $options['skip-header'] );
 $dumper->skipFooter = isset( $options['skip-footer'] );
 $dumper->dumpUploads = isset( $options['uploads'] );
@@ -72,6 +79,8 @@
 	$dumper->dump( WikiExporter::STABLE, $textMode );
 } elseif ( isset( $options['logs'] ) ) {
 	$dumper->dump( WikiExporter::LOGS );
+} elseif ( isset($options['revrange'] ) ) {
+	$dumper->dump( WikiExporter::RANGE, $textMode );

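The condition built by revsByRange() is a half-open interval: start is included, end is not, and an end of 0 means no upper bound. The same logic in a couple of testable lines:

    def rev_range_condition(start, end):
        condition = "rev_id >= %d" % int(start)
        if end:
            condition += " AND rev_id < %d" % int(end)
        return condition

    assert rev_range_condition(100, 200) == "rev_id >= 100 AND rev_id < 200"
    assert rev_range_condition(100, 0) == "rev_id >= 100"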
[MediaWiki-CVS] SVN: [101606] trunk/phase3/includes/Export.php

2011-11-02 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101606

Revision: 101606
Author:   ariel
Date: 2011-11-02 09:55:43 + (Wed, 02 Nov 2011)
Log Message:
---
corrections for fixme in r96486

Modified Paths:
--
trunk/phase3/includes/Export.php

Modified: trunk/phase3/includes/Export.php
===
--- trunk/phase3/includes/Export.php	2011-11-02 09:34:11 UTC (rev 101605)
+++ trunk/phase3/includes/Export.php	2011-11-02 09:55:43 UTC (rev 101606)
@@ -871,11 +871,19 @@
 	protected $filename;

 	function __construct( $file ) {
-		$command = setup7zCommand( $file );
+		$command = $this->setup7zCommand( $file );
 		parent::__construct( $command );
 		$this->filename = $file;
 	}

+	function setup7zCommand( $file ) {
+		$command = "7za a -bd -si " . wfEscapeShellArg( $file );
+		// Suppress annoying useless crap from p7zip
+		// Unfortunately this could suppress real error messages too
+		$command .= ' >' . wfGetNull() . ' 2>&1';
+		return( $command );
+	}
+
 	function closeRenameAndReopen( $newname ) {
 		$this->closeAndRename( $newname, true );
 	}
@@ -895,10 +903,7 @@
 			throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
 		}
 		elseif ( $open ) {
-			$command = "7za a -bd -si " . wfEscapeShellArg( $file );
-			// Suppress annoying useless crap from p7zip
-			// Unfortunately this could suppress real error messages too
-			$command .= ' >' . wfGetNull() . ' 2>&1';
+			$command = setup7zCommand( $file );
 			$this->startCommand( $command );
 		}
 	}


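The -si flag is what lets the dump code stream text into the archiver instead of writing a temporary file first. A rough stand-alone equivalent of that pipeline, assuming p7zip's 7za is installed; the flags are the ones visible in the diff:

    import subprocess

    def open_7z_sink(archive_path):
        # returns a process whose stdin feeds "7za a -bd -si <archive>"
        devnull = open("/dev/null", "w")  # silence p7zip's progress chatter
        return subprocess.Popen(["7za", "a", "-bd", "-si", archive_path],
                                stdin=subprocess.PIPE,
                                stdout=devnull, stderr=subprocess.STDOUT)

    proc = open_7z_sink("pages.xml.7z")
    proc.stdin.write("<mediawiki>...</mediawiki>\n")
    proc.stdin.close()
    proc.wait()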
___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [101614] trunk/phase3/includes/Export.php

2011-11-02 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101614

Revision: 101614
Author:   ariel
Date: 2011-11-02 11:08:06 + (Wed, 02 Nov 2011)
Log Message:
---
clean up duplicated code, for fixme in r97178

Modified Paths:
--
trunk/phase3/includes/Export.php

Modified: trunk/phase3/includes/Export.php
===
--- trunk/phase3/includes/Export.php	2011-11-02 10:59:34 UTC (rev 101613)
+++ trunk/phase3/includes/Export.php	2011-11-02 11:08:06 UTC (rev 101614)
@@ -763,7 +763,13 @@
 		$this->closeAndRename( $newname, true );
 	}

-	function closeAndRename( $newname, $open = false ) {
+	function renameOrException( $newname ) {
+		if (! rename( $this->filename, $newname ) ) {
+			throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
+		}
+	}
+
+	function checkRenameArgCount( $newname ) {
 		if ( is_array( $newname ) ) {
 			if ( count( $newname ) > 1 ) {
 				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
@@ -771,12 +777,15 @@
 				$newname = $newname[0];
 			}
 		}
+		return $newname;
+	}
+
+	function closeAndRename( $newname, $open = false ) {
+		$newname = $this->checkRenameArgCount( $newname );
 		if ( $newname ) {
 			fclose( $this->handle );
-			if (! rename( $this->filename, $newname ) ) {
-				throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
-			}
-			elseif ( $open ) {
+			$this->renameOrException( $newname );
+			if ( $open ) {
 				$this->handle = fopen( $this->filename, "wt" );
 			}
 		}
@@ -820,20 +829,12 @@
 	}

 	function closeAndRename( $newname, $open = false ) {
-		if ( is_array( $newname ) ) {
-			if ( count( $newname ) > 1 ) {
-				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
-			} else {
-				$newname = $newname[0];
-			}
-		}
+		$newname = $this->checkRenameArgCount( $newname );
 		if ( $newname ) {
 			fclose( $this->handle );
 			proc_close( $this->procOpenResource );
-			if (! rename( $this->filename, $newname ) ) {
-				throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
-			}
-			elseif ( $open ) {
+			$this->renameOrException( $newname );
+			if ( $open ) {
 				$command = $this->command;
 				$command .= " > " . wfEscapeShellArg( $this->filename );
 				$this->startCommand( $command );
@@ -889,20 +890,12 @@
 	}

 	function closeAndRename( $newname, $open = false ) {
-		if ( is_array( $newname ) ) {
-			if ( count( $newname ) > 1 ) {
-				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
-			} else {
-				$newname = $newname[0];
-			}
-		}
+		$newname = $this->checkRenameArgCount( $newname );
 		if ( $newname ) {
 			fclose( $this->handle );
 			proc_close( $this->procOpenResource );
-			if (! rename( $this->filename, $newname ) ) {
-				throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
-			}
-			elseif ( $open ) {
+			$this->renameOrException( $newname );
+			if ( $open ) {
 				$command = setup7zCommand( $file );
 				$this->startCommand( $command );
 			}


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [101625] branches/ariel/xmldumps-backup/incrementals

2011-11-02 Thread ariel
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/101625

Revision: 101625
Author:   ariel
Date: 2011-11-02 14:29:09 + (Wed, 02 Nov 2011)
Log Message:
---
add forcerun option which will do a run even if a completed run exists for the 
given date; fix status check (which in turn fixes retrieving max revid of 
previous good run)

Modified Paths:
--
branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
branches/ariel/xmldumps-backup/incrementals/generateincrementals.py

Modified: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
===
--- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py  2011-11-02 
14:03:29 UTC (rev 101624)
+++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py  2011-11-02 
14:29:09 UTC (rev 101625)
@@ -85,11 +85,10 @@
         self.statusFile = StatusFile(self._config, self.date, self.wikiName)

     def getStatus(self, date = None):
+        status = ""
         if exists(self.statusFile.getPath(date)):
             status = FileUtils.readFile(self.statusFile.getPath(date)).rstrip()
-        if status == "done":
-            return True
-        return False
+        return(status)
 
 def setStatus(self, status):
 FileUtils.writeFileInPlace(self.statusFile.getPath(),status, 
self._config.fileperms)

Modified: branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
===
--- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 
2011-11-02 14:03:29 UTC (rev 101624)
+++ branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 
2011-11-02 14:29:09 UTC (rev 101625)
@@ -27,7 +27,7 @@
         self.OK = 0

 class IncrDump(object):
-    def __init__(self,config, date, wikiName, doStubs, doRevs, dryrun, verbose):
+    def __init__(self,config, date, wikiName, doStubs, doRevs, dryrun, verbose, forcerun):
         self._config = config
         self.date = date
         self.wikiName = wikiName
         self.incrDir = IncrementDir(self._config, self.date)
         self.doStubs = doStubs
         self.doRevs = doRevs
         self.dryrun = dryrun
+        self.forcerun = forcerun
         self.maxRevIDFile = MaxRevIDFile(self._config, self.date, self.wikiName)
         self.statusInfo = StatusInfo(self._config, self.date, self.wikiName)
         self.stubFile = StubFile(self._config, self.date, self.wikiName)
@@ -54,7 +55,7 @@
         if not exists(self.incrDir.getIncDir(self.wikiName)):
             os.makedirs(self.incrDir.getIncDir(self.wikiName))
         status = self.statusInfo.getStatus()
-        if status == "done":
+        if status == "done" and not forcerun:
             if (self.verbose):
                 print "wiki", self.wikiName, "skipped, adds/changes dump already complete"
             return retCodes.OK
@@ -170,20 +171,21 @@
             return False

 class IncrDumpLoop(object):
-    def __init__(self, config, date, doStubs, doRevs, dryrun, verbose):
+    def __init__(self, config, date, doStubs, doRevs, dryrun, verbose, forcerun):
         self._config = config
         self.date = date
         self.doStubs = doStubs
         self.doRevs = doRevs
         self.dryrun = dryrun
         self.verbose = verbose
+        self.forcerun = forcerun

     def doRunOnAllWikis(self):
         retCodes = DumpResults()
         failures = 0
         todos = 0
         for w in self._config.allWikisList:
-            dump = IncrDump(config, date, w, doStubs, doRevs, dryrun, self.verbose)
+            dump = IncrDump(self._config, self.date, w, self.doStubs, self.doRevs, self.dryrun, self.verbose, self.forcerun)
             result = dump.doOneWiki()
             if result == retCodes.FAILED:
                 failures = failures + 1
@@ -212,6 +214,7 @@
     print "--configfile:  Specify an alternate config file to read. Default file is 'dumpincr.conf' in the current directory."
     print "--date:        (Re)run incremental of a given date (use with care)."
     print "--dryrun:      Don't actually dump anything but print the commands that would be run."
+    print "--forcerun:    Do the run even if there is already a successful run in place."
     print "--revsonly:    Do only the stubs part of the dumps."
     print "--stubsonly:   Do only the revision text part of the dumps."
     print "--verbose:     Print error messages and other informative messages (normally the"
@@ -227,10 +230,11 @@
 doRevs = True
 dryrun = False
 verbose = False
+forcerun = False

 try:
     (options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
-                                             ['date=', 'configfile=', 'stubsonly', 'revsonly', 'dryrun', 'verbose' ])
+                                             ['date=', 'configfile=', 'stubsonly', 'revsonly', 'dryrun', 'verbose', 'forcerun' ])
 except:
     usage("Unknown option specified")

[MediaWiki-CVS] SVN: [100112] branches/ariel/xmldumps-backup/create-rsync-list.sh

2011-10-18 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/100112

Revision: 100112
Author:   ariel
Date: 2011-10-18 10:37:50 + (Tue, 18 Oct 2011)
Log Message:
---
generate rsync-friendly full listing of files we want mirrored

Modified Paths:
--
branches/ariel/xmldumps-backup/create-rsync-list.sh

Modified: branches/ariel/xmldumps-backup/create-rsync-list.sh
===
--- branches/ariel/xmldumps-backup/create-rsync-list.sh 2011-10-18 09:49:08 UTC 
(rev 100111)
+++ branches/ariel/xmldumps-backup/create-rsync-list.sh 2011-10-18 10:37:50 UTC 
(rev 100112)
@@ -186,4 +186,4 @@
     exit 1
 fi

-
+/usr/bin/rsync --list-only --files-from=$outputfile $publicdir dummy > $outputfile.rsync


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [100038] branches/ariel/xmldumps-backup/monitor.py

2011-10-17 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/100038

Revision: 100038
Author:   ariel
Date: 2011-10-17 15:32:10 + (Mon, 17 Oct 2011)
Log Message:
---
fix check of argument existence

Modified Paths:
--
branches/ariel/xmldumps-backup/monitor.py

Modified: branches/ariel/xmldumps-backup/monitor.py
===
--- branches/ariel/xmldumps-backup/monitor.py   2011-10-17 15:29:32 UTC (rev 
100037)
+++ branches/ariel/xmldumps-backup/monitor.py   2011-10-17 15:32:10 UTC (rev 
100038)
@@ -6,12 +6,6 @@
 from os.path import exists
 from WikiDump import FileUtils

-# can specify name of alternate config file
-if (sys.argv[1]):
-	config = WikiDump.Config(sys.argv[1])
-else:
-	config = WikiDump.Config()
-
 def generateIndex():
 	running = False
 	states = []
@@ -52,4 +46,10 @@
 	os.rename(tempFilename, outputFileName)

 if __name__ == "__main__":
+	# can specify name of alternate config file
+	if (len(sys.argv) > 2):
+		config = WikiDump.Config(sys.argv[1])
+	else:
+		config = WikiDump.Config()
+
 	updateIndex()


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [100043] branches/ariel/xmldumps-backup

2011-10-17 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/100043

Revision: 100043
Author:   ariel
Date: 2011-10-17 15:59:10 + (Mon, 17 Oct 2011)
Log Message:
---
remove stray raise; use relative web paths in generated html

Modified Paths:
--
branches/ariel/xmldumps-backup/WikiDump.py
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/WikiDump.py
===
--- branches/ariel/xmldumps-backup/WikiDump.py  2011-10-17 15:57:47 UTC (rev 
100042)
+++ branches/ariel/xmldumps-backup/WikiDump.py  2011-10-17 15:59:10 UTC (rev 
100043)
@@ -95,7 +95,6 @@
 			size = os.path.getsize(path)
 			return (timestamp, size)
 		except:
-			raise
 			return(None, None)

 	fileAge = staticmethod(fileAge)
@@ -429,6 +428,16 @@

 	def webDir(self):
 		return "/".join((self.config.webRoot, self.dbName))
+
+	def webDirRelative(self):
+		webRootRelative = self.webDir()
+		i = webRootRelative.find("://")
+		if i >= 0:
+			webRootRelative = webRootRelative[i:]
+		i = webRootRelative.find("/")
+		if i >= 0:
+			webRootRelative = webRootRelative[i:]
+		return webRootRelative

# Actions!


Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py	2011-10-17 15:57:47 UTC (rev 100042)
+++ branches/ariel/xmldumps-backup/worker.py	2011-10-17 15:59:10 UTC (rev 100043)
@@ -897,6 +897,14 @@
 			dateString = self._wiki.date
 		return os.path.join(self._wiki.webDir(), dateString, dumpFile.filename)

+
+	def webPathRelative(self, dumpFile, dateString = None):
+		"""Given a DumpFilename object produce the url relative to the docroot for the filename for the date of
+		the dump for the selected database."""
+		if (not dateString):
+			dateString = self._wiki.date
+		return os.path.join(self._wiki.webDirRelative(), dateString, dumpFile.filename)
+
 	def dirCacheOutdated(self, date):
 		if not date:
 			date = self._wiki.date
@@ -1375,8 +1383,8 @@
 		if itemStatus == "in-progress":
 			return "<li class='file'>%s %s (written) </li>" % (fileObj.filename, size)
 		elif itemStatus == "done":
-			webpath = self.dumpDir.webPath(fileObj)
-			return "<li class='file'><a href=\"%s\">%s</a> %s</li>" % (webpath, fileObj.filename, size)
+			webpathRelative = self.dumpDir.webPathRelative(fileObj)
+			return "<li class='file'><a href=\"%s\">%s</a> %s</li>" % (webpathRelative, fileObj.filename, size)
 		else:
 			return "<li class='missing'>%s</li>" % fileObj.filename

@@ -1423,7 +1431,7 @@
 			"status": self._reportStatusSummaryLine(done),
 			"previous": self._reportPreviousDump(done),
 			"items": html,
-			"checksum": self.dumpDir.webPath(f),
+			"checksum": self.dumpDir.webPathRelative(f),
 			"index": self.wiki.config.index}
 
def _reportPreviousDump(self, done):


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [100051] branches/ariel/xmldumps-backup/WikiDump.py

2011-10-17 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/100051

Revision: 100051
Author:   ariel
Date: 2011-10-17 16:46:02 + (Mon, 17 Oct 2011)
Log Message:
---
and actually make relative path contruction work

Modified Paths:
--
branches/ariel/xmldumps-backup/WikiDump.py

Modified: branches/ariel/xmldumps-backup/WikiDump.py
===
--- branches/ariel/xmldumps-backup/WikiDump.py  2011-10-17 16:31:26 UTC (rev 
100050)
+++ branches/ariel/xmldumps-backup/WikiDump.py  2011-10-17 16:46:02 UTC (rev 
100051)
@@ -427,16 +427,19 @@
 		return os.path.join(self.config.privateDir, self.dbName)

 	def webDir(self):
-		return "/".join((self.config.webRoot, self.dbName))
+		webRoot = self.config.webRoot
+		if webRoot[-1] == '/':
+			webRoot = webRoot[:-1]
+		return "/".join((webRoot, self.dbName))

 	def webDirRelative(self):
 		webRootRelative = self.webDir()
 		i = webRootRelative.find("://")
 		if i >= 0:
-			webRootRelative = webRootRelative[i:]
+			webRootRelative = webRootRelative[i+3:]
 		i = webRootRelative.find("/")
 		if i >= 0:
-			webRootRelative = webRootRelative[i:]
+			webRootRelative = webRootRelative[i:]
 		return webRootRelative

# Actions!


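Worked through on a made-up webRoot value, the two find/slice steps reduce an absolute URL to a docroot-relative path:

    def web_dir_relative(web_dir):
        i = web_dir.find("://")
        if i >= 0:
            web_dir = web_dir[i+3:]  # drop the scheme
        i = web_dir.find("/")
        if i >= 0:
            web_dir = web_dir[i:]    # drop the host, keep the path
        return web_dir

    assert web_dir_relative("http://dumps.wikimedia.org/enwiki") == "/enwiki"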
___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [100075] branches/ariel/xmldumps-backup/monitor.py

2011-10-17 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/100075

Revision: 100075
Author:   ariel
Date: 2011-10-17 19:36:13 + (Mon, 17 Oct 2011)
Log Message:
---
count the number of arguments correctly. and don't fix bugs while jetlagged.

Modified Paths:
--
branches/ariel/xmldumps-backup/monitor.py

Modified: branches/ariel/xmldumps-backup/monitor.py
===
--- branches/ariel/xmldumps-backup/monitor.py   2011-10-17 19:35:29 UTC (rev 
100074)
+++ branches/ariel/xmldumps-backup/monitor.py   2011-10-17 19:36:13 UTC (rev 
100075)
@@ -47,7 +47,7 @@
 
 if __name__ == "__main__":
 	# can specify name of alternate config file
-	if (len(sys.argv) > 2):
+	if (len(sys.argv) >= 2):
 		config = WikiDump.Config(sys.argv[1])
 	else:
 		config = WikiDump.Config()


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [99704] branches/ariel/xmldumps-backup/incrementals/generateincrementals.py

2011-10-13 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/99704

Revision: 99704
Author:   ariel
Date: 2011-10-13 17:08:11 + (Thu, 13 Oct 2011)
Log Message:
---
actually dump something when we specify just one wiki as an option

Modified Paths:
--
branches/ariel/xmldumps-backup/incrementals/generateincrementals.py

Modified: branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
===
--- branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 
2011-10-13 17:06:03 UTC (rev 99703)
+++ branches/ariel/xmldumps-backup/incrementals/generateincrementals.py 
2011-10-13 17:08:11 UTC (rev 99704)
@@ -261,6 +261,7 @@
 
 if len(remainder) > 0:
     dump = IncrDump(config, date, remainder[0], doStubs, doRevs, dryrun, verbose)
+    dump.doOneWiki()
 else:
     dump = IncrDumpLoop(config, date, doStubs, doRevs, dryrun, verbose)
     dump.doAllWikisTilDone(3)


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [99655] branches/ariel/xmldumps-backup

2011-10-12 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/99655

Revision: 99655
Author:   ariel
Date: 2011-10-12 23:24:40 + (Wed, 12 Oct 2011)
Log Message:
---
initial checkin of adds/changes dumps

Added Paths:
---
branches/ariel/xmldumps-backup/incrementals/
branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
branches/ariel/xmldumps-backup/incrementals/README.config
branches/ariel/xmldumps-backup/incrementals/README.txt
branches/ariel/xmldumps-backup/incrementals/all.dblist
branches/ariel/xmldumps-backup/incrementals/closed.dblist
branches/ariel/xmldumps-backup/incrementals/dumpincr.conf.sample
branches/ariel/xmldumps-backup/incrementals/generateincrementals.py
branches/ariel/xmldumps-backup/incrementals/generatemaxrevids.py
branches/ariel/xmldumps-backup/incrementals/incrmonitor
branches/ariel/xmldumps-backup/incrementals/incrmonitor.py
branches/ariel/xmldumps-backup/incrementals/incrs-index.html
branches/ariel/xmldumps-backup/incrementals/private.dblist

Added: branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py
===
--- branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py	(rev 0)
+++ branches/ariel/xmldumps-backup/incrementals/IncrDumpLib.py	2011-10-12 23:24:40 UTC (rev 99655)
@@ -0,0 +1,390 @@
+# shared classes for incrementals
+import os
+import sys
+import re
+import ConfigParser
+import WikiDump
+from WikiDump import FileUtils, TimeUtils, MiscUtils
+from os.path import exists
+import socket
+import subprocess
+from subprocess import Popen, PIPE
+
+class ContentFile(object):
+    def __init__(self, config, date, wikiName):
+        self._config = config
+        self.date = date
+        self.incrDir = IncrementDir(self._config, date)
+        self.wikiName = wikiName
+
+    # override this.
+    def getFileName(self):
+        return "content.txt"
+
+    def getPath(self):
+        return os.path.join(self.incrDir.getIncDir(self.wikiName), self.getFileName())
+
+    def getFileInfo(self):
+        return FileUtils.fileInfo(self.getPath())
+
+class MaxRevIDFile(ContentFile):
+    def getFileName(self):
+        return "maxrevid.txt"
+
+class StubFile(ContentFile):
+    def getFileName(self):
+        return "%s-%s-stubs-meta-hist-incr.xml.gz" % ( self.wikiName, self.date )
+
+class RevsFile(ContentFile):
+    def getFileName(self):
+        return "%s-%s-pages-meta-hist-incr.xml.bz2" % ( self.wikiName, self.date )
+
+class StatusFile(ContentFile):
+    def getFileName(self):
+        return "status.txt"
+
+    def getPath(self, date = None):
+        return os.path.join(self.incrDir.getIncDir(self.wikiName, date), self.getFileName())
+
+class LockFile(ContentFile):
+    def getFileName(self):
+        return "%s-%s.lock" % ( self.wikiName, self.date )
+
+    def getPath(self):
+        return os.path.join(self.incrDir.getIncDirNoDate(self.wikiName), self.getFileName())
+
+class  MaxRevIDLockFile(LockFile):
+    def getFileName(self):
+        return "%s-%s-maxrevid.lock" % ( self.wikiName, self.date )
+
+class  IncrDumpLockFile(LockFile):
+    def getFileName(self):
+        return "%s-%s-incrdump.lock" % ( self.wikiName, self.date )
+
+class MD5File(ContentFile):
+    def getFileName(self):
+        return "%s-%s-md5sums.txt" % ( self.wikiName, self.date )
+
+class IndexFile(ContentFile):
+    def __init__(self, config):
+        self._config = config
+        self.incrDir = IncrementDir(self._config)
+
+    def getFileName(self):
+        return "index.html"
+
+    def getPath(self):
+        return os.path.join(self.incrDir.getIncDirBase(), self.getFileName())
+
+class StatusInfo(object):
+    def __init__(self, config, date, wikiName):
+        self._config = config
+        self.date = date
+        self.wikiName = wikiName
+        self.statusFile = StatusFile(self._config, self.date, self.wikiName)
+
+    def getStatus(self, date = None):
+        if exists(self.statusFile.getPath(date)):
+            status = FileUtils.readFile(self.statusFile.getPath(date)).rstrip()
+            if status == "done":
+                return True
+        return False
+
+    def setStatus(self, status):
+        FileUtils.writeFileInPlace(self.statusFile.getPath(), status, self._config.fileperms)
+
+class Lock(object):
+    def __init__(self, config, date, wikiName):
+        self._config = config
+        self.date = date
+        self.wikiName = wikiName
+        self.lockFile = LockFile(self._config, self.date, self.wikiName)
+
+    def isLocked(self):
+        return exists(self.lockFile.getPath())
+
+    def getLock(self):
+        try:
+            if not exists(self._config.incrementalsDir):
+                os.makedirs(self._config.incrementalsDir)
+            f = FileUtils.atomicCreate(self.lockFile.getPath(), "w")
+            f.write("%s %d" % (socket.getfqdn(), os.getpid()))
+            f.close()
+            return True
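
A standalone sketch of the lock-file pattern above (Python 2, matching the code's vintage; the helper name get_lock and the use of os.O_EXCL are illustrative of what FileUtils.atomicCreate presumably provides, not taken from WikiDump.py):

    import os
    import socket

    def get_lock(path):
        # O_CREAT|O_EXCL makes creation atomic: exactly one process wins
        try:
            fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
        except OSError:
            return False  # lock already held by someone else
        # record host and pid so a stale lock can be traced to its owner
        os.write(fd, "%s %d" % (socket.getfqdn(), os.getpid()))
        os.close(fd)
        return True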

[MediaWiki-CVS] SVN: [99435] branches/ariel/xmldumps-backup/WikiDump.py

2011-10-10 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/99435

Revision: 99435
Author:   ariel
Date: 2011-10-10 21:47:20 +0000 (Mon, 10 Oct 2011)
Log Message:
---
function for file size and date

Modified Paths:
--
branches/ariel/xmldumps-backup/WikiDump.py

Modified: branches/ariel/xmldumps-backup/WikiDump.py
===
--- branches/ariel/xmldumps-backup/WikiDump.py	2011-10-10 21:36:52 UTC (rev 99434)
+++ branches/ariel/xmldumps-backup/WikiDump.py	2011-10-10 21:47:20 UTC (rev 99435)
@@ -87,6 +87,17 @@
 		else:
 			return FileUtils._prettySize(size / 1024.0, quanta[1:])
 
+	def fileInfo(path):
+		"""Return a tuple of date/time and size of a file, or None, None"""
+		try:
+			timestamp = time.gmtime(os.stat(path).st_mtime)
+			timestamp = time.strftime("%Y-%m-%d %H:%M:%S", timestamp)
+			size = os.path.getsize(path)
+			return (timestamp, size)
+		except:
+			return (None, None)
+
 	fileAge = staticmethod(fileAge)
 	atomicCreate = staticmethod(atomicCreate)
 	writeFile = staticmethod(writeFile)
@@ -96,6 +107,7 @@
 	relativePath = staticmethod(relativePath)
 	prettySize = staticmethod(prettySize)
 	_prettySize = staticmethod(_prettySize)
+	fileInfo = staticmethod(fileInfo)
 
 class TimeUtils(object):
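
For reference, the new FileUtils.fileInfo can be exercised on its own; a rough standalone equivalent (the sample paths below are made up):

    import os
    import time

    def file_info(path):
        # same contract as FileUtils.fileInfo: (timestamp, size) or (None, None)
        try:
            ts = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(os.stat(path).st_mtime))
            return (ts, os.path.getsize(path))
        except OSError:
            return (None, None)

    for path in ["/etc/hosts", "/no/such/file"]:
        timestamp, size = file_info(path)
        print path, timestamp, size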
 


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [99203] branches/ariel/xmldumps-backup/worker.py

2011-10-07 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/99203

Revision: 99203
Author:   ariel
Date: 2011-10-07 09:36:16 +0000 (Fri, 07 Oct 2011)
Log Message:
---
last het deploy fix (for active abstract filter); remove some comment cruft; 
clean up old symlinks in latest only for the given job; don't barf with 
exception if we are asked to get files from dump dir that hasn't been created

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py	2011-10-07 09:26:00 UTC (rev 99202)
+++ branches/ariel/xmldumps-backup/worker.py	2011-10-07 09:36:16 UTC (rev 99203)
@@ -179,14 +179,26 @@
 		return( " ".join(MultiVersion.MWScriptAsArray(config, maintenanceScript)))
 
 	def MWScriptAsArray(config, maintenanceScript):
-		MWScriptLocation = os.path.join(config.wikiDir, "..", "multiversion", "MWScript.php")
+		MWScriptLocation = os.path.join(config.wikiDir, "multiversion", "MWScript.php")
 		if exists(MWScriptLocation):
 			return [ MWScriptLocation, maintenanceScript ]
 		else:
 			return [ "%s/maintenance/%s" % (config.wikiDir, maintenanceScript) ]
 
+	def MWVersion(config, dbName):
+		getVersionLocation = os.path.join(config.wikiDir, "multiversion", "getMWVersion")
+		if exists(getVersionLocation):
+			# run the command for the wiki and get the version
+			command = getVersionLocation + " " + dbName
+			version = RunSimpleCommand.runAndReturn(command)
+			if version:
+				version = version.rstrip()
+				return version
+		return None
+
 	MWScriptAsString = staticmethod(MWScriptAsString)
 	MWScriptAsArray = staticmethod(MWScriptAsArray)
+	MWVersion = staticmethod(MWVersion)
 
 class DbServerInfo(object):
 	def __init__(self, wiki, dbName, errorCallback = None):
@@ -200,7 +212,6 @@
 		if (not exists( self.wiki.config.php ) ):
 			raise BackupError("php command %s not found" % self.wiki.config.php)
 		commandList = MultiVersion.MWScriptAsArray(self.wiki.config, "getSlaveServer.php")
-#		command = "%s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump" % MiscUtils.shellEscape((
 		for i in range(0,len(commandList)):
 			phpCommand = MiscUtils.shellEscape(self.wiki.config.php)
 			dbName = MiscUtils.shellEscape(self.dbName)
@@ -267,7 +278,6 @@
 		if (not exists( self.wiki.config.php ) ):
 			raise BackupError("php command %s not found" % self.wiki.config.php)
 		commandList = MultiVersion.MWScriptAsArray(self.wiki.config, "eval.php")
-#		command = "echo 'print $wgDBprefix; ' | %s -q %s/maintenance/eval.php --wiki=%s" % MiscUtils.shellEscape((
 		for i in range(0,len(commandList)):
 			phpCommand = MiscUtils.shellEscape(self.wiki.config.php)
 			dbName = MiscUtils.shellEscape(self.dbName)
@@ -891,11 +901,14 @@
 		if not date:
 			date = self._wiki.date
 		directory = os.path.join(self._wiki.publicDir(), date)
-		dirTimeStamp = os.stat(directory).st_mtime
-		if (not date in self._dirCache or dirTimeStamp > self._dirCacheTime[date]):
+		if exists(directory):
+			dirTimeStamp = os.stat(directory).st_mtime
+			if (not date in self._dirCache or dirTimeStamp > self._dirCacheTime[date]):
+				return True
+			else:
+				return False
+		else:
 			return True
-		else:
-			return False
 
 	# warning: date can also be "latest"
 	def getFilesInDir(self, date = None):
@@ -903,15 +916,18 @@
 		date = self._wiki.date
 		if (self.dirCacheOutdated(date)):
 			directory = os.path.join(self._wiki.publicDir(),date)
-			dirTimeStamp = os.stat(directory).st_mtime
-			files = os.listdir(directory)
-			fileObjs = []
-			for f in files:
-				fileObj = DumpFilename(self._wiki)
-				fileObj.newFromFilename(f)
-				fileObjs.append(fileObj)
-			self._dirCache[date] = fileObjs
-			self._dirCacheTime[date] = dirTimeStamp
+			if exists(directory):
+				dirTimeStamp = os.stat

[MediaWiki-CVS] SVN: [98214] branches/ariel/xmldumps-backup

2011-09-27 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/98214

Revision: 98214
Author:   ariel
Date: 2011-09-27 09:16:37 +0000 (Tue, 27 Sep 2011)
Log Message:
---
allow for self-termination via 'maintenance mode'

Modified Paths:
--
branches/ariel/xmldumps-backup/monitor.py
branches/ariel/xmldumps-backup/worker
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/monitor.py
===
--- branches/ariel/xmldumps-backup/monitor.py	2011-09-27 08:51:19 UTC (rev 98213)
+++ branches/ariel/xmldumps-backup/monitor.py	2011-09-27 09:16:37 UTC (rev 98214)
@@ -3,6 +3,8 @@
 import os
 import sys
 import WikiDump
+from os.path import exists
+from WikiDump import FileUtils
 
 # can specify name of alternate config file
 if (sys.argv[1]):
@@ -32,6 +34,8 @@
 
 	if running:
 		status = "Dumps are in progress..."
+	elif exists("maintenance.txt"):
+		status = FileUtils.readFile("maintenance.txt")
 	else:
 		status = "Dump process is idle."


Modified: branches/ariel/xmldumps-backup/worker
===
--- branches/ariel/xmldumps-backup/worker	2011-09-27 08:51:19 UTC (rev 98213)
+++ branches/ariel/xmldumps-backup/worker	2011-09-27 09:16:37 UTC (rev 98214)
@@ -7,11 +7,16 @@
 fi
 
 while true; do
-    if [ ! -z "$configFile" ]; then
-	python $WIKIDUMP_BASE/worker.py --configfile "$configFile"
+    if [ -e maintenance.txt ]; then
+	echo "in maintenance mode, sleeping 5 minutes"
+	sleep 300
     else
-	python $WIKIDUMP_BASE/worker.py
+	if [ ! -z "$configFile" ]; then
+	    python $WIKIDUMP_BASE/worker.py --configfile "$configFile"
+	else
+	    python $WIKIDUMP_BASE/worker.py
+	fi
+	echo "sleeping"
+	sleep 30
     fi
-    echo "sleeping"
-    sleep 30
 done

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py	2011-09-27 08:51:19 UTC (rev 98213)
+++ branches/ariel/xmldumps-backup/worker.py	2011-09-27 09:16:37 UTC (rev 98214)
@@ -24,6 +24,26 @@
 from WikiDump import FileUtils, MiscUtils, TimeUtils
 from CommandManagement import CommandPipeline, CommandSeries, CommandsInParallel
 
+class Maintenance(object):
+
+	def inMaintenanceMode():
+		"""Use this to let callers know that we really should not
+		be running.  Callers should try to exit the job
+		they are running as soon as possible."""
+		return exists("maintenance.txt")
+
+	def exitIfInMaintenanceMode(message = None):
+		"""Call this from possible exit points of running jobs
+		in order to exit if we need to"""
+		if Maintenance.inMaintenanceMode():
+			if message:
+				raise BackupError(message)
+			else:
+				raise BackupError("In maintenance mode, exiting.")
+
+	inMaintenanceMode = staticmethod(inMaintenanceMode)
+	exitIfInMaintenanceMode = staticmethod(exitIfInMaintenanceMode)
+
 class Logger(object):
 
 	def __init__(self, logFileName=None):
@@ -1735,6 +1755,8 @@
 			# mark all the following jobs to run as well 
 			self.dumpItemList.markFollowingJobsToRun()
 
+		Maintenance.exitIfInMaintenanceMode("In maintenance mode, exiting dump of %s" % self.dbName )
+
 		self.makeDir(os.path.join(self.wiki.publicDir(), self.wiki.date))
 		self.makeDir(os.path.join(self.wiki.privateDir(), self.wiki.date))
 
@@ -1752,6 +1774,8 @@
 
 		for item in self.dumpItemList.dumpItems:
 			if (item.toBeRun()):
+				Maintenance.exitIfInMaintenanceMode("In maintenance mode, exiting dump of %s at step %s" % ( self.dbName, self.jobRequested ) )
+
 				item.start(self)
 				self.status.updateStatusFiles()
 				self.runInfoFile.saveDumpRunInfoFile(self.dumpItemList.reportDumpRunInfo())
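
The maintenance-mode convention above, reduced to a sketch (Python 2; RuntimeError stands in for worker.py's BackupError, and the step names are invented):

    import os

    MAINT_FILE = "maintenance.txt"  # same flag file the diff checks for

    def exit_if_in_maintenance_mode(message="In maintenance mode, exiting."):
        # call this at every safe exit point of a long-running job
        if os.path.exists(MAINT_FILE):
            raise RuntimeError(message)

    for step in ["stubs", "articles", "history"]:
        exit_if_in_maintenance_mode("In maintenance mode, exiting dump at step %s" % step)
        # ... run the step here ...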


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [98215] branches/ariel/xmldumps-backup

2011-09-27 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/98215

Revision: 98215
Author:   ariel
Date: 2011-09-27 10:12:36 +0000 (Tue, 27 Sep 2011)
Log Message:
---
quit generating dumps after n failures in a row (n = 3 for now); we probably
have some serious problem

Modified Paths:
--
branches/ariel/xmldumps-backup/worker
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker
===
--- branches/ariel/xmldumps-backup/worker	2011-09-27 09:16:37 UTC (rev 98214)
+++ branches/ariel/xmldumps-backup/worker	2011-09-27 10:12:36 UTC (rev 98215)
@@ -1,5 +1,10 @@
 #!/bin/bash
 
+# number of failures of worker.py in a row before we decide
+# something serious is broken and we refuse to run
+MAXFAILS=3
+failures=0
+
 WIKIDUMP_BASE=`dirname $0`
 
 if [ ! -z "$1" ]; then
@@ -7,7 +12,7 @@
 fi
 
 while true; do
-    if [ -e maintenance.txt ]; then
+    if [ -e "$WIKIDUMP_BASE/maintenance.txt" ]; then
 	echo "in maintenance mode, sleeping 5 minutes"
 	sleep 300
     else
@@ -16,6 +21,13 @@
 	else
 	    python $WIKIDUMP_BASE/worker.py
 	fi
+	if [ $? -ne 0 ]; then
+	    failures=$(($failures+1))
+	    if [ $failures -gt $MAXFAILS ]; then
+		echo "more than $MAXFAILS failures in a row, halting."
+		exit 1
+	    fi
+	fi
 	echo "sleeping"
 	sleep 30
     fi

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py	2011-09-27 09:16:37 UTC (rev 98214)
+++ branches/ariel/xmldumps-backup/worker.py	2011-09-27 10:12:36 UTC (rev 98215)
@@ -1832,6 +1832,12 @@
 
 		self.showRunnerStateComplete()
 
+		# let caller know if this was a successful run
+		if self.status.failCount > 0:
+			return False
+		else:
+			return True
+
 	def cleanOldDumps(self):
 		if self._cleanOldDumpsEnabled:
 			old = self.wiki.dumpDirs()
@@ -3706,6 +3712,7 @@
 	chunkToDo = False
 	checkpointFile = None
 	pageIDRange = None
+	result = False
 
 	try:
 		(options, remainder) = getopt.gnu_getopt(sys.argv[1:], "",
@@ -3806,11 +3813,16 @@
 				print "Running %s, job %s..." % (wiki.dbName, jobRequested)
 			else:
 				print "Running %s..." % wiki.dbName
-			runner.run()
+			result = runner.run()
 			# if we are doing one piece only of the dump, we don't unlock either
 			if locksEnabled:
 				wiki.unlock()
 		else:
 			print "No wikis available to run."
+			result = True
 	finally:
 		WikiDump.cleanup()
+	if result == False:
+		sys.exit(1)
+	else:
+		sys.exit(0)
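
How the two commits above fit together: run() now reports success, and the exit code makes that visible to the shell wrapper, which counts consecutive failures. A minimal sketch of the same contract (names simplified):

    import sys

    def run(fail_count):
        # mirrors the new logic: any failed dump item means an unsuccessful run
        return fail_count == 0

    result = run(fail_count=0)
    sys.exit(0 if result else 1)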


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [98115] branches/ariel/xmldumps-backup/dumpcentralauth.sh

2011-09-26 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/98115

Revision: 98115
Author:   ariel
Date: 2011-09-26 07:05:23 +0000 (Mon, 26 Sep 2011)
Log Message:
---
fix path for db.php; add mwscript wrapper

Modified Paths:
--
branches/ariel/xmldumps-backup/dumpcentralauth.sh

Modified: branches/ariel/xmldumps-backup/dumpcentralauth.sh
===
--- branches/ariel/xmldumps-backup/dumpcentralauth.sh	2011-09-26 06:24:55 UTC (rev 98114)
+++ branches/ariel/xmldumps-backup/dumpcentralauth.sh	2011-09-26 07:05:23 UTC (rev 98115)
@@ -16,9 +16,13 @@
 	echo "exiting..."
 	exit 1
 fi
-dbcluster=`grep centralauth /apache/common/php/wmf-config/db.php | awk -F"'" ' { print $4 }'`
-wiki=`grep $dbcluster /apache/common/php/wmf-config/db.php | grep wiki | head -1 | awk -F"'" ' { print $2 }'`
-host=`echo 'echo wfGetLB()->getServerName(0);' | php /apache/common/php/maintenance/eval.php $wiki`
+if [ ! -f /apache/common/wmf-config/db.php ]; then
+	echo "failed to find db.php, exiting..."
+	exit 1
+fi
+dbcluster=`grep centralauth /apache/common/wmf-config/db.php | awk -F"'" ' { print $4 }'`
+wiki=`grep $dbcluster /apache/common/wmf-config/db.php | grep wiki | head -1 | awk -F"'" ' { print $2 }'`
+host=`echo 'echo wfGetLB()->getServerName(0);' | php /apache/common/multiversion/MWScript.php eval.php $wiki`
 if [ -z "$dbcluster" -o -z "$wiki" -o -z "$host" ]; then
 	echo "can't locate db server for centralauth, exiting."
 	exit 1
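
A rough Python rendering of the grep/awk cluster lookup above (the db.php fragment here is invented for illustration; the real mapping lives in wmf-config/db.php):

    # split on single quotes, as awk -F"'" does; field 4 is the cluster name
    sample = """
    'centralauth' => 's7',
    'frwiki' => 's6',
    """

    for line in sample.splitlines():
        if "centralauth" in line:
            print line.split("'")[3]  # prints: s7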


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [97889] trunk/phase3/maintenance/dumpTextPass.php

2011-09-23 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97889

Revision: 97889
Author:   ariel
Date: 2011-09-23 06:15:20 +0000 (Fri, 23 Sep 2011)
Log Message:
---
handle naming checkpoint file with first/last pageID when the file is empty

Modified Paths:
--
trunk/phase3/maintenance/dumpTextPass.php

Modified: trunk/phase3/maintenance/dumpTextPass.php
===
--- trunk/phase3/maintenance/dumpTextPass.php	2011-09-23 05:50:11 UTC (rev 97888)
+++ trunk/phase3/maintenance/dumpTextPass.php	2011-09-23 06:15:20 UTC (rev 97889)
@@ -286,8 +286,19 @@
 			// we wrote some stuff after last checkpoint that needs renamed
 			if (file_exists($filenameList[0])) {
 				$newFilenames = array();
-				$firstPageID = str_pad($this->firstPageWritten,9,"0",STR_PAD_LEFT);
-				$lastPageID = str_pad($this->lastPageWritten,9,"0",STR_PAD_LEFT);
+				# we might have just written the header and footer and had no 
+				# pages or revisions written... perhaps they were all deleted
+				# there's no pageID 0 so we use that. the caller is responsible
+				# for deciding what to do with a file containing only the
+				# siteinfo information and the mw tags.
+				if (! $this->firstPageWritten) {
+					$firstPageID = str_pad(0,9,"0",STR_PAD_LEFT);
+					$lastPageID = str_pad(0,9,"0",STR_PAD_LEFT);
+				}
+				else {
+					$firstPageID = str_pad($this->firstPageWritten,9,"0",STR_PAD_LEFT);
+					$lastPageID = str_pad($this->lastPageWritten,9,"0",STR_PAD_LEFT);
+				}
 				for ( $i = 0; $i < count( $filenameList ); $i++ ) {
 					$checkpointNameFilledIn = sprintf( $this->checkpointFiles[$i], $firstPageID, $lastPageID );
 					$fileinfo = pathinfo($filenameList[$i]);
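
The renaming scheme above in isolation: page IDs are zero-padded to nine digits, and a file that ended up with no pages gets 0 for both bounds (the file name pattern below is illustrative; the real one comes from the --checkpointfile option):

    first_page = None  # e.g. every page in the range was deleted
    last_page = None

    first_id = "%09d" % (first_page or 0)
    last_id = "%09d" % (last_page or 0)
    # the checkpoint pattern must carry exactly two %s slots
    print "enwiki-pages-meta-history-p%s-p%s.xml.bz2" % (first_id, last_id)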


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [97895] trunk/phase3/maintenance/dumpTextPass.php

2011-09-23 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97895

Revision: 97895
Author:   ariel
Date: 2011-09-23 07:48:30 +0000 (Fri, 23 Sep 2011)
Log Message:
---
add mwscript handling for call of fetchText.php maintenance script

Modified Paths:
--
trunk/phase3/maintenance/dumpTextPass.php

Modified: trunk/phase3/maintenance/dumpTextPass.php
===
--- trunk/phase3/maintenance/dumpTextPass.php	2011-09-23 07:32:41 UTC (rev 97894)
+++ trunk/phase3/maintenance/dumpTextPass.php	2011-09-23 07:48:30 UTC (rev 97895)
@@ -427,12 +427,23 @@
 	function openSpawn() {
 		global $IP;
 
-		$cmd = implode( " ",
-			array_map( 'wfEscapeShellArg',
-				array(
-					$this->php,
-					"$IP/maintenance/fetchText.php",
-					'--wiki', wfWikiID() ) ) );
+		if ( file_exists( "$IP/../multiversion/MWScript.php" ) ) {
+			$cmd = implode( " ",
+				array_map( 'wfEscapeShellArg',
+					array(
+						$this->php,
+						"$IP/../multiversion/MWScript.php",
+						"fetchText.php",
+						'--wiki', wfWikiID() ) ) );
+		}
+		else {
+			$cmd = implode( " ",
+				array_map( 'wfEscapeShellArg',
+					array(
+						$this->php,
+						"$IP/maintenance/fetchText.php",
+						'--wiki', wfWikiID() ) ) );
+		}
 		$spec = array(
 			0 => array( "pipe", "r" ),
 			1 => array( "pipe", "w" ),
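
The dispatch rule added above, as a standalone sketch in Python (paths and the wiki name are made up):

    import os

    def build_fetchtext_argv(php, ip_dir, wiki):
        # prefer the multiversion wrapper when it exists, as openSpawn now does
        mwscript = os.path.join(ip_dir, "..", "multiversion", "MWScript.php")
        if os.path.exists(mwscript):
            return [php, mwscript, "fetchText.php", "--wiki", wiki]
        return [php, os.path.join(ip_dir, "maintenance", "fetchText.php"), "--wiki", wiki]

    print build_fetchtext_argv("/usr/bin/php", "/srv/mediawiki/php", "enwiki")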


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [97897] branches/wmf/1.17wmf1/maintenance/dumpTextPass.php

2011-09-23 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97897

Revision: 97897
Author:   ariel
Date: 2011-09-23 08:16:40 +0000 (Fri, 23 Sep 2011)
Log Message:
---
mft r97895 (call MWScript for invocation of fetchText.php, if needed)

Modified Paths:
--
branches/wmf/1.17wmf1/maintenance/dumpTextPass.php

Modified: branches/wmf/1.17wmf1/maintenance/dumpTextPass.php
===
--- branches/wmf/1.17wmf1/maintenance/dumpTextPass.php	2011-09-23 08:07:28 UTC (rev 97896)
+++ branches/wmf/1.17wmf1/maintenance/dumpTextPass.php	2011-09-23 08:16:40 UTC (rev 97897)
@@ -307,12 +307,23 @@
 	function openSpawn() {
 		global $IP;
 
-		$cmd = implode( " ",
-			array_map( 'wfEscapeShellArg',
-				array(
-					$this->php,
-					"$IP/maintenance/fetchText.php",
-					'--wiki', wfWikiID() ) ) );
+		if ( file_exists( "$IP/../multiversion/MWScript.php" ) ) {
+			$cmd = implode( " ",
+				array_map( 'wfEscapeShellArg',
+					array(
+						$this->php,
+						"$IP/../multiversion/MWScript.php",
+						"fetchText.php",
+						'--wiki', wfWikiID() ) ) );
+		}
+		else {
+			$cmd = implode( " ",
+				array_map( 'wfEscapeShellArg',
+					array(
+						$this->php,
+						"$IP/maintenance/fetchText.php",
+						'--wiki', wfWikiID() ) ) );
+		}
 		$spec = array(
 			0 => array( "pipe", "r" ),
 			1 => array( "pipe", "w" ),


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [97898] branches/wmf/1.18wmf1/maintenance/dumpTextPass.php

2011-09-23 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97898

Revision: 97898
Author:   ariel
Date: 2011-09-23 08:17:19 +0000 (Fri, 23 Sep 2011)
Log Message:
---
mft r97895 (call MWScript for invocation of fetchText.php, if needed)

Modified Paths:
--
branches/wmf/1.18wmf1/maintenance/dumpTextPass.php

Modified: branches/wmf/1.18wmf1/maintenance/dumpTextPass.php
===
--- branches/wmf/1.18wmf1/maintenance/dumpTextPass.php	2011-09-23 08:16:40 UTC (rev 97897)
+++ branches/wmf/1.18wmf1/maintenance/dumpTextPass.php	2011-09-23 08:17:19 UTC (rev 97898)
@@ -422,12 +422,23 @@
 	function openSpawn() {
 		global $IP;
 
-		$cmd = implode( " ",
-			array_map( 'wfEscapeShellArg',
-				array(
-					$this->php,
-					"$IP/maintenance/fetchText.php",
-					'--wiki', wfWikiID() ) ) );
+		if ( file_exists( "$IP/../multiversion/MWScript.php" ) ) {
+			$cmd = implode( " ",
+				array_map( 'wfEscapeShellArg',
+					array(
+						$this->php,
+						"$IP/../multiversion/MWScript.php",
+						"fetchText.php",
+						'--wiki', wfWikiID() ) ) );
+		}
+		else {
+			$cmd = implode( " ",
+				array_map( 'wfEscapeShellArg',
+					array(
+						$this->php,
+						"$IP/maintenance/fetchText.php",
+						'--wiki', wfWikiID() ) ) );
+		}
 		$spec = array(
 			0 => array( "pipe", "r" ),
 			1 => array( "pipe", "w" ),


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [97946] branches/ariel/xmldumps-backup/worker.py

2011-09-23 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97946

Revision: 97946
Author:   ariel
Date: 2011-09-23 18:56:51 +0000 (Fri, 23 Sep 2011)
Log Message:
---
fold in mwscript wrapper for calls to various php maintenance scripts

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py	2011-09-23 18:25:10 UTC (rev 97945)
+++ branches/ariel/xmldumps-backup/worker.py	2011-09-23 18:56:51 UTC (rev 97946)
@@ -154,6 +154,20 @@
 			return 0
 		return chunks
 
+class MultiVersion(object):
+	def MWScriptAsString(config, maintenanceScript):
+		return( " ".join(MultiVersion.MWScriptAsArray(config, maintenanceScript)))
+
+	def MWScriptAsArray(config, maintenanceScript):
+		MWScriptLocation = os.path.join(config.wikiDir, "..", "multiversion", "MWScript.php")
+		if exists(MWScriptLocation):
+			return [ MWScriptLocation, maintenanceScript ]
+		else:
+			return [ "%s/maintenance/%s" % (config.wikiDir, maintenanceScript) ]
+
+	MWScriptAsString = staticmethod(MWScriptAsString)
+	MWScriptAsArray = staticmethod(MWScriptAsArray)
+
 class DbServerInfo(object):
 	def __init__(self, wiki, dbName, errorCallback = None):
 		self.wiki = wiki
@@ -165,8 +179,14 @@
 		# if this fails what do we do about it? Not a bleeping thing. *ugh* FIXME!!
 		if (not exists( self.wiki.config.php ) ):
 			raise BackupError("php command %s not found" % self.wiki.config.php)
-		command = "%s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump" % MiscUtils.shellEscape((
-			self.wiki.config.php, self.wiki.config.wikiDir, self.dbName))
+		commandList = MultiVersion.MWScriptAsArray(self.wiki.config, "getSlaveServer.php")
+#		command = "%s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump" % MiscUtils.shellEscape((
+		for i in range(0,len(commandList)):
+			phpCommand = MiscUtils.shellEscape(self.wiki.config.php)
+			dbName = MiscUtils.shellEscape(self.dbName)
+			commandList[i] = MiscUtils.shellEscape(commandList[i])
+		command = " ".join(commandList)
+		command = "%s -q %s --wiki=%s --group=dump" % (phpCommand, command, dbName)
 		return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip()
 
 	def selectDatabaseServer(self):
@@ -226,8 +246,14 @@
 		# FIXME later full path
 		if (not exists( self.wiki.config.php ) ):
 			raise BackupError("php command %s not found" % self.wiki.config.php)
-		command = "echo 'print $wgDBprefix; ' | %s -q %s/maintenance/eval.php --wiki=%s" % MiscUtils.shellEscape((
-			self.wiki.config.php, self.wiki.config.wikiDir, self.dbName))
+		commandList = MultiVersion.MWScriptAsArray(self.wiki.config, "eval.php")
+#		command = "echo 'print $wgDBprefix; ' | %s -q %s/maintenance/eval.php --wiki=%s" % MiscUtils.shellEscape((
+		for i in range(0,len(commandList)):
+			phpCommand = MiscUtils.shellEscape(self.wiki.config.php)
+			dbName = MiscUtils.shellEscape(self.dbName)
+			commandList[i] = MiscUtils.shellEscape(commandList[i])
+		command = " ".join(commandList)
+		command = "echo 'print $wgDBprefix; ' | %s -q %s --wiki=%s" % (phpCommand, command, dbName)
 		return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip()
 
 
@@ -2637,15 +2663,17 @@
 			articlesFile = runner.dumpDir.filenamePublicPath(f)
 			historyFile = runner.dumpDir.filenamePublicPath(DumpFilename(runner.wiki, f.date, self.historyDumpName, f.fileType, f.fileExt, f.chunk, f.checkpoint, f.temp))
 			currentFile = runner.dumpDir.filenamePublicPath(DumpFilename(runner.wiki, f.date, self.currentDumpName, f.fileType, f.fileExt, f.chunk, f.checkpoint, f.temp))
-			command = [ "%s" % runner.wiki.config.php,
-				"-q", "%s/maintenance/dumpBackup.php" % runner.wiki.config.wikiDir,
-				"--wiki=%s" % runner.dbName,
-				"--full", "--stub", "--report=1",
-				"%s" % runner.forceNormalOption(),
-				"--output=gzip:%s" % historyFile,
-				"--output=gzip:%s" % currentFile,
-				"--filter=latest", "--output=gzip:%s" % articlesFile,
-				"--filter=latest", "--filter

[MediaWiki-CVS] SVN: [97951] branches/ariel/xmldumps-backup/monitor.py

2011-09-23 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97951

Revision: 97951
Author:   ariel
Date: 2011-09-23 20:15:18 +0000 (Fri, 23 Sep 2011)
Log Message:
---
replace call to removed function with a few lines of inline code

Modified Paths:
--
branches/ariel/xmldumps-backup/monitor.py

Modified: branches/ariel/xmldumps-backup/monitor.py
===
--- branches/ariel/xmldumps-backup/monitor.py	2011-09-23 20:09:54 UTC (rev 97950)
+++ branches/ariel/xmldumps-backup/monitor.py	2011-09-23 20:15:18 UTC (rev 97951)
@@ -41,7 +41,11 @@
 
 def updateIndex():
 	outputFileName = os.path.join(config.publicDir, config.index)
-	WikiDump.dumpFile(outputFileName, generateIndex())
+	tempFilename = outputFileName + ".tmp"
+	file = open(tempFilename, "wt")
+	file.write(generateIndex())
+	file.close()
+	os.rename(tempFilename, outputFileName)
 
 if __name__ == "__main__":
 	updateIndex()
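
The write-then-rename idiom used above, shown on its own: on POSIX, os.rename within one filesystem is atomic, so readers never see a half-written index page (the path below is an example):

    import os

    def write_file_atomically(path, text):
        tmp = path + ".tmp"
        f = open(tmp, "wt")
        f.write(text)
        f.close()
        os.rename(tmp, path)  # atomic replace on the same filesystem

    write_file_atomically("/tmp/index.html", "<html>hello</html>\n")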


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [97409] trunk/backup/legal.html

2011-09-18 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97409

Revision: 97409
Author:   ariel
Date: 2011-09-18 08:02:53 +0000 (Sun, 18 Sep 2011)
Log Message:
---
add CC-BY-SA license for text, plus pointer to terms of use

Modified Paths:
--
trunk/backup/legal.html

Modified: trunk/backup/legal.html
===
--- trunk/backup/legal.html 2011-09-18 05:43:40 UTC (rev 97408)
+++ trunk/backup/legal.html 2011-09-18 08:02:53 UTC (rev 97409)
@@ -11,7 +11,9 @@
 <h1>Copyright and license</h1>
 <p>All original textual content except Wikinews original textual content is
 licensed under the <a href="http://www.wikipedia.org/wiki/Wikipedia:Copyrights" title="Wikipedia Copyrights">
-GNU Free Documentation License</a> (GFDL).
+GNU Free Documentation License</a> (GFDL) and the 
+<a href="http://creativecommons.org/licenses/by-sa/3.0/" title="Creative Commons Attribution-Share-Alike 3.0 License">Creative Commons Attribution-Share-Alike 3.0 License</a>.  Some text may be available only under the Creative Commons license; see our
+<a href="http://wikimediafoundation.org/wiki/Terms_of_use">Terms of Use</a> for details.
 Text written by some authors may be released under additional licenses
 or into the public domain. Some text (including quotations) may
 be used under fair use, usually where it is believed that the use


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [97410] branches/ariel/xmldumps-backup/legal.html

2011-09-18 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97410

Revision: 97410
Author:   ariel
Date: 2011-09-18 08:04:10 +0000 (Sun, 18 Sep 2011)
Log Message:
---
add CC-BY-SA license for text, plus pointer to terms of use (mft r97409)

Modified Paths:
--
branches/ariel/xmldumps-backup/legal.html

Modified: branches/ariel/xmldumps-backup/legal.html
===
--- branches/ariel/xmldumps-backup/legal.html	2011-09-18 08:02:53 UTC (rev 97409)
+++ branches/ariel/xmldumps-backup/legal.html	2011-09-18 08:04:10 UTC (rev 97410)
@@ -11,7 +11,9 @@
 <h1>Copyright and license</h1>
 <p>All original textual content except Wikinews original textual content is
 licensed under the <a href="http://www.wikipedia.org/wiki/Wikipedia:Copyrights" title="Wikipedia Copyrights">
-GNU Free Documentation License</a> (GFDL).
+GNU Free Documentation License</a> (GFDL) and the 
+<a href="http://creativecommons.org/licenses/by-sa/3.0/" title="Creative Commons Attribution-Share-Alike 3.0 License">Creative Commons Attribution-Share-Alike 3.0 License</a>.  Some text may be available only under the Creative Commons license; see our
+<a href="http://wikimediafoundation.org/wiki/Terms_of_use">Terms of Use</a> for details.
 Text written by some authors may be released under additional licenses
 or into the public domain. Some text (including quotations) may
 be used under fair use, usually where it is believed that the use


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [97245] branches/ariel/xmldumps-backup/worker.py

2011-09-16 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97245

Revision: 97245
Author:   ariel
Date: 2011-09-16 07:40:24 +0000 (Fri, 16 Sep 2011)
Log Message:
---
redo checkpoint file for history 7z step

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py	2011-09-16 06:51:27 UTC (rev 97244)
+++ branches/ariel/xmldumps-backup/worker.py	2011-09-16 07:40:24 UTC (rev 97245)
@@ -609,7 +609,7 @@
 				  "metahistory7zdump",
 				  "All pages with complete edit history (.7z)",
 				  "These dumps can be *very* large, uncompressing up to 100 times the archive download size. " +
-				  "Suitable for archival and statistical use, most mirror sites won't want or need this.", self.findItemByName('metahistorybz2dump'), self.wiki, self._getChunkToDo("metahistory7zdump"), self.chunkInfo.getPagesPerChunkHistory(), self.checkpointFile))
+				  "Suitable for archival and statistical use, most mirror sites won't want or need this.", self.findItemByName('metahistorybz2dump'), self.wiki, self._getChunkToDo("metahistory7zdump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile))
 		if (self.chunkInfo.chunksEnabled() and self.chunkInfo.recombineHistory()):
 			self.dumpItems.append(
 				RecombineXmlRecompressDump("metahistory7zdumprecombine",
@@ -1814,9 +1814,7 @@
 					# of that very file. meh. how likely is it that we 
 					# have one? these files are time based and the start/end pageids
 					# are going to fluctuate. whatever
-					cf = DumpFilename(self.wiki)
-					cf.newFromFilename(item.checkpointFile)
-					checkpoint = cf.checkpoint
+					checkpoint = item.checkpointFile.checkpoint
 
 			for d in dumpNames:
 				self.symLinks.removeSymLinksFromOldRuns(self.wiki.date, d, chunk, checkpoint )
@@ -3135,7 +3133,7 @@
 class XmlRecompressDump(Dump):
 	"""Take a .bz2 and recompress it as 7-Zip."""
 
-	def __init__(self, subset, name, desc, detail, itemForRecompression, wiki, chunkToDo, chunks = False, checkpoints = False):
+	def __init__(self, subset, name, desc, detail, itemForRecompression, wiki, chunkToDo, chunks = False, checkpoints = False, checkpointFile = None):
 		self._subset = subset
 		self._detail = detail
 		self._chunks = chunks
@@ -3146,6 +3144,7 @@
 		self.itemForRecompression = itemForRecompression
 		if checkpoints:
 			self._checkpointsEnabled = True
+		self.checkpointFile = checkpointFile
 		Dump.__init__(self, name, desc)
 
 	def getDumpName(self):
@@ -3182,7 +3181,11 @@
 		commands = []
 		# Remove prior 7zip attempts; 7zip will try to append to an existing archive
 		self.cleanupOldFiles(runner.dumpDir)
-		if self._chunksEnabled and not self._chunkToDo:
+		if self.checkpointFile:
+			outputFile = DumpFilename(self.wiki, None, self.checkpointFile.dumpName, self.checkpointFile.fileType, self.fileExt, self.checkpointFile.chunk, self.checkpointFile.checkpoint)
+			series = self.buildCommand(runner, [ outputFile ])
+			commands.append(series)
+		elif self._chunksEnabled and not self._chunkToDo:
 			# must set up each parallel job separately, they may have checkpoint files that
 			# need to be processed in series, it's a special case
 			for i in range(1, len(self._chunks)+1):


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [97295] branches/ariel/xmldumps-backup

2011-09-16 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97295

Revision: 97295
Author:   ariel
Date: 2011-09-16 15:44:53 +0000 (Fri, 16 Sep 2011)
Log Message:
---
fix typo in error message; make ListOutputFilesToPublish for xml stub recombine
step work; rerun xml dump step from given pageid (poorly); remove commented out
try stanzas from debugging

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py
branches/ariel/xmldumps-backup/writeuptopageid.c

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py	2011-09-16 15:24:57 UTC (rev 97294)
+++ branches/ariel/xmldumps-backup/worker.py	2011-09-16 15:44:53 UTC (rev 97295)
@@ -485,7 +485,7 @@
 		self._toBeRun = toBeRun
 
 class DumpItemList(object):
-	def __init__(self, wiki, prefetch, spawn, chunkToDo, checkpointFile, singleJob, chunkInfo, runInfoFile, dumpDir):
+	def __init__(self, wiki, prefetch, spawn, chunkToDo, checkpointFile, singleJob, chunkInfo, pageIDRange, runInfoFile, dumpDir):
 		self.wiki = wiki
 		self._hasFlaggedRevs = self.wiki.hasFlaggedRevs()
 		self._prefetch = prefetch
@@ -496,6 +496,8 @@
 		self._singleJob = singleJob
 		self._runInfoFile = runInfoFile
 		self.dumpDir = dumpDir
+		self.pageIDRange = pageIDRange
+
 		if self.wiki.config.checkpointTime:
 			checkpoints = True
 		else:
@@ -570,7 +572,7 @@
 				XmlDump("articles",
 					"articlesdump",
 					"<big><b>Articles, templates, image descriptions, and primary meta-pages.</b></big>",
-					"This contains current versions of article content, and is the archive most mirror sites will probably want.", self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo("articlesdump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile))
+					"This contains current versions of article content, and is the archive most mirror sites will probably want.", self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo("articlesdump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile, self.pageIDRange))
 			if (self.chunkInfo.chunksEnabled()):
 				self.dumpItems.append(RecombineXmlDump("articlesdumprecombine", "<big><b>Recombine articles, templates, image descriptions, and primary meta-pages.</b></big>","This contains current versions of article content, and is the archive most mirror sites will probably want.",  self.findItemByName('articlesdump')))
 
@@ -578,7 +580,7 @@
 				XmlDump("meta-current",
 					"metacurrentdump",
 					"All pages, current versions only.",
-					"Discussion and user pages are included in this complete archive. Most mirrors won't want this extra material.", self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo("metacurrentdump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile))
+					"Discussion and user pages are included in this complete archive. Most mirrors won't want this extra material.", self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo("metacurrentdump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile, self.pageIDRange))
 
 			if (self.chunkInfo.chunksEnabled()):
 				self.dumpItems.append(RecombineXmlDump("metacurrentdumprecombine", "Recombine all pages, current versions only.","Discussion and user pages are included in this complete archive. Most mirrors won't want this extra material.", self.findItemByName('metacurrentdump')))
@@ -597,7 +599,7 @@
 				  "metahistorybz2dump",
 				  "All pages with complete page edit history (.bz2)",
 				  "These dumps can be *very* large, uncompressing up to 20 times the archive download size. " +
-				  "Suitable for archival and statistical use, most mirror sites won't want or need this.", self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo("metahistorybz2dump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile))
+				  "Suitable for archival and statistical use, most mirror sites won't want or need this.", self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo("metahistorybz2dump

[MediaWiki-CVS] SVN: [97178] trunk/phase3/includes/Export.php

2011-09-15 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/97178

Revision: 97178
Author:   ariel
Date: 2011-09-15 17:18:13 +0000 (Thu, 15 Sep 2011)
Log Message:
---
throw exception if rename of output file fails

Modified Paths:
--
trunk/phase3/includes/Export.php

Modified: trunk/phase3/includes/Export.php
===
--- trunk/phase3/includes/Export.php	2011-09-15 16:42:22 UTC (rev 97177)
+++ trunk/phase3/includes/Export.php	2011-09-15 17:18:13 UTC (rev 97178)
@@ -759,8 +759,10 @@
 		}
 		if ( $newname ) {
 			fclose( $this->handle );
-			rename( $this->filename, $newname );
-			if ( $open ) {
+			if (! rename( $this->filename, $newname ) ) {
+				throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
+			}
+			elseif ( $open ) {
 				$this->handle = fopen( $this->filename, "wt" );
 			}
 		}
@@ -814,8 +816,10 @@
 		if ( $newname ) {
 			fclose( $this->handle );
 			proc_close( $this->procOpenResource );
-			rename( $this->filename, $newname );
-			if ( $open ) {
+			if (! rename( $this->filename, $newname ) ) {
+				throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
+			}
+			elseif ( $open ) {
 				$command = $this->command;
 				$command .= " " . wfEscapeShellArg( $this->filename );
 				$this->startCommand( $command );
@@ -873,8 +877,10 @@
 		if ( $newname ) {
 			fclose( $this->handle );
 			proc_close( $this->procOpenResource );
-			rename( $this->filename, $newname );
-			if ( $open ) {
+			if (! rename( $this->filename, $newname ) ) {
+				throw new MWException( __METHOD__ . ": rename of file {$this->filename} to $newname failed\n" );
+			}
+			elseif ( $open ) {
 				$command = "7za a -bd -si " . wfEscapeShellArg( $file );
 				// Suppress annoying useless crap from p7zip
 				// Unfortunately this could suppress real error messages too
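
A Python analogue of the change above, surfacing a failed rename instead of silently losing the output file (RuntimeError stands in for MWException; the demo paths are made up):

    import os

    def rename_or_raise(old, new):
        try:
            os.rename(old, new)
        except OSError, e:
            raise RuntimeError("rename of file %s to %s failed: %s" % (old, new, e))

    open("/tmp/demo_old.txt", "w").close()
    rename_or_raise("/tmp/demo_old.txt", "/tmp/demo_new.txt")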


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [96826] branches/ariel/xmldumps-backup/worker.py

2011-09-12 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96826

Revision: 96826
Author:   ariel
Date: 2011-09-12 07:38:15 +0000 (Mon, 12 Sep 2011)
Log Message:
---
remove some dead fixmes, add _chunkToDo to base Dump class, seriously clean up
linking to rss feed files and removal of old latest links; these were pretty
broken after checkpoint files went in

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py	2011-09-12 02:10:11 UTC (rev 96825)
+++ branches/ariel/xmldumps-backup/worker.py	2011-09-12 07:38:15 UTC (rev 96826)
@@ -1792,6 +1792,35 @@
 			dumpFile = DumpFilename(self.wiki, None, self.checksums.getChecksumFileNameBasename())
 			self.symLinks.saveSymlink(dumpFile)
 			self.symLinks.cleanupSymLinks()
+
+			for item in self.dumpItemList.dumpItems:
+				dumpNames = item.getDumpName()
+				if type(dumpNames).__name__!='list':
+					dumpNames = [ dumpNames ]
+
+				if (item._chunksEnabled):
+					# if there is a specific chunk, we want to only clear out
+					# old files for that piece, because new files for the other
+					# pieces may not have been generated yet.
+					chunk = item._chunkToDo
+				else:
+					chunk = None
+
+				checkpoint = None
+				if (item._checkpointsEnabled):
+					if (item.checkpointFile):
+						# if there's a specific checkpoint file we are
+						# rerunning, we would only clear out old copies
+						# of that very file. meh. how likely is it that we 
+						# have one? these files are time based and the start/end pageids
+						# are going to fluctuate. whatever
+						cf = DumpFilename(self.wiki)
+						cf.newFromFilename(item.checkpointFile)
+						checkpoint = cf.checkpoint
+
+				for d in dumpNames:
+					self.symLinks.removeSymLinksFromOldRuns(self.wiki.date, d, chunk, checkpoint )
+
 			self.feeds.cleanupFeeds()
 
 	def makeDir(self, dir):
@@ -1826,10 +1855,15 @@
 			link = os.path.join(self.dumpDir.latestDir(), latestFilename)
 			if exists(link) or os.path.islink(link):
 				if os.path.islink(link):
-					realfile = os.readlink(link)
+					oldrealfile = os.readlink(link)
 					# format of these links should be...  ../20110228/elwikidb-20110228-templatelinks.sql.gz
 					rellinkpattern = re.compile('^\.\./(20[0-9]+)/')
-					dateinterval = int(self.wiki.date) - int(dumpFile.date)
+					dateinlink = rellinkpattern.search(oldrealfile)
+					if (dateinlink):
+						dateoflinkedfile = dateinlink.group(1)
+						dateinterval = int(self.wiki.date) - int(dateoflinkedfile)
+					else:
+						dateinterval = 0
 					# no file or it's older than ours... *then* remove the link
 					if not exists(os.path.realpath(link)) or dateinterval > 0:
 						self.debugfn("Removing old symlink %s" % link)
@@ -1854,6 +1888,34 @@
 				if not exists(os.path.join(latestDir,realfile)):
 					os.remove(link)
 
+	# if the args are False or None, we remove all the old links for all values of the arg.
+	# example: if chunk is False or None then we remove all old values for all chunks
+	# old means older than the specified datestring.
+	def removeSymLinksFromOldRuns(self, dateString, dumpName=None, chunk=None, checkpoint=None):
+		# fixme this needs to do more work if there are chunks or checkpoint files linked in here from 
+		# earlier dates. checkpoint ranges change, and configuration of chunks changes too, so maybe
+		# old files still exist and the links need to be removed because we have newer files
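
The link-target parsing introduced above, standalone: pull the YYYYMMDD run date out of a relative symlink target and compare it with the current run, treating an unparseable target as "same age":

    import re

    def date_interval(current_date, link_target):
        m = re.compile('^\.\./(20[0-9]+)/').search(link_target)
        if m:
            return int(current_date) - int(m.group(1))
        return 0  # pattern missed; do not treat the link as old

    # positive means the link points at an older run
    print date_interval("20110912", "../20110228/elwikidb-20110228-templatelinks.sql.gz")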

[MediaWiki-CVS] SVN: [96648] trunk/phase3

2011-09-09 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96648

Revision: 96648
Author:   ariel
Date: 2011-09-09 07:28:11 + (Fri, 09 Sep 2011)
Log Message:
---
getFilename renamed to getFilenames since it can return a list

Modified Paths:
--
trunk/phase3/includes/Export.php
trunk/phase3/maintenance/dumpTextPass.php

Modified: trunk/phase3/includes/Export.php
===
--- trunk/phase3/includes/Export.php	2011-09-09 05:17:05 UTC (rev 96647)
+++ trunk/phase3/includes/Export.php	2011-09-09 07:28:11 UTC (rev 96648)
@@ -724,7 +724,7 @@
 	 * Returns the name of the file or files which are
 	 * being written to, if there are any.
 	 */
-	function getFilename() {
+	function getFilenames() {
 		return NULL;
 	}
 }
@@ -766,7 +766,7 @@
 		}
 	}
 
-	function getFilename() {
+	function getFilenames() {
 		return $this->filename;
 	}
 }
@@ -938,8 +938,8 @@
 		$this->sink->closeAndRename( $newname, $open );
 	}
 
-	function getFilename() {
-		return $this->sink->getFilename();
+	function getFilenames() {
+		return $this->sink->getFilenames();
 	}
 
 	/**
@@ -1100,10 +1100,10 @@
 		}
 	}
 
-	function getFilename() {
+	function getFilenames() {
 		$filenames = array();
 		for ( $i = 0; $i < $this->count; $i++ ) {
-			$filenames[] =  $this->sinks[$i]->getFilename();
+			$filenames[] =  $this->sinks[$i]->getFilenames();
 		}
 		return $filenames;
 	}

Modified: trunk/phase3/maintenance/dumpTextPass.php
===
--- trunk/phase3/maintenance/dumpTextPass.php	2011-09-09 05:17:05 UTC (rev 96647)
+++ trunk/phase3/maintenance/dumpTextPass.php	2011-09-09 07:28:11 UTC (rev 96648)
@@ -246,7 +246,7 @@
 		}
 
 		if ( $this->checkpointFiles ) {
-			$filenameList = (array)$this->egress->getFilename();
+			$filenameList = (array)$this->egress->getFilenames();
 			if ( count( $filenameList ) != count( $this->checkpointFiles ) ) {
 				throw new MWException("One checkpointfile must be specified for each output option, if maxtime is used.\n");
 			}
@@ -282,7 +282,7 @@
 			$offset += strlen( $chunk );
 		} while ( $chunk !== false && !feof( $input ) );
 		if ($this->maxTimeAllowed) {
-			$filenameList = (array)$this->egress->getFilename();
+			$filenameList = (array)$this->egress->getFilenames();
 			// we wrote some stuff after last checkpoint that needs renamed
 			if (file_exists($filenameList[0])) {
 				$newFilenames = array();
@@ -571,7 +571,7 @@
 				$this->thisPage = "";
 				// this could be more than one file if we had more than one output arg
 				$checkpointFilenames = array();
-				$filenameList = (array)$this->egress->getFilename();
+				$filenameList = (array)$this->egress->getFilenames();
 				$newFilenames = array();
 				$firstPageID = str_pad($this->firstPageWritten,9,"0",STR_PAD_LEFT);
 				$lastPageID = str_pad($this->lastPageWritten,9,"0",STR_PAD_LEFT);


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [96651] branches/ariel/xmldumps-backup/worker.py

2011-09-09 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96651

Revision: 96651
Author:   ariel
Date: 2011-09-09 09:23:33 + (Fri, 09 Sep 2011)
Log Message:
---
names of links in 'latest' directory should have 'latest' and not the date

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py	2011-09-09 09:03:54 UTC (rev 96650)
+++ branches/ariel/xmldumps-backup/worker.py	2011-09-09 09:23:33 UTC (rev 96651)
@@ -1822,7 +1822,8 @@
 		if (self._enabled):
 			self.makeDir(self.dumpDir.latestDir())
 			realfile = self.dumpDir.filenamePublicPath(dumpFile)
-			link = os.path.join(self.dumpDir.latestDir(), dumpFile.filename)
+			latestFilename = dumpFile.newFilename(dumpFile.dumpName, dumpFile.fileType, dumpFile.fileExt, 'latest', dumpFile.chunk, dumpFile.checkpoint, dumpFile.temp)
+			link = os.path.join(self.dumpDir.latestDir(), latestFilename)
 			if exists(link) or os.path.islink(link):
 				if os.path.islink(link):
 					realfile = os.readlink(link)


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [96616] trunk/phase3/maintenance/dumpTextPass.php

2011-09-08 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96616

Revision: 96616
Author:   ariel
Date: 2011-09-08 21:06:15 + (Thu, 08 Sep 2011)
Log Message:
---
uniform comment style, fix a few space issues, address couple issues from 
comments on r95272

Modified Paths:
--
trunk/phase3/maintenance/dumpTextPass.php

Modified: trunk/phase3/maintenance/dumpTextPass.php
===
--- trunk/phase3/maintenance/dumpTextPass.php	2011-09-08 20:58:48 UTC (rev 96615)
+++ trunk/phase3/maintenance/dumpTextPass.php	2011-09-08 21:06:15 UTC (rev 96616)
@@ -56,9 +56,9 @@
 
 	var $xmlwriterobj = false;
 
-	# when we spend more than maxTimeAllowed seconds on this run, we continue
-	# processing until we write out the next complete page, then save output file(s),
-	# rename it/them and open new one(s)
+	// when we spend more than maxTimeAllowed seconds on this run, we continue
+	// processing until we write out the next complete page, then save output file(s),
+	// rename it/them and open new one(s)
 	var $maxTimeAllowed = 0;  // 0 = no limit
 	var $timeExceeded = false;
 	var $firstPageWritten = false;
@@ -72,11 +72,11 @@
 	}
 
 	function dump( $history, $text = WikiExporter::TEXT ) {
-		# This shouldn't happen if on console... ;)
+		// This shouldn't happen if on console... ;)
 		header( 'Content-type: text/html; charset=UTF-8' );
 
-		# Notice messages will foul up your XML output even if they're
-		# relatively harmless.
+		// Notice messages will foul up your XML output even if they're
+		// relatively harmless.
 		if ( ini_get( 'display_errors' ) )
 			ini_set( 'display_errors', 'stderr' );
 
@@ -86,10 +86,10 @@
 
 		$this->egress = new ExportProgressFilter( $this->sink, $this );
 
-		# it would be nice to do it in the constructor, oh well. need egress set
+		// it would be nice to do it in the constructor, oh well. need egress set
 		$this->finalOptionCheck();
 
-		# we only want this so we know how to close a stream :-P
+		// we only want this so we know how to close a stream :-P
 		$this->xmlwriterobj = new XmlDumpWriter();
 
 		$input = fopen( $this->input, "rt" );
@@ -234,23 +234,20 @@
 	}
 
 	function finalOptionCheck() {
-		if (($this->checkpointFiles && ! $this->maxTimeAllowed) ||
-			($this->maxTimeAllowed && !$this->checkpointFiles)) {
+		if ( ( $this->checkpointFiles && ! $this->maxTimeAllowed ) ||
+			( $this->maxTimeAllowed && !$this->checkpointFiles ) ) {
 			throw new MWException("Options checkpointfile and maxtime must be specified together.\n");
 		}
 		foreach ($this->checkpointFiles as $checkpointFile) {
-			$count = substr_count ($checkpointFile,"%s");
-			if (substr_count ($checkpointFile,"%s") != 2) {
+			$count = substr_count ( $checkpointFile,"%s" );
+			if ( $count != 2 ) {
 				throw new MWException("Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, file is $checkpointFile.\n");
 			}
 		}
 
-		if ($this->checkpointFiles) {
-			$filenameList = $this->egress->getFilename();
-			if (! is_array($filenameList)) {
-				$filenameList = array( $filenameList );
-			}
-			if (count($filenameList) != count($this->checkpointFiles)) {
+		if ( $this->checkpointFiles ) {
+			$filenameList = (array)$this->egress->getFilename();
+			if ( count( $filenameList ) != count( $this->checkpointFiles ) ) {
 				throw new MWException("One checkpointfile must be specified for each output option, if maxtime is used.\n");
 			}
 		}
@@ -285,19 +282,16 @@
 			$offset += strlen( $chunk );
 		} while ( $chunk !== false && !feof( $input ) );
 		if ($this->maxTimeAllowed) {
-			$filenameList = $this->egress->getFilename();
-			# we wrote some stuff after last checkpoint that needs renamed */
-			if (! is_array($filenameList)) {
-				$filenameList = array( $filenameList );
-			}
+			$filenameList = (array)$this->egress->getFilename();
+			// we wrote some stuff after last checkpoint that needs renamed
 			if (file_exists

[MediaWiki-CVS] SVN: [96486] trunk/phase3/includes/Export.php

2011-09-07 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96486

Revision: 96486
Author:   ariel
Date: 2011-09-07 20:21:52 +0000 (Wed, 07 Sep 2011)
Log Message:
---
get rid of duplication, remove unused function rename(), add documentation as 
per comments on r95260

Modified Paths:
--
trunk/phase3/includes/Export.php

Modified: trunk/phase3/includes/Export.php
===
--- trunk/phase3/includes/Export.php	2011-09-07 20:14:20 UTC (rev 96485)
+++ trunk/phase3/includes/Export.php	2011-09-07 20:21:52 UTC (rev 96486)
@@ -709,17 +709,21 @@
 		return;
 	}
 
-	// TODO: document
-	function closeAndRename( $newname ) {
+	/**
+	 * Close the old file, and move it to a specified name.
+	 * Use this for the last piece of a file written out 
+	 * at specified checkpoints (e.g. every n hours).
+	 * @param $newname mixed File name. May be a string or an array with one element
+	 * @param $open bool If true, a new file with the old filename will be opened again for writing (default: false)
+	 */
+	function closeAndRename( $newname, $open = false ) {
 		return;
 	}
 
-	// TODO: document
-	function rename( $newname ) {
-		return;
-	}
-
-	// TODO: document
+	/**
+	 * Returns the name of the file or files which are
+	 * being written to, if there are any.
+	 */
 	function getFilename() {
 		return NULL;
 	}
@@ -742,21 +746,10 @@
 	}
 
 	function closeRenameAndReopen( $newname ) {
-		if ( is_array( $newname ) ) {
-			if ( count( $newname ) > 1 ) {
-				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
-			} else {
-				$newname = $newname[0];
-			}
-		}
-		if ( $newname ) {
-			fclose( $this->handle );
-			rename( $this->filename, $newname );
-			$this->handle = fopen( $this->filename, "wt" );
-		}
+		$this->closeAndRename( $newname, true );
 	}
 
-	function closeAndRename( $newname ) {
+	function closeAndRename( $newname, $open = false ) {
 		if ( is_array( $newname ) ) {
 			if ( count( $newname ) > 1 ) {
 				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
@@ -767,20 +760,10 @@
 		if ( $newname ) {
 			fclose( $this->handle );
 			rename( $this->filename, $newname );
-		}
-	}
-
-	function rename( $newname ) {
-		if ( is_array( $newname ) ) {
-			if ( count( $newname ) > 1 ) {
-				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
-			} else {
-				$newname = $newname[0];
+			if ( $open ) {
+				$this->handle = fopen( $this->filename, "wt" );
 			}
 		}
-		if ( $newname ) {
-			rename( $this->filename, $newname );
-		}
 	}
 
 	function getFilename() {
@@ -816,29 +799,11 @@
 		$this->handle = $pipes[0];
 	}
 
-	/**
-	 * Close the old file, move it to a specified name,
-	 * and reopen new file with the old name.
-	 */
 	function closeRenameAndReopen( $newname ) {
-		if ( is_array( $newname ) ) {
-			if ( count( $newname ) > 1 ) {
-				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
-			} else {
-				$newname = $newname[0];
-			}
-		}
-		if ( $newname ) {
-			fclose( $this->handle );
-			proc_close( $this->procOpenResource );
-			rename( $this->filename, $newname );
-			$command = $this->command;
-			$command .= " " . wfEscapeShellArg( $this->filename );
-			$this->startCommand( $command );
-		}
+		$this->closeAndRename( $newname, true );
 	}
 
-	function closeAndRename( $newname ) {
+	function closeAndRename( $newname, $open = false ) {
 		if ( is_array( $newname ) ) {
 			if ( count( $newname ) > 1 ) {
 				throw new MWException( __METHOD__ . ": passed multiple arguments for rename of single file\n" );
@@ -847,25 +812,17

[MediaWiki-CVS] SVN: [96314] branches/ariel/xmldumps-backup/create-rsync-list.sh

2011-09-06 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96314

Revision: 96314
Author:   ariel
Date: 2011-09-06 08:04:24 +0000 (Tue, 06 Sep 2011)
Log Message:
---
script which creates a list of the n most recent successful XML dumps per
project for mirroring

Added Paths:
---
branches/ariel/xmldumps-backup/create-rsync-list.sh

Added: branches/ariel/xmldumps-backup/create-rsync-list.sh
===
--- branches/ariel/xmldumps-backup/create-rsync-list.sh 
(rev 0)
+++ branches/ariel/xmldumps-backup/create-rsync-list.sh 2011-09-06 08:04:24 UTC 
(rev 96314)
@@ -0,0 +1,189 @@
+#!/bin/bash
+
+# This script generates a list of the last n sets of XML dump files
+# per project that were successful, adding failed dumps to the list if there
+# are not n successful dumps available.  
+
+# Options:
+# dumpsnumber -- number of dumps to list
+# outputfile  -- path to file in which to write the list
+# configfile  -- path to config file used to generate dumps
+
+usage() {
+echo Usage: $0 --dumpsnumber n --outputfile filename --configfile 
filename --rsyncprefix path
+echo 
+echo   dumpsnumber   number of dumps to list
+echo   outputfilename of file to which we will write iw action 
list
+echo   configfilename of configuration file for dump generation
+echo (default value: wikidump.conf)
+echo   rsyncprefix   path to substitute in place of the public path 
supplied
+echo in the configuration file, if needed
+echo 
+echo For example:
+echo$0 --dumpsnumber 5 --outputfile 
/data/dumps/public/dumpsfiles_for_rsync.txt --configfile wikidump.conf.testing
+exit 1
+}
+
+check_args() {
+if [ -z $dumpsnumber ]; then
+   echo $0: dumpsnumber must be an integer greater than 0
+   usage
+fi
+if ! [[ $dumpsnumber =~ ^[0-9]+$ ]] ; then
+   echo $0: dumpsnumber must be an integer greater than 0
+   usage
+fi
+if [ $dumpsnumber -lt 1 ]; then
+   echo $0: dumpsnumber must be an integer greater than 0
+   usage
+fi
+if [ -z $outputfile ]; then
+   echo No value was given for outfile option.
+   usage
+fi
+if [ -z $configfile ]; then
+   echo No value was given for configfile option.
+   usage
+fi
+if [ ! -f $configfile ]; then
+   echo $0: can't open configuration file $configfile, exiting...
+   exit 1
+fi
+}
+
+
+listdumpsforproject() {
+# cannot rely on timestamp. sometimes we have rerun a phase in 
+# some earlier dump and have it completed later than a later dump,
+# or we may have two en pedia runs going at once in different 
+# phases.
+dirs=`ls -dr $publicdir/$p/20* 2/dev/null`
+
+for day in $dirs; do
+   # tools, mw, static...
+   if [ -d $day ]; then
+   complete=`grep Dump complete $day/status.html 2/dev/null | 
grep -v failed 2/dev/null`
+   if [ ! -z $complete ]; then
+   complete_dumps=(${complete_dumps[@]} $day)
+   fi
+   failed=`grep Dump complete $day/status.html 2/dev/null | grep 
failed 2/dev/null`
+   if [ ! -z $failed ]; then
+   failed_dumps=(${failed_dumps[@]} $day)
+   fi
+   fi
+done
+}
+
+list_files_in_dir() {
+if [ ! -f $outputfile.tmp ]; then
+   touch $outputfile.tmp
+fi
+if [ $rsyncprefix == false ]; then
+   ls $d/*.gz 2/dev/null  $outputfile.tmp
+   ls $d/*.bz2 2/dev/null  $outputfile.tmp
+   ls $d/*.7z 2/dev/null  $outputfile.tmp
+   ls $d/*.html 2/dev/null  $outputfile.tmp
+   ls $d/*.txt 2/dev/null  $outputfile.tmp
+else
+   ls $d/*.gz 2/dev/null | sed -e s|^$publicdir|$rsyncprefix|  
$outputfile.tmp
+   ls $d/*.bz2 2/dev/null | sed -e s|^$publicdir|$rsyncprefix|  
$outputfile.tmp
+   ls $d/*.7z 2/dev/null | sed -e s|^$publicdir|$rsyncprefix|  
$outputfile.tmp
+   ls $d/*.html 2/dev/null | sed -e s|^$publicdir|$rsyncprefix|  
$outputfile.tmp
+   ls $d/*.txt 2/dev/null | sed -e s|^$publicdir|$rsyncprefix|  
$outputfile.tmp
+fi
+}
+
+get_list_of_files() {
+    projectdirs=`ls -d $publicdir/$p/20* 2>/dev/null`
+    declare -a complete_dumps
+    declare -a failed_dumps
+    listdumpsforproject
+    if [ ${#complete_dumps[@]} -ge $dumpsnumber ]; then
+        dumps_to_copy=${complete_dumps[@]:0:$dumpsnumber}
+        for d in $dumps_to_copy; do
+            list_files_in_dir
+        done
+    else
+        for d in ${complete_dumps[@]}; do
+            list_files_in_dir
+        done
+        left_to_get=$(( $dumpsnumber - ${#complete_dumps[@]} ))
+        if [ ${#failed_dumps[@]} -ge $left_to_get ]; then
+            dumps_to_copy=${failed_dumps[@]:0:$left_to_get}
+            for d in $dumps_to_copy; do
+                list_files_in_dir
+            done
+        else
+            for d in ${failed_dumps[@]}; do
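
Reduced to Python, the fallback logic of get_list_of_files() amounts to the following (a sketch under assumptions carried over from the script — date-named run directories and a status.html per run; the function name is mine):

    import os

    def pick_dumps(publicdir, project, wanted):
        # Dump runs live in date-named directories; list newest first.
        base = os.path.join(publicdir, project)
        days = sorted((d for d in os.listdir(base) if d.startswith("20")), reverse=True)
        complete, failed = [], []
        for day in days:
            status = os.path.join(base, day, "status.html")
            try:
                text = open(status).read()
            except IOError:
                continue
            if "Dump complete" in text:
                # A run can report completion and still have failed steps.
                (failed if "failed" in text else complete).append(os.path.join(base, day))
        # Prefer fully successful runs, then pad out with failed ones.
        return (complete + failed)[:wanted]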

[MediaWiki-CVS] SVN: [96353] branches/ariel/xmldumps-backup/worker.py

2011-09-06 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96353

Revision: 96353
Author:   ariel
Date: 2011-09-06 17:53:42 + (Tue, 06 Sep 2011)
Log Message:
---
missing parens in usage message; add 'latestlinks' option to just redo links 
and rss feed files in 'latest' directory; don't cleanup/delete for noop

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-09-06 17:12:09 UTC (rev 
96352)
+++ branches/ariel/xmldumps-backup/worker.py2011-09-06 17:53:42 UTC (rev 
96353)
@@ -505,6 +505,7 @@
if (self._singleJob[-5:] == 'table' or 
self._singleJob[-9:] == 'recombine' or 
self._singleJob == 'noop' or 
+   self._singleJob == 'latestlinks' or 
self._singleJob == 'xmlpagelogsdump' or
self._singleJob == 'pagetitlesdump' or
self._singleJob.endswith('recombine')):
@@ -514,6 +515,7 @@
if (self._singleJob[-5:] == 'table' or 
self._singleJob[-9:] == 'recombine' or 
self._singleJob == 'noop' or 
+   self._singleJob == 'latestlinks' or 
self._singleJob == 'xmlpagelogsdump' or
self._singleJob == 'pagetitlesdump' or
self._singleJob == 'abstractsdump' or
@@ -651,11 +653,12 @@
if (item.name() == job):
item.setToBeRun(True)
return True
-		if job == "noop":
+		if job == "noop" or job == "latestlinks":
 			return True
 		print "No job of the name specified exists. Choose one of the following:"
-		print "noop (runs no job but rewrites md5sums file and resets latest links"
-		print "tables (includes all items below that end in 'table'"
+		print "noop (runs no job but rewrites md5sums file and resets latest links)"
+		print "latestlinks (runs no job but resets latest links)"
+		print "tables (includes all items below that end in 'table')"
 		for item in self.dumpItems:
 			print "%s " % item.name()
 		return False
@@ -1518,6 +1521,22 @@
self._cleanupOldFilesEnabled = False
 
self.jobRequested = job
+
+		if self.jobRequested == "latestlinks":
+			self._statusEnabled = False
+			self._checksummerEnabled = False
+			self._runInfoFileEnabled = False
+			self._noticeFileEnabled = False
+			self._makeDirEnabled = False
+			self._cleanOldDumpsEnabled = False
+			self._cleanupOldFilesEnabled = False
+			self._checkForTruncatedFilesEnabled = False
+
+		if self.jobRequested == "noop":
+			self._cleanOldDumpsEnabled = False
+			self._cleanupOldFilesEnabled = False
+			self._checkForTruncatedFilesEnabled = False
+   
self.dbServerInfo = DbServerInfo(self.wiki, self.dbName, 
self.logAndPrint)
self.dumpDir = DumpDir(self.wiki, self.dbName)
 
@@ -1877,7 +1896,7 @@
 		for f in files:
 			if f.endswith("-rss.xml"):
 				filename = f[:-8];
-				link = os.path.join(latestDir,filename)
+				link = os.path.join(latestDir,f)
 				if not exists(link):
 					os.remove(os.path.join(latestDir,f))
 


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [96272] branches/ariel/xmldumps-backup/worker.py

2011-09-05 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/96272

Revision: 96272
Author:   ariel
Date: 2011-09-05 08:24:53 + (Mon, 05 Sep 2011)
Log Message:
---
check actual location of file being linked to before we claim it doesn't exist, 
rather than some buggy one

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-09-05 06:56:08 UTC (rev 
96271)
+++ branches/ariel/xmldumps-backup/worker.py2011-09-05 08:24:53 UTC (rev 
96272)
@@ -1831,7 +1831,7 @@
 			link = os.path.join(latestDir,f)
 			if os.path.islink(link):
 				realfile = os.readlink(link)
-				if not exists(realfile):
+				if not exists(os.path.join(latestDir,realfile)):
 					os.remove(link)
 
 class Feeds(object):


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
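
The bug fixed above is easy to hit anywhere symlinks are cleaned up: os.readlink() returns the target exactly as stored, and for these "latest" links that is a path relative to the link's own directory, not to the process's working directory. A minimal sketch of the corrected check (function name is mine):

    import os

    def prune_dangling_links(latest_dir):
        for name in os.listdir(latest_dir):
            link = os.path.join(latest_dir, name)
            if os.path.islink(link):
                target = os.readlink(link)
                # Resolve relative targets against the link's directory,
                # otherwise a perfectly good link looks dangling.
                if not os.path.exists(os.path.join(latest_dir, target)):
                    os.remove(link)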


[MediaWiki-CVS] SVN: [95720] trunk/phase3/maintenance/dumpTextPass.php

2011-08-29 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95720

Revision: 95720
Author:   ariel
Date: 2011-08-29 22:48:18 + (Mon, 29 Aug 2011)
Log Message:
---
check the checkpoint related options only if we specified checkpoints, duh

Modified Paths:
--
trunk/phase3/maintenance/dumpTextPass.php

Modified: trunk/phase3/maintenance/dumpTextPass.php
===
--- trunk/phase3/maintenance/dumpTextPass.php   2011-08-29 22:42:18 UTC (rev 
95719)
+++ trunk/phase3/maintenance/dumpTextPass.php   2011-08-29 22:48:18 UTC (rev 
95720)
@@ -245,13 +245,15 @@
}
}
 
-		$filenameList = $this->egress->getFilename();
-		if (! is_array($filenameList)) {
-			$filenameList = array( $filenameList );
+		if ($this->checkpointFiles) {
+			$filenameList = $this->egress->getFilename();
+			if (! is_array($filenameList)) {
+				$filenameList = array( $filenameList );
+			}
+			if (count($filenameList) != count($this->checkpointFiles)) {
+				wfDie("One checkpointfile must be specified for each output option, if maxtime is used.\n");
+			}
 		}
-		if (count($filenameList) != count($this->checkpointFiles)) {
-			wfDie("One checkpointfile must be specified for each output option, if maxtime is used.\n");
-		}
 	}
 
function readDump( $input ) {


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [95732] branches/ariel/xmldumps-backup

2011-08-29 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95732

Revision: 95732
Author:   ariel
Date: 2011-08-30 00:20:10 + (Tue, 30 Aug 2011)
Log Message:
---
first take at rerunning a checkpoint file

Modified Paths:
--
branches/ariel/xmldumps-backup/WikiDump.py
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/WikiDump.py
===
--- branches/ariel/xmldumps-backup/WikiDump.py  2011-08-30 00:10:32 UTC (rev 
95731)
+++ branches/ariel/xmldumps-backup/WikiDump.py  2011-08-30 00:20:10 UTC (rev 
95732)
@@ -191,6 +191,7 @@
 			"cat": "/bin/cat",
 			"grep": "/bin/grep",
 			"checkforbz2footer": "/usr/local/bin/checkforbz2footer",
+			"writeuptopageid": "/usr/local/bin/writeuptopageid",
 			#"cleanup": {
 			"keep": "3",
 			#"chunks": {
@@ -271,6 +272,7 @@
 		self.cat = self.conf.get("tools", "cat")
 		self.grep = self.conf.get("tools", "grep")
 		self.checkforbz2footer = self.conf.get("tools","checkforbz2footer")
+		self.writeuptopageid = self.conf.get("tools","writeuptopageid")
 
if not self.conf.has_section('cleanup'):
self.conf.add_section('cleanup')

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-08-30 00:10:32 UTC (rev 
95731)
+++ branches/ariel/xmldumps-backup/worker.py2011-08-30 00:20:10 UTC (rev 
95732)
@@ -485,12 +485,13 @@
self._toBeRun = toBeRun
 
 class DumpItemList(object):
-   def __init__(self, wiki, prefetch, spawn, chunkToDo, singleJob, 
chunkInfo, runInfoFile, dumpDir):
+   def __init__(self, wiki, prefetch, spawn, chunkToDo, checkpointFile, 
singleJob, chunkInfo, runInfoFile, dumpDir):
self.wiki = wiki
self._hasFlaggedRevs = self.wiki.hasFlaggedRevs()
self._prefetch = prefetch
self._spawn = spawn
self.chunkInfo = chunkInfo
+   self.checkpointFile = checkpointFile
self._chunkToDo = chunkToDo
self._singleJob = singleJob
self._runInfoFile = runInfoFile
@@ -505,9 +506,21 @@
self._singleJob[-9:] == 'recombine' or 
self._singleJob == 'noop' or 
self._singleJob == 'xmlpagelogsdump' or
-   self._singleJob == 'pagetitlesdump'):
+   self._singleJob == 'pagetitlesdump' or
+   self._singleJob.endswith('recombine')):
raise BackupError(You cannot specify a chunk 
with the job %s, exiting.\n % self._singleJob)
 
+   if (self._singleJob and self.checkpointFile):
+   if (self._singleJob[-5:] == 'table' or 
+   self._singleJob[-9:] == 'recombine' or 
+   self._singleJob == 'noop' or 
+   self._singleJob == 'xmlpagelogsdump' or
+   self._singleJob == 'pagetitlesdump' or
+   self._singleJob == 'abstractsdump' or
+   self._singleJob == 'xmlstubsdump' or
+   self._singleJob.endswith('recombine')):
+   raise BackupError(You cannot specify a 
checkpoint file with the job %s, exiting.\n % self._singleJob)
+
 		self.dumpItems = [PrivateTable("user", "usertable", "User account data."),
 			PrivateTable("watchlist", "watchlisttable", "Users' watchlist settings."),
 			PrivateTable("ipblocks", "ipblockstable", "Data for blocks of IP addresses, ranges, and users."),
@@ -555,7 +568,7 @@
 			XmlDump("articles",
 				"articlesdump",
 				"<big><b>Articles, templates, image descriptions, and primary meta-pages.</b></big>",
-				"This contains current versions of article content, and is the archive most mirror sites will probably want.", self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo("articlesdump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints))
+				"This contains current versions of article content, and is the archive most mirror sites will probably want.", self.findItemByName('xmlstubsdump'), self._prefetch, self._spawn, self.wiki, self._getChunkToDo("articlesdump"), self.chunkInfo.getPagesPerChunkHistory(), checkpoints, self.checkpointFile))
 		if (self.chunkInfo.chunksEnabled()):
 			self.dumpItems.append(RecombineXmlDump("articlesdumprecombine",
 				"<big><b>Recombine articles
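
The pattern here — reject a checkpoint file for any job that does not produce page-text XML — can be reduced to one table-driven check (a sketch; the job names come from the diff above, the function is mine):

    NO_CHECKPOINT_JOBS = ('noop', 'xmlpagelogsdump', 'pagetitlesdump',
                          'abstractsdump', 'xmlstubsdump')

    def check_job_options(job, chunk=None, checkpoint_file=None):
        # Tables, recombines, and metadata-only jobs never write
        # checkpointed text, so these options make no sense for them.
        restricted = (job.endswith('table') or job.endswith('recombine')
                      or job in NO_CHECKPOINT_JOBS)
        if checkpoint_file and restricted:
            raise ValueError("You cannot specify a checkpoint file with the job %s" % job)
        if chunk and restricted:
            raise ValueError("You cannot specify a chunk with the job %s" % job)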

[MediaWiki-CVS] SVN: [95633] branches/wmf/1.17wmf1/includes/upload/UploadFromStash.php

2011-08-28 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95633

Revision: 95633
Author:   ariel
Date: 2011-08-28 18:00:02 + (Sun, 28 Aug 2011)
Log Message:
---
fix fatal Call to a member function getId() on a non-object

Modified Paths:
--
branches/wmf/1.17wmf1/includes/upload/UploadFromStash.php

Modified: branches/wmf/1.17wmf1/includes/upload/UploadFromStash.php
===
--- branches/wmf/1.17wmf1/includes/upload/UploadFromStash.php   2011-08-28 
17:57:11 UTC (rev 95632)
+++ branches/wmf/1.17wmf1/includes/upload/UploadFromStash.php   2011-08-28 
18:00:02 UTC (rev 95633)
@@ -29,7 +29,12 @@
 		if( $stash ) {
 			$this->stash = $stash;
 		} else {
-			wfDebug( __METHOD__ . " creating new UploadStash instance for " . $user->getId() . "\n" );
+			if ($user) {
+				wfDebug( __METHOD__ . " creating new UploadStash instance for " . $user->getId() . "\n" );
+			}
+			else {
+				wfDebug( __METHOD__ . " creating new UploadStash instance, no user\n");
+			}
 			$this->stash = new UploadStash( $this->repo, $this->user );
 


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [95634] trunk/phase3/maintenance

2011-08-28 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95634

Revision: 95634
Author:   ariel
Date: 2011-08-28 19:06:52 + (Sun, 28 Aug 2011)
Log Message:
---
move some member vars to parent class since they are needed there now, set 
lastTime and other vars so checkpoints without prefetch work, update progress 
reporting in parent class to give rates during interval and from start of run

Modified Paths:
--
trunk/phase3/maintenance/backup.inc
trunk/phase3/maintenance/dumpTextPass.php

Modified: trunk/phase3/maintenance/backup.inc
===
--- trunk/phase3/maintenance/backup.inc 2011-08-28 18:00:02 UTC (rev 95633)
+++ trunk/phase3/maintenance/backup.inc 2011-08-28 19:06:52 UTC (rev 95634)
@@ -51,6 +51,10 @@
 	var $stubText   = false; // include rev_text_id instead of text; for 2-pass dump
 	var $dumpUploads = false;
 	var $dumpUploadFileContents = false;
+	var $lastTime = 0;
+	var $pageCountLast = 0;
+	var $revCountLast = 0;
+	var $ID = 0;
 
 	function BackupDumper( $args ) {
 		$this->stderr = fopen( "php://stderr", "wt" );
@@ -233,6 +237,8 @@
 		$dbr = wfGetDB( DB_SLAVE );
 		$this->maxCount = $dbr->selectField( $table, "MAX($field)", '', __METHOD__ );
 		$this->startTime = wfTime();
+		$this->lastTime = $this->startTime;
+		$this->ID = getmypid();
 	}
 
 	/**
@@ -281,21 +287,35 @@
 
 	function showReport() {
 		if ( $this->reporting ) {
-			$delta = wfTime() - $this->startTime;
 			$now = wfTimestamp( TS_DB );
-			if ( $delta ) {
-				$rate = $this->pageCount / $delta;
-				$revrate = $this->revCount / $delta;
+			$nowts = wfTime();
+			$deltaAll = wfTime() - $this->startTime;
+			$deltaPart = wfTime() - $this->lastTime;
+			$this->pageCountPart = $this->pageCount - $this->pageCountLast;
+			$this->revCountPart = $this->revCount - $this->revCountLast;
+
+			if ( $deltaAll ) {
 				$portion = $this->revCount / $this->maxCount;
-				$eta = $this->startTime + $delta / $portion;
+				$eta = $this->startTime + $deltaAll / $portion;
 				$etats = wfTimestamp( TS_DB, intval( $eta ) );
+				$pageRate = $this->pageCount / $deltaAll;
+				$revRate = $this->revCount / $deltaAll;
 			} else {
-				$rate = '-';
-				$revrate = '-';
+				$pageRate = '-';
+				$revRate = '-';
 				$etats = '-';
 			}
-			$this->progress( sprintf( "%s: %s %d pages (%0.3f/sec), %d revs (%0.3f/sec), ETA %s [max %d]",
-				$now, wfWikiID(), $this->pageCount, $rate, $this->revCount, $revrate, $etats, $this->maxCount ) );
+			if ( $deltaPart ) {
+				$pageRatePart = $this->pageCountPart / $deltaPart;
+				$revRatePart = $this->revCountPart / $deltaPart;
+			} else {
+				$pageRatePart = '-';
+				$revRatePart = '-';
+			}
+			$this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), ETA %s [max %d]",
+				$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate, $pageRatePart, $this->revCount, $revRate, $revRatePart, $etats, $this->maxCount ) );
+			$this->lastTime = $nowts;
+			$this->revCountLast = $this->revCount;
 		}
 	}
 

Modified: trunk/phase3/maintenance/dumpTextPass.php
===
--- trunk/phase3/maintenance/dumpTextPass.php   2011-08-28 18:00:02 UTC (rev 
95633)
+++ trunk/phase3/maintenance/dumpTextPass.php   2011-08-28 19:06:52 UTC (rev 
95634)
@@ -38,9 +38,6 @@
var $history = WikiExporter::FULL;
var $fetchCount = 0;
var $prefetchCount = 0;
-   var $lastTime = 0;
-   var $pageCountLast = 0;
-   var $revCountLast = 0;
var $prefetchCountLast = 0;
var $fetchCountLast = 0;
 
@@ -57,8 +54,6 @@
var $spawnRead = false;
var $spawnErr = false;
 
-   var $ID = 0;
-
var $xmlwriterobj = false;
 
# when we spend more than maxTimeAllowed seconds on this run, we 
continue
@@ -73,8 +68,6 @@
 
function initProgress( $history
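
The two rate figures added above (whole-run versus since-last-report) reduce to this arithmetic (a sketch; the class and names are mine, not the maintenance script's):

    import time

    class Progress(object):
        def __init__(self):
            self.start = self.last = time.time()
            self.pages = self.pages_last = 0

        def report(self):
            now = time.time()
            delta_all, delta_part = now - self.start, now - self.last
            # The cumulative rate smooths over the whole run; the interval
            # rate shows whether the current stretch is unusually slow.
            rate_all = self.pages / delta_all if delta_all else 0.0
            rate_part = (self.pages - self.pages_last) / delta_part if delta_part else 0.0
            self.last, self.pages_last = now, self.pages
            return rate_all, rate_part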

[MediaWiki-CVS] SVN: [95636] branches/ariel/xmldumps-backup/README.config

2011-08-28 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95636

Revision: 95636
Author:   ariel
Date: 2011-08-28 20:40:51 + (Sun, 28 Aug 2011)
Log Message:
---
descriptions of checkpointTime, temp, checkforbz2footer options

Modified Paths:
--
branches/ariel/xmldumps-backup/README.config

Modified: branches/ariel/xmldumps-backup/README.config
===
--- branches/ariel/xmldumps-backup/README.config2011-08-28 19:58:59 UTC 
(rev 95635)
+++ branches/ariel/xmldumps-backup/README.config2011-08-28 20:40:51 UTC 
(rev 95636)
@@ -87,10 +87,13 @@
   (wikiproject) being dumped, in subdirectories by date
   Default value: /dumps/public
 private -- full path to directory under which all dumps of private wikis
-   and all private tables will be created, in subdirs by project
-   name and underneath that in subdirs by date, similar to the
-   public dumps
+  and all private tables will be created, in subdirs by project
+  name and underneath that in subdirs by date, similar to the
+  public dumps
   Default value: /dumps/private
+temp -- full path to directory under which temporary files will be created;
+  this should not be the same as the public or private directory.
+  Default value: /dumps/temp
 index -- name of the top-level index file for all projects that is 
   automatically created by the monitoring process
   Default value: index.html
@@ -103,7 +106,7 @@
   are found
   Default value: home
 perdumpindex -- name of the index file created for a dump for a given project
-   on a given date
+  on a given date
   Default value: index.html
 
 The above options do not have to be specified in the config file, 
@@ -158,6 +161,11 @@
   Default value: /bin/cat
 grep -- Location of the grep binary
   Default value:/bin/grep
+checkforbz2footer -- Location of the checkforbz2footer binary
+  This is part of the mwbzutils package. If
+  the package is not installed, this value will
+  be ignored.
+  Default value: /usr/local/bin/checkforbz2footer
 
 The above options do not have to be specified in the config file, 
 since default values are provided.
@@ -197,6 +205,18 @@
 pagesPerChunkAbstract -- as pagesPerChunkHistory but for the abstract 
generation phase
   Default value: False
+checkpointTime -- save checkpoints of files containing revision text
+   (articles, metacurrent, metahistory dumps)
+   every checkpointTime minutes.  This involves closing
+   the current output file, renaming it to a filename
+   which includes in the filename the first and last page
+   written, and opening a new file for the next portion
+   of the XML output.  This can be useful if you want
+   to produce a large number of smaller files as input
+   to XML-crunching scripts, or if you are dumping 
+   a very large wiki which has a tendency to fail 
+   somewhere in the middle (*cough*en wikipedia*cough*).
+  Default value: 0 (no checkpoints produced)
 
 The above options do not have to be specified in the config file, 
 since default values are provided.


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
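
The options documented above reach the dump scripts roughly like this (a sketch using Python 2's ConfigParser; the key names and defaults follow the README, but the section name "output" is my assumption, not necessarily the file's actual layout):

    import ConfigParser

    defaults = {
        "temp": "/dumps/temp",
        "checkforbz2footer": "/usr/local/bin/checkforbz2footer",
        "checkpointTime": "0",  # 0 = no checkpoint files produced
    }
    conf = ConfigParser.SafeConfigParser(defaults)
    conf.read("wikidump.conf")
    # Defaults supplied above apply when the file omits the key.
    checkpoint_minutes = conf.getint("output", "checkpointTime")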


[MediaWiki-CVS] SVN: [95639] branches/ariel/xmldumps-backup/worker.py

2011-08-28 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95639

Revision: 95639
Author:   ariel
Date: 2011-08-28 22:15:58 + (Sun, 28 Aug 2011)
Log Message:
---
lose empty options from dumpTextPass runs

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-08-28 22:04:49 UTC (rev 
95638)
+++ branches/ariel/xmldumps-backup/worker.py2011-08-28 22:15:58 UTC (rev 
95639)
@@ -2750,12 +2750,12 @@
 			prefetch = "--prefetch=%s" % (source)
 		else:
 			runner.showRunnerState("... building %s %s XML dump, no text prefetch..." % (self._subset, chunkinfo))
-			prefetch = None
+			prefetch = ""
 
 		if self._spawn:
 			spawn = "--spawn=%s" % (runner.wiki.config.php)
 		else:
-			spawn = None
+			spawn = ""
 
 		if (not exists( runner.wiki.config.php ) ):
 			raise BackupError("php command %s not found" % runner.wiki.config.php)
@@ -2764,8 +2764,8 @@
 			checkpointTime = "--maxtime=%s" % (runner.wiki.config.checkpointTime)
 			checkpointFile = "--checkpointfile=%s" % outputFile.newFilename(outputFile.dumpName, outputFile.fileType, outputFile.fileExt, outputFile.date, outputFile.chunk, "p%sp%s", None)
 		else:
-			checkpointTime = None
-			checkpointFile = None
+			checkpointTime = ""
+			checkpointFile = ""
 		dumpCommand = [ "%s" % runner.wiki.config.php,
 			"-q", "%s/maintenance/dumpTextPass.php" % runner.wiki.config.wikiDir,
 			"--wiki=%s" % runner.dbName,
@@ -2777,6 +2777,7 @@
 			"--report=1000",
 			"%s" % spawn ]
 
+		dumpCommand = filter(None, dumpCommand)
 		command = dumpCommand
 		filters = self.buildFilters(runner, outputFile)
 		eta = self.buildEta(runner)


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
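
The one-line fix above works because filter() with None as the function drops every false value, so empty option strings vanish from the argument list instead of reaching the child process as empty arguments (a sketch; the paths are placeholders):

    dump_command = ["/usr/bin/php", "-q", "maintenance/dumpTextPass.php",
                    "", "--report=1000", ""]   # "" stands in for unused options
    dump_command = filter(None, dump_command)  # Python 2: returns a list
    assert dump_command == ["/usr/bin/php", "-q",
                            "maintenance/dumpTextPass.php", "--report=1000"]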


[MediaWiki-CVS] SVN: [95601] trunk/phase3/includes/Export.php

2011-08-27 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95601

Revision: 95601
Author:   ariel
Date: 2011-08-27 15:50:17 + (Sat, 27 Aug 2011)
Log Message:
---
replace WfDie()

Modified Paths:
--
trunk/phase3/includes/Export.php

Modified: trunk/phase3/includes/Export.php
===
--- trunk/phase3/includes/Export.php2011-08-27 15:00:08 UTC (rev 95600)
+++ trunk/phase3/includes/Export.php2011-08-27 15:50:17 UTC (rev 95601)
@@ -739,7 +739,7 @@
 	function closeRenameAndReopen( $newname ) {
 		if ( is_array($newname) ) {
 			if (count($newname) > 1) {
-				WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
+				throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 			}
 			else {
 				$newname = $newname[0];
@@ -755,7 +755,7 @@
 	function rename( $newname ) {
 		if ( is_array($newname) ) {
 			if (count($newname) > 1) {
-				WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
+				throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 			}
 			else {
 				$newname = $newname[0];
@@ -796,7 +796,7 @@
 	function closeRenameAndReopen( $newname ) {
 		if ( is_array($newname) ) {
 			if (count($newname) > 1) {
-				WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
+				throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 			}
 			else {
 				$newname = $newname[0];
@@ -814,7 +814,7 @@
 	function rename( $newname ) {
 		if ( is_array($newname) ) {
 			if (count($newname) > 1) {
-				WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
+				throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 			}
 			else {
 				$newname = $newname[0];
@@ -865,7 +865,7 @@
 	function closeRenameAndReopen( $newname ) {
 		if ( is_array($newname) ) {
 			if (count($newname) > 1) {
-				WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
+				throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 			}
 			else {
 				$newname = $newname[0];
@@ -883,7 +883,7 @@
 	function rename( $newname ) {
 		if ( is_array($newname) ) {
 			if (count($newname) > 1) {
-				WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
+				throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
 			}
 			else {
 				$newname = $newname[0];


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [95604] trunk/phase3

2011-08-27 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95604

Revision: 95604
Author:   ariel
Date: 2011-08-27 18:31:03 + (Sat, 27 Aug 2011)
Log Message:
---
define and use closeAndRename() after last write of xml dump file; convert from 
popen (child inherits all open descriptors and there is no workaround) to 
proc_open (CLOEXEC set on all descriptors), needed so close and rename doesn't 
hang forever if a child (prefetcher) is forked

Modified Paths:
--
trunk/phase3/includes/Export.php
trunk/phase3/maintenance/dumpTextPass.php

Modified: trunk/phase3/includes/Export.php
===
--- trunk/phase3/includes/Export.php2011-08-27 18:28:04 UTC (rev 95603)
+++ trunk/phase3/includes/Export.php2011-08-27 18:31:03 UTC (rev 95604)
@@ -704,6 +704,10 @@
return;
}
 
+	function closeAndRename( $newname ) {
+		return;
+	}
+
 	function rename( $newname ) {
 		return;
 	}
@@ -752,6 +756,21 @@
 		}
 	}
 
+	function closeAndRename( $newname ) {
+		if ( is_array($newname) ) {
+			if (count($newname) > 1) {
+				throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
+			}
+			else {
+				$newname = $newname[0];
+			}
+		}
+		if ( $newname ) {
+			fclose( $this->handle );
+			rename( $this->filename, $newname );
+		}
+	}
+
 	function rename( $newname ) {
 		if ( is_array($newname) ) {
 			if (count($newname) > 1) {
@@ -784,11 +803,21 @@
 		if ( !is_null( $file ) ) {
 			$command .=  " > " . wfEscapeShellArg( $file );
 		}
-		$this->handle = popen( $command, "w" );
+
+		$this->startCommand($command);
 		$this->command = $command;
 		$this->filename = $file;
 	}
 
+	function startCommand($command) {
+		$spec = array(
+			0 => array( "pipe", "r" ),
+		);
+		$pipes = array();
+		$this->procOpenResource = proc_open( $command, $spec, $pipes );
+		$this->handle = $pipes[0];
+	}
+
 	/**
 	 * Close the old file, move it to a specified name, 
 	 * and reopen new file with the old name. 
@@ -803,14 +832,32 @@
 			}
 		}
 		if ( $newname ) {
-			pclose( $this->handle );
+			fclose( $this->handle );
+			proc_close($this->procOpenResource);
 			rename( $this->filename, $newname );
 			$command = $this->command;
 			$command .=  " > " . wfEscapeShellArg( $this->filename );
+			$this->startCommand($command);
 		}
 	}
 
+	function closeAndRename( $newname ) {
+		if ( is_array($newname) ) {
+			if (count($newname) > 1) {
+				throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
+			}
+			else {
+				$newname = $newname[0];
+			}
+		}
+		if ( $newname ) {
+#			pclose( $this->handle );
+			fclose( $this->handle );
+			proc_close($this->procOpenResource);
+			rename( $this->filename, $newname );
+		}
+	}
+
 	function rename( $newname ) {
 		if ( is_array($newname) ) {
 			if (count($newname) > 1) {
@@ -872,14 +919,31 @@
 			}
 		}
 		if ( $newname ) {
-			pclose( $this->handle );
+			fclose( $this->handle );
+			proc_close($this->procOpenResource);
 			rename( $this->filename, $newname );
 			$command = "7za a -bd -si " . wfEscapeShellArg( $file );
 			$command .= ' >' . wfGetNull() . ' 2>&1';
-			$this->handle = popen( $command, "w" );
+			$this->startCommand($command);
 		}
 	}
 
+	function closeAndRename( $newname ) {
+		if ( is_array($newname) ) {
+			if (count($newname) > 1) {
+				throw new MWException("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n
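
The popen-to-proc_open switch matters in any language: a child forked while a pipe to another child is open inherits that descriptor and can keep the pipe alive indefinitely, so closing the write end never delivers EOF. The Python equivalent of the precaution (a sketch, not the PHP code above) is subprocess with close_fds:

    import subprocess

    # close_fds=True buys the same guarantee as the proc_open change:
    # the compressor child does not inherit stray descriptors, so
    # closing our write end reliably ends the stream even while other
    # children (e.g. a prefetcher) are running.
    out = open("out.gz", "wb")
    proc = subprocess.Popen(["gzip", "-c"], stdin=subprocess.PIPE,
                            stdout=out, close_fds=True)
    proc.stdin.write("<page>...</page>\n")
    proc.stdin.close()
    proc.wait()
    out.close()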

[MediaWiki-CVS] SVN: [95404] branches/ariel/xmldumps-backup/worker.py

2011-08-24 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95404

Revision: 95404
Author:   ariel
Date: 2011-08-24 17:26:34 + (Wed, 24 Aug 2011)
Log Message:
---
actually check user's y/n in a fashion that works; write dumpruninfo file in 
place, no need for temp file first

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-08-24 16:21:00 UTC (rev 
95403)
+++ branches/ariel/xmldumps-backup/worker.py2011-08-24 17:26:34 UTC (rev 
95404)
@@ -395,7 +395,7 @@
 		directory = self._getDumpRunInfoDirName()
 		dumpRunInfoFilename = self._getDumpRunInfoFileName()
 #		FileUtils.writeFile(directory, dumpRunInfoFilename, text, self.wiki.config.fileperms)
-		FileUtils.writeFile(self.wiki.config.tempDir, dumpRunInfoFilename, text, self.wiki.config.fileperms)
+		FileUtils.writeFileInPlace(self.wiki.config.tempDir, dumpRunInfoFilename, text, self.wiki.config.fileperms)
 
 	# format: name:%; updated:%; status:%
 	def _getStatusForJobFromRunInfoFileLine(self, line, jobName):
@@ -1646,7 +1646,7 @@
 				print "This means that the status information about the old run will be lost, and"
 				print "only the information about the current (and future) runs will be kept."
 				reply = raw_input("Continue anyways? [y/N]: ")
-				if (not reply in "y", "Y"):
+				if (not reply in [ "y", "Y" ]):
 					raise RuntimeError( "No run information available for previous dump, exiting" )
 			if (not self.wiki.existsPerDumpIndex()):
 				# AFAWK this is a new run (not updating or rerunning an old run), 


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [95406] branches/ariel/xmldumps-backup/worker.py

2011-08-24 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95406

Revision: 95406
Author:   ariel
Date: 2011-08-24 17:59:55 + (Wed, 24 Aug 2011)
Log Message:
---
fix dump run info file writing (again), clean up dup code in newFromFilename

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-08-24 17:56:49 UTC (rev 
95405)
+++ branches/ariel/xmldumps-backup/worker.py2011-08-24 17:59:55 UTC (rev 
95406)
@@ -395,7 +395,7 @@
directory = self._getDumpRunInfoDirName()
dumpRunInfoFilename = self._getDumpRunInfoFileName()
 #  FileUtils.writeFile(directory, dumpRunInfoFilename, text, 
self.wiki.config.fileperms)
-   FileUtils.writeFileInPlace(self.wiki.config.tempDir, 
dumpRunInfoFilename, text, self.wiki.config.fileperms)
+   FileUtils.writeFileInPlace(dumpRunInfoFilename, text, 
self.wiki.config.fileperms)
 
# format: name:%; updated:%; status:%
def _getStatusForJobFromRunInfoFileLine(self, line, jobName):
@@ -973,6 +973,29 @@
 		"""Constructor.  Arguments: the full file name including the chunk, the extension, etc BUT NOT the dir name. """
 		self.filename = filename
 
+		self.dbName = None
+		self.date = None
+		self.dumpName = None
+
+		self.basename = None
+		self.fileExt = None
+		self.fileType = None
+
+		self.filePrefix = ""
+		self.filePrefixLength = 0
+
+		self.isChunkFile = False
+		self.chunk = None
+		self.chunkInt = 0
+
+		self.isCheckpointFile = False
+		self.checkpoint = None
+		self.firstPageID = None
+		self.lastPageID = None
+
+		self.isTempFile = False
+		self.temp = None
+
 		# example filenames:
 		# elwikidb-20110729-all-titles-in-ns0.gz
 		# elwikidb-20110729-abstract.xml
@@ -983,32 +1006,14 @@
 		if self.filename.endswith("-tmp"):
 			self.isTempFile = True
 			self.temp = "-tmp"
-		else:
-			self.isTempFile = False
-			self.temp = None
 
 		if ('.' in self.filename):
 			(fileBase, self.fileExt) = self.filename.rsplit('.',1)
 			if (self.temp):
 				self.fileExt = self.fileExt[:-4];
 		else:
-			self.dbName = None
-			self.date = None
-			self.dumpName = None
-			self.filePrefix = ""
-			self.filePrefixLength = 0
-			self.isChunkFile = False
-			self.isCheckpointFile = False
-			self.checkpoint = None
-			self.firstPageID = None
-			self.lastPageID = None
-			self.isTempFile = False
-			self.fileExt = None
-			self.fileType = None
 			return False
 
-		# FIXME could have -tmp at the end, when do we look for that??
-
 		if not self.isExt(self.fileExt):
 			self.fileType = self.fileExt
 #			self.fileExt = None
@@ -1019,35 +1024,17 @@
 
 		# some files are not of this form, we skip them
 		if not '-' in fileBase:
-			self.dbName = None
-			self.date = None
-			self.dumpName = None
-			self.filePrefix = ""
-			self.filePrefixLength = 0
-			self.isChunkFile = False
-			self.isCheckpointFile = False
-			self.checkpoint = None
-			self.firstPageID = None
-			self.lastPageID = None
-			self.isTempFile = False
-			self.temp = None
 			return False
 
 		(self.dbName, self.date, self.dumpName) = fileBase.split('-',2)
 		if not self.date or not self.dumpName:
-			self.dbName = None
-			self.date = None
 			self.dumpName = fileBase
-			self.filePrefix = ""
-			self.filePrefixLength = 0
 		else:
 			self.filePrefix = "%s-%s-" % (self.dbName, self.date)
 			self.filePrefixLength = len(self.filePrefix)
 
 		if self.filename.startswith(self.filePrefix):
 			self.basename = self.filename[self.filePrefixLength:]
-		else
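
The constructor being cleaned up here parses names like elwikidb-20110729-abstract.xml; defaulting every attribute first, as the new code does, leaves the object consistent when a parse bails out early. The core of the parse, as a compact sketch (it simplifies the fallback for short names, which the real code handles differently):

    def parse_dump_filename(filename):
        # Returns (dbname, date, dumpname, ext) or None for names we skip.
        if filename.endswith("-tmp"):
            filename = filename[:-4]
        if "." not in filename:
            return None
        base, ext = filename.rsplit(".", 1)
        parts = base.split("-", 2)
        if len(parts) < 3:
            return None
        dbname, date, dumpname = parts
        return (dbname, date, dumpname, ext)

    print parse_dump_filename("elwikidb-20110729-abstract.xml")
    # ('elwikidb', '20110729', 'abstract', 'xml')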

[MediaWiki-CVS] SVN: [95443] trunk/phase3/maintenance/dumpTextPass.php

2011-08-24 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95443

Revision: 95443
Author:   ariel
Date: 2011-08-24 20:43:09 + (Wed, 24 Aug 2011)
Log Message:
---
remove extraneous hyphen that crept in, grr

Modified Paths:
--
trunk/phase3/maintenance/dumpTextPass.php

Modified: trunk/phase3/maintenance/dumpTextPass.php
===
--- trunk/phase3/maintenance/dumpTextPass.php   2011-08-24 20:32:00 UTC (rev 
95442)
+++ trunk/phase3/maintenance/dumpTextPass.php   2011-08-24 20:43:09 UTC (rev 
95443)
@@ -220,7 +220,7 @@
 				$pageRatePart = '-';
 				$revRatePart = '-';
 			}
-			$this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d]",-
+			$this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d]",
 				$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate, $pageRatePart, $this->revCount, $revRate, $revRatePart, $fetchRate, $fetchRatePart, $etats, $this->maxCount ) );
 			$this->lastTime = $nowts;
 			$this->revCountLast = $this->revCount;


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [95455] branches/ariel/xmldumps-backup/README

2011-08-24 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95455

Revision: 95455
Author:   ariel
Date: 2011-08-24 22:52:34 + (Wed, 24 Aug 2011)
Log Message:
---
bring main README up to date, minor cleanups

Modified Paths:
--
branches/ariel/xmldumps-backup/README

Modified: branches/ariel/xmldumps-backup/README
===
--- branches/ariel/xmldumps-backup/README   2011-08-24 22:36:09 UTC (rev 
95454)
+++ branches/ariel/xmldumps-backup/README   2011-08-24 22:52:34 UTC (rev 
95455)
@@ -6,40 +6,45 @@
 
 === Worker ===
 
-Each dump machine runs a worker process which continuously generates dumps.
+Each dump machine runs a worker process, a shell script which continuously 
+calls a python script to generate a dump for the next available wiki.
 At each iteration, the set of wikis is ordered by last dump date, and the
 least-recently-touched wiki is selected.
 
-Workers are kept from stomping on each other by creating a lock file in
-the private dump directory. To aid in administration, the lock file contains
+There are two directory trees used by the dumps processes, one for public
+tables and files of public wikis, and one for private wikis or for private 
+tables and files (such as the user table) of public wikis.
+
+Workers (the python scripts) are kept from stomping on each other by creating
+a lock file in the private dump directory for the specific wiki. The lock file contains
 the hostname and process ID of the worker process holding the lock.
 
 Lock files are touched every 10 seconds while the process runs, and removed
 at the end.
 
-On each iteration, the script and configuration are reloaded, so additions
-to the database list or dump code will be made available without manually
-restarting things.
+On each iteration, a new copy of the python script is run, which reads its
+configuration files from scratch, so additions to the database list files or
+changes to the dump script introduced during the middle of one dump will
+go into effect at the start of the next dump.
 
-
 === Monitor ===
 
-One master machine runs the monitor process, which periodically sweeps all
-wikis for their current status. This accomplishes two tasks:
+One server runs the monitor process, which periodically sweeps all
+public dump directories (one per wiki) for their current status. This 
accomplishes two tasks:
 
 * The index page is updated with a summary of dump states
-* Aborted dumps are detected and cleaned up
+* Aborted dumps are detected and cleaned up (how complete is this?)
 
 A lock file that has not been touched in some time is detected as stale,
 indicating that the worker process holding the lock has died. The status
 for that dump can then be updated from running to stopped, and the lock
-file is removed so that the wiki will get redumped later.
+file is removed so that the wiki will get dumped again later.
 
+== Code ==
 
-== Code files ==
-
 worker.py
-- Runs a dump for the least-recently dumped wiki in the stack.
+- Runs a dump for the least-recently dumped wiki in the stack, or the desired 
wiki
+  can be specified from the command line
 
 monitor.py
 - Generates the site-wide index summary and removes stale locks.
@@ -47,7 +52,16 @@
 WikiDump.py
 - Shared classes and functions
 
+CommandManagement.py
+- Classes for running multiple commands at the same time, used for running 
some phases
+  of the dumps in multiple pieces at the same time, for speed
 
+mwbzutils/
+- Library of utilities for working with bzip2 files, used for locating
+  an arbitrary XML page in a dump file, checking that the file was written
+  out completely without truncation, and other tools.  See the README in
+  the directory for more details.
+ 
 == Configuration ==
 
 Configuration is done with an INI-style configuration file wikidump.conf.


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
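
The lock protocol the README describes — touch the lock every 10 seconds, treat an untouched lock as stale — is a single mtime comparison (a sketch; the 60-second cutoff is an arbitrary illustration, not the monitor's actual setting):

    import os, time

    def lock_is_stale(lockfile, max_age=60):
        # A live worker touches its lock every 10 seconds; a lock
        # untouched for much longer means the worker died uncleanly.
        try:
            age = time.time() - os.stat(lockfile).st_mtime
        except OSError:
            return False  # lock already gone
        return age > max_age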


[MediaWiki-CVS] SVN: [95260] trunk/phase3/includes/Export.php

2011-08-22 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95260

Revision: 95260
Author:   ariel
Date: 2011-08-22 22:01:32 + (Mon, 22 Aug 2011)
Log Message:
---
add functions that support close and rename of output files as they are being 
written, used to write out checkpoint files at regular intervals during XML 
dump production

Modified Paths:
--
trunk/phase3/includes/Export.php

Modified: trunk/phase3/includes/Export.php
===
--- trunk/phase3/includes/Export.php2011-08-22 21:52:07 UTC (rev 95259)
+++ trunk/phase3/includes/Export.php2011-08-22 22:01:32 UTC (rev 95260)
@@ -354,6 +354,9 @@
  * @ingroup Dump
  */
 class XmlDumpWriter {
+   var $firstPageWritten = 0;
+   var $lastPageWritten = 0;
+   var $pageInProgress = 0;
 
/**
 * Returns the export schema version.
@@ -458,6 +461,7 @@
 		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
 		$out .= '    ' . Xml::elementClean( 'title', array(), $title->getPrefixedText() ) . "\n";
 		$out .= '    ' . Xml::element( 'id', array(), strval( $row->page_id ) ) . "\n";
+		$this->pageInProgress = $row->page_id;
 		if ( $row->page_is_redirect ) {
 			$out .= '    ' . Xml::element( 'redirect', array() ) . "\n";
 		}
@@ -478,6 +482,10 @@
 	 */
 	function closePage() {
 		return "  </page>\n";
+		if (! $this->firstPageWritten) {
+			$this->firstPageWritten = $this->pageInProgress;
+		}
+		$this->lastPageWritten = $this->pageInProgress;
 	}
 
/**
@@ -691,6 +699,18 @@
function write( $string ) {
print $string;
}
+
+   function closeRenameAndReopen( $newname ) {
+   return;
+   }
+
+   function rename( $newname ) {
+   return;
+   }
+
+   function getFilename() {
+   return NULL;
+   }
 }
 
 /**
@@ -699,14 +719,56 @@
  */
 class DumpFileOutput extends DumpOutput {
var $handle;
+   var $filename;
 
 	function __construct( $file ) {
 		$this->handle = fopen( $file, "wt" );
+		$this->filename = $file;
 	}
 
 	function write( $string ) {
 		fputs( $this->handle, $string );
 	}
+
+	/**
+	 * Close the old file, move it to a specified name, 
+	 * and reopen new file with the old name. Use this
+	 * for writing out a file in multiple pieces
+	 * at specified checkpoints (e.g. every n hours).
+	 */
+	function closeRenameAndReopen( $newname ) {
+		if ( is_array($newname) ) {
+			if (count($newname) > 1) {
+				WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
+			}
+			else {
+				$newname = $newname[0];
+			}
+		}
+		if ( $newname ) {
+			fclose( $this->handle );
+			rename( $this->filename, $newname );
+			$this->handle = fopen( $this->filename, "wt" );
+		}
+	}
+
+	function rename( $newname ) {
+		if ( is_array($newname) ) {
+			if (count($newname) > 1) {
+				WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
+			}
+			else {
+				$newname = $newname[0];
+			}
+		}
+		if ( $newname ) {
+			rename( $this->filename, $newname );
+		}
+	}
+
+	function getFilename() {
+		return $this->filename;
+	}
 }
 
 /**
@@ -716,12 +778,52 @@
  * @ingroup Dump
  */
 class DumpPipeOutput extends DumpFileOutput {
+   var $command;
+
 	function __construct( $command, $file = null ) {
 		if ( !is_null( $file ) ) {
 			$command .=  " > " . wfEscapeShellArg( $file );
 		}
 		$this->handle = popen( $command, "w" );
+		$this->command = $command;
+		$this->filename = $file;
 	}
+
+	/**
+	 * Close the old file, move it to a specified name, 
+	 * and reopen new file with the old name. 
+	 */
+	function closeRenameAndReopen( $newname ) {
+		if ( is_array($newname) ) {
+			if (count($newname) > 1) {
+				WfDie("Export closeRenameAndReopen: passed multiple argumnts for rename of single file\n");
+			}
+			else {
+				$newname = $newname[0
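
The close/rename/reopen cycle added to these output classes is the heart of checkpointing: finish a valid file, publish it under a page-range name, then keep writing a fresh file under the canonical name. In outline (a Python sketch of the same motion, not the PHP above):

    import os

    class CheckpointedOutput(object):
        def __init__(self, filename):
            self.filename = filename
            self.handle = open(filename, "wt")

        def close_rename_and_reopen(self, newname):
            # Close so buffers hit disk, publish under the checkpoint
            # name, then continue the run in a brand-new file that
            # reuses the original name.
            self.handle.close()
            os.rename(self.filename, newname)
            self.handle = open(self.filename, "wt")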

[MediaWiki-CVS] SVN: [95272] trunk/phase3/maintenance/dumpTextPass.php

2011-08-22 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95272

Revision: 95272
Author:   ariel
Date: 2011-08-22 22:45:22 + (Mon, 22 Aug 2011)
Log Message:
---
add support for writing out checkpoint files of xml dump at regular intervals 
(close and rename file based on filename pattern which includes first and last 
page id written)

Modified Paths:
--
trunk/phase3/maintenance/dumpTextPass.php

Modified: trunk/phase3/maintenance/dumpTextPass.php
===
--- trunk/phase3/maintenance/dumpTextPass.php   2011-08-22 22:45:21 UTC (rev 
95271)
+++ trunk/phase3/maintenance/dumpTextPass.php   2011-08-22 22:45:22 UTC (rev 
95272)
@@ -59,10 +59,23 @@
 
var $ID = 0;
 
+	var $xmlwriterobj = false;
+
+	# when we spend more than maxTimeAllowed seconds on this run, we continue
+	# processing until we write out the next complete page, then save output file(s),
+	# rename it/them and open new one(s)
+	var $maxTimeAllowed = 0;  // 0 = no limit
+	var $timeExceeded = false;
+	var $firstPageWritten = false;
+	var $lastPageWritten = false;
+	var $checkpointJustWritten = false;
+	var $checkpointFiles = array();
+
 	function initProgress( $history ) {
 		parent::initProgress();
 		$this->ID = getmypid();
 		$this->lastTime = $this->startTime;
+		$this->timeOfCheckpoint = $this->startTime;
 	}
 
function dump( $history, $text = WikiExporter::TEXT ) {
@@ -80,6 +93,12 @@
 
 		$this->egress = new ExportProgressFilter( $this->sink, $this );
 
+		# it would be nice to do it in the constructor, oh well. need egress set
+		$this->finalOptionCheck();
+
+		# we only want this so we know how to close a stream :-P
+		$this->xmlwriterobj = new XmlDumpWriter();
+
 		$input = fopen( $this->input, "rt" );
 		$result = $this->readDump( $input );
 
@@ -106,6 +125,12 @@
 			case 'stub':
 				$this->input = $url;
 				break;
+			case 'maxtime':
+				$this->maxTimeAllowed = intval($val)*60;
+				break;
+			case 'checkpointfile':
+				$this->checkpointFiles[] = $val;
+				break;
 			case 'current':
 				$this->history = WikiExporter::CURRENT;
 				break;
@@ -204,6 +229,39 @@
}
}
 
+	function setTimeExceeded() {
+		$this->timeExceeded = True;
+	}
+
+	function checkIfTimeExceeded() {
+		if ( $this->maxTimeAllowed && ( $this->lastTime - $this->timeOfCheckpoint > $this->maxTimeAllowed ) ) {
+			return True;
+		}
+		return False;
+	}
+
+	function finalOptionCheck() {
+		if (($this->checkpointFiles && ! $this->maxTimeAllowed) ||
+			($this->maxTimeAllowed && !$this->checkpointFiles)) {
+			wfDie("Options checkpointfile and maxtime must be specified together.\n");
+		}
+		foreach ($this->checkpointFiles as $checkpointFile) {
+			$count = substr_count ($checkpointFile,"%s");
+			if (substr_count ($checkpointFile,"%s") != 2) {
+				wfDie("Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, file is $checkpointFile.\n");
+			}
+		}
+
+		$filenameList = $this->egress->getFilename();
+		if (! is_array($filenameList)) {
+			$filenameList = array( $filenameList );
+		}
+		if (count($filenameList) != count($this->checkpointFiles)) {
+			wfDie("One checkpointfile must be specified for each output option, if maxtime is used.\n");
+		}
+	}
+
 	function readDump( $input ) {
 		$this->buffer = "";
 		$this->openElement = false;
@@ -222,6 +280,9 @@
 		$offset = 0; // for context extraction on error reporting
 		$bufferSize = 512 * 1024;
 		do {
+			if ($this->checkIfTimeExceeded()) {
+				$this->setTimeExceeded();
+			}
 			$chunk = fread( $input, $bufferSize );
 			if ( !xml_parse( $parser, $chunk, feof( $input ) ) ) {
 				wfDebug( "TextDumpPass::readDump encountered XML parsing error\n" );
 			}
 			$offset += strlen( $chunk );
 		} while ( $chunk !== false && !feof( $input ) );
+		if ($this->maxTimeAllowed) {
+			$filenameList
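
checkIfTimeExceeded() above is an elapsed-time comparison against the last checkpoint; once it fires, the writer finishes the page in progress before rotating files. Reduced to a sketch (names are mine):

    import time

    class CheckpointTimer(object):
        def __init__(self, max_minutes):
            self.max_seconds = max_minutes * 60   # 0 means no limit
            self.last_checkpoint = time.time()

        def exceeded(self):
            if not self.max_seconds:
                return False
            return time.time() - self.last_checkpoint > self.max_seconds

        def reset(self):
            # Call after each checkpoint file is closed and renamed.
            self.last_checkpoint = time.time()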
+   $filenameList

[MediaWiki-CVS] SVN: [95288] trunk/phase3/maintenance/dumpTextPass.php

2011-08-22 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95288

Revision: 95288
Author:   ariel
Date: 2011-08-23 00:04:45 + (Tue, 23 Aug 2011)
Log Message:
---
fix a couple bad lines in previous commit from bad merge attempt

Modified Paths:
--
trunk/phase3/maintenance/dumpTextPass.php

Modified: trunk/phase3/maintenance/dumpTextPass.php
===
--- trunk/phase3/maintenance/dumpTextPass.php   2011-08-22 23:56:23 UTC (rev 
95287)
+++ trunk/phase3/maintenance/dumpTextPass.php   2011-08-23 00:04:45 UTC (rev 
95288)
@@ -222,7 +222,6 @@
 			$this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d]",-
 				$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate, $pageRatePart, $this->revCount, $revRate, $revRatePart, $fetchRate, $fetchRatePart, $etats, $this->maxCount ) );
 			$this->lastTime = $now;
-			$this->partCountLast = $this->partCount;
 			$this->revCountLast = $this->revCount;
 			$this->prefetchCountLast = $this->prefetchCount;
 			$this->fetchCountLast = $this->fetchCount;
@@ -248,8 +247,7 @@
 		foreach ($this->checkpointFiles as $checkpointFile) {
 			$count = substr_count ($checkpointFile,"%s");
 			if (substr_count ($checkpointFile,"%s") != 2) {
-				wfDie("Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, fil
-e is $checkpointFile.\n");
+				wfDie("Option checkpointfile must contain two '%s' for substitution of first and last pageids, count is $count instead, file is $checkpointFile.\n");
 			}
 		}
 


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [95290] trunk/phase3/maintenance/dumpTextPass.php

2011-08-22 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/95290

Revision: 95290
Author:   ariel
Date: 2011-08-23 00:36:15 + (Tue, 23 Aug 2011)
Log Message:
---
fix timestamp stuff, more fallout from bad merge attempt

Modified Paths:
--
trunk/phase3/maintenance/dumpTextPass.php

Modified: trunk/phase3/maintenance/dumpTextPass.php
===
--- trunk/phase3/maintenance/dumpTextPass.php   2011-08-23 00:11:13 UTC (rev 
95289)
+++ trunk/phase3/maintenance/dumpTextPass.php   2011-08-23 00:36:15 UTC (rev 
95290)
@@ -181,6 +181,7 @@
 
 		if ( $this->reporting ) {
 			$now = wfTimestamp( TS_DB );
+			$nowts = wfTime();
 			$deltaAll = wfTime() - $this->startTime;
 			$deltaPart = wfTime() - $this->lastTime;
 			$this->pageCountPart = $this->pageCount - $this->pageCountLast;
@@ -221,7 +222,7 @@
 			}
 			$this->progress( sprintf( "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), %d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% prefetched (all|curr), ETA %s [max %d]",-
 				$now, wfWikiID(), $this->ID, $this->pageCount, $pageRate, $pageRatePart, $this->revCount, $revRate, $revRatePart, $fetchRate, $fetchRatePart, $etats, $this->maxCount ) );
-			$this->lastTime = $now;
+			$this->lastTime = $nowts;
 			$this->revCountLast = $this->revCount;
 
@@ -233,6 +234,10 @@
 	}
 
 	function checkIfTimeExceeded() {
+		$m1 = $this->maxTimeAllowed;
+		$m2 = $this->lastTime; 
+		$m3 = $this->timeOfCheckpoint;
+		$m4 = $this->lastTime - $this->timeOfCheckpoint;
 		if ( $this->maxTimeAllowed && ( $this->lastTime - $this->timeOfCheckpoint > $this->maxTimeAllowed ) ) {
 			return True;
 		}
}


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [93307] branches/ariel/xmldumps-backup/worker.py

2011-07-27 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/93307

Revision: 93307
Author:   ariel
Date: 2011-07-27 20:19:51 + (Wed, 27 Jul 2011)
Log Message:
---
get rid of last renameFile call (oops)

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-07-27 20:06:27 UTC (rev 
93306)
+++ branches/ariel/xmldumps-backup/worker.py2011-07-27 20:19:51 UTC (rev 
93307)
@@ -1908,7 +1908,7 @@
 					p.runPipelineAndGetOutput()
 					if not p.exitedSuccessfully():
 						runner.logAndPrint("file %s is truncated, moving out of the way" % f )
-						os.renameFile( f,  f + ".truncated" )
+						os.rename( f,  f + ".truncated" )
 						return 1
 		return 0
 


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [92610] branches/ariel/xmldumps-backup/worker.py

2011-07-20 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92610

Revision: 92610
Author:   ariel
Date: 2011-07-20 07:23:56 + (Wed, 20 Jul 2011)
Log Message:
---
add a few more enabled flags, get rid of a few more checks for dryrun etc

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-07-20 04:28:48 UTC (rev 
92609)
+++ branches/ariel/xmldumps-backup/worker.py2011-07-20 07:23:56 UTC (rev 
92610)
@@ -1021,7 +1021,6 @@
return os.path.join(self.wiki.publicDir(), self.date);
 
 class Runner(object):
-
def __init__(self, wiki, date=None, prefetch=True, spawn=True, 
job=None, restart=False, notice=, dryrun = False, loggingEnabled=False, 
chunkToDo = False):
self.wiki = wiki
self.dbName = wiki.dbName
@@ -1029,17 +1028,22 @@
self.spawn = spawn
self.chunkInfo = Chunk(wiki, self.dbName, self.logAndPrint)
self.restart = restart
-   self.loggingEnabled = loggingEnabled
self.htmlNoticeFile = None
self.log = None
self.dryrun = dryrun
self._chunkToDo = chunkToDo
+
+   self._loggingEnabled = loggingEnabled
self._statusEnabled = True
self._checksummerEnabled = True
self._runInfoFileEnabled = True
self._symLinksEnabled = True
self._feedsEnabled = True
self._noticeFileEnabled = True
+   self._makeDirEnabled = True
+   self._cleanOldDumpsEnabled = True
+   self._cleanupOldFilesEnabled = False
+   self._checkForTruncatedFilesEnabled = True
 
if self.dryrun or self._chunkToDo:
self._statusEnabled = False
@@ -1048,8 +1052,13 @@
self._symLinksEnabled = False
self._feedsEnabled = False
self._noticeFileEnabled = False
+   self._makeDirEnabled = False
+   self._cleanOldDumpsEnabled = False
+   self._cleanupOldFilesEnables = False
+
if self.dryrun:
-   self.loggingEnabled = False
+   self._loggingEnabled = False
+   self._checkForTruncatedFilesEnabled = False
 
if date:
# Override, continuing a past dump?
@@ -1065,7 +1074,7 @@
self.lastFailed = False
 
# these must come after the dumpdir setup so we know which 
directory we are in 
-   if (loggingEnabled):
+   if (self._loggingEnabled and self._makeDirEnabled):
self.logFileName = 
self.dumpDir.publicPath(self.wiki.config.logFile)
self.makeDir(join(self.wiki.publicDir(), self.date))
self.log = Logger(self.logFileName)
@@ -1088,7 +1097,7 @@
done = log.doJobOnLogQueue()

def logAndPrint(self, message):
-   if hasattr(self,'log') and self.log and not self.dryrun:
+   if hasattr(self,'log') and self.log and self._loggingEnabled:
self.log.addToLogQueue(%s\n % message)
print message
 
@@ -1098,9 +1107,8 @@
else:
return 
 
-   def remove(self, filename):
-   if not self.dryrun:
-   os.remove(filename)
+   def removeFile(self, filename):
+   os.remove(filename)
 
# returns 0 on success, 1 on error
def saveTable(self, table, outfile):
@@ -1224,9 +1232,8 @@
# mark all the following jobs to run as well 
self.dumpItemList.markFollowingJobsToRun()
 
-   if not self.dryrun:
-   self.makeDir(join(self.wiki.publicDir(), self.date))
-   self.makeDir(join(self.wiki.privateDir(), self.date))
+   self.makeDir(join(self.wiki.publicDir(), self.date))
+   self.makeDir(join(self.wiki.privateDir(), self.date))
 
if (self.restart):
self.logAndPrint(Preparing for restart from job %s of 
%s % (self.jobRequested, self.dbName))
@@ -1250,12 +1257,12 @@
except Exception, ex:
self.debug(*** exception!  + 
str(ex))
item.setStatus(failed)
-   if item.status() == failed and not 
self.dryrun and not self._chunkToDo:
+   if item.status() == failed

[MediaWiki-CVS] SVN: [92524] branches/ariel/xmldumps-backup/worker.py

2011-07-19 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92524

Revision: 92524
Author:   ariel
Date: 2011-07-19 11:36:21 + (Tue, 19 Jul 2011)
Log Message:
---
use hashlib instead of deprecated md5 module

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-07-19 11:30:38 UTC (rev 
92523)
+++ branches/ariel/xmldumps-backup/worker.py2011-07-19 11:36:21 UTC (rev 
92524)
@@ -1,7 +1,7 @@
 # Worker process, does the actual dumping
 
 import getopt
-import md5
+import hashlib
 import os
 import popen2
 import re
@@ -760,7 +760,7 @@
return 
(self.dumpDir.publicPath(self.getChecksumFileNameBasename() + . + 
self.timestamp + .tmp))
 
 	def _md5File(self, filename):
-		summer = md5.new()
+		summer = hashlib.md5()
 		infile = file(filename, "rb")
 		bufsize = 4192 * 32
 		buffer = infile.read(bufsize)


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
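
The hashlib replacement is a drop-in for this use: md5.new() becomes hashlib.md5(), and the buffered read loop is unchanged. The whole routine, as a self-contained sketch (buffer size taken from the diff; the function name is mine):

    import hashlib

    def md5_of_file(path, bufsize=4192 * 32):
        summer = hashlib.md5()
        with open(path, "rb") as infile:
            # Read in chunks so multi-gigabyte dump files never need
            # to fit in memory.
            chunk = infile.read(bufsize)
            while chunk:
                summer.update(chunk)
                chunk = infile.read(bufsize)
        return summer.hexdigest()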


[MediaWiki-CVS] SVN: [92536] branches/ariel/xmldumps-backup/worker.py

2011-07-19 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92536

Revision: 92536
Author:   ariel
Date: 2011-07-19 14:37:05 + (Tue, 19 Jul 2011)
Log Message:
---
copy over partial md5 results after every job, so users who download files 
before the entire dump is done can check file integrity

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-07-19 14:30:50 UTC (rev 
92535)
+++ branches/ariel/xmldumps-backup/worker.py2011-07-19 14:37:05 UTC (rev 
92536)
@@ -747,6 +747,13 @@
realFileName = self._getChecksumFileName()
os.rename(tmpFileName, realFileName)
 
+	def cpMd5TmpFileToPermFile(self):
+		if (self._enabled):
+			tmpFileName = self._getChecksumFileNameTmp()
+			realFileName = self._getChecksumFileName()
+			text = FileUtils.readFile(tmpFileName)
+			FileUtils.writeFile(self._getMd5FileDirName(), realFileName, text, self.wiki.config.fileperms)
+
 	def getChecksumFileNameBasename(self):
 		return ("md5sums.txt")
 
@@ -780,6 +787,9 @@
checksum = self._md5FileLine(path)
output.write(checksum)
 
+   def _getMd5FileDirName(self):
+   return os.path.join(self.wiki.publicDir(), self.wiki.date);
+
 class DumpDir(object):
def __init__(self, wiki, dbName, date):
self._wiki = wiki
@@ -1281,6 +1291,7 @@
else:
if not self.dryrun and not 
self._chunkToDo:
self.runUpdateItemFileInfo(item)
+   
self.checksums.cpMd5TmpFileToPermFile()
self.lastFailed = False
 
self.status.updateStatusFiles(done)


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [92543] branches/ariel/xmldumps-backup/worker.py

2011-07-19 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92543

Revision: 92543
Author:   ariel
Date: 2011-07-19 17:25:01 + (Tue, 19 Jul 2011)
Log Message:
---
clean up use of the config var

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py2011-07-19 17:13:26 UTC (rev 
92542)
+++ branches/ariel/xmldumps-backup/worker.py2011-07-19 17:25:01 UTC (rev 
92543)
@@ -69,19 +69,20 @@
def __init__(self, wiki, dbName, errorCallback = None):
 
self._dbName = dbName
-   self._chunksEnabled = wiki.config.chunksEnabled
+   self.wiki = wiki
+   self._chunksEnabled = self.wiki.config.chunksEnabled
if (self._chunksEnabled):
-   self._pagesPerChunkHistory = 
self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkHistory)
-   self._revsPerChunkHistory = 
self.convertCommaSepLineToNumbers(wiki.config.revsPerChunkHistory)
-   self._pagesPerChunkAbstract = 
self.convertCommaSepLineToNumbers(wiki.config.pagesPerChunkAbstract)
-   self._recombineHistory = wiki.config.recombineHistory
+   self._pagesPerChunkHistory = 
self.convertCommaSepLineToNumbers(self.wiki.config.pagesPerChunkHistory)
+   self._revsPerChunkHistory = 
self.convertCommaSepLineToNumbers(self.wiki.config.revsPerChunkHistory)
+   self._pagesPerChunkAbstract = 
self.convertCommaSepLineToNumbers(self.wiki.config.pagesPerChunkAbstract)
+   self._recombineHistory = 
self.wiki.config.recombineHistory
else:
self._pagesPerChunkHistory = False
self._revsPerChunkHistory = False
self._pagesPerChunkAbstract = False
self._recombineHistory = False
if (self._chunksEnabled):
-   self.Stats = PageAndEditStats(wiki,dbName, 
errorCallback)
+   self.Stats = PageAndEditStats(self.wiki,dbName, 
errorCallback)
if (not self.Stats.totalEdits or not 
self.Stats.totalPages):
raise BackupError(Failed to get DB stats, 
exiting)
if (self._revsPerChunkHistory):
@@ -155,17 +156,17 @@
 
 class DbServerInfo(object):
def __init__(self, wiki, dbName, errorCallback = None):
-   self.config = wiki.config
+   self.wiki = wiki
self.dbName = dbName
self.errorCallback = errorCallback
self.selectDatabaseServer()
 
 	def defaultServer(self):
 		# if this fails what do we do about it? Not a bleeping thing. *ugh* FIXME!!
-		if (not exists( self.config.php ) ):
-			raise BackupError("php command %s not found" % self.config.php);
+		if (not exists( self.wiki.config.php ) ):
+			raise BackupError("php command %s not found" % self.wiki.config.php);
 		command = "%s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump" % MiscUtils.shellEscape((
-			self.config.php, self.config.wikiDir, self.dbName))
+			self.wiki.config.php, self.wiki.config.wikiDir, self.dbName))
 		return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip()
 
def selectDatabaseServer(self):
@@ -173,12 +174,12 @@
 
def buildSqlCommand(self, query, pipeto = None):
"""Put together a command to execute an sql query to the server for this DB."""
-   if (not exists( self.config.mysql ) ):
-   raise BackupError("mysql command %s not found" % self.config.mysql);
+   if (not exists( self.wiki.config.mysql ) ):
+   raise BackupError("mysql command %s not found" % self.wiki.config.mysql);
command = [ [ "/bin/echo", "%s" % query ],
-   [ "%s" % self.config.mysql, "-h",
+   [ "%s" % self.wiki.config.mysql, "-h",
  "%s" % self.dbServer,
- "-u", "%s" % self.config.dbUser,
+ "-u", "%s" % self.wiki.config.dbUser,
  "%s" % self.passwordOption(),
  "%s" % self.dbName,
  "-r" ] ]
@@ -189,11 +190,11 @@
def buildSqlDumpCommand(self, table, pipeto = None):
"""Put together a command to dump a table from the current DB with mysqldump
and save to a gzipped sql file."""
-   if (not exists( self.config.mysqldump ) ):
-   raise BackupError("mysqldump command %s not found
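
The change is mechanical, but plausibly deliberate: once configuration can
vary per project (r92370, further down), a config reference cached at
construction time can go stale, while lookups through self.wiki always see
the current settings. A toy illustration of the difference, with
hypothetical class names:

    class Config(object):
        def __init__(self):
            self.php = "/usr/bin/php"

    class Wiki(object):
        def __init__(self, config):
            self.config = config

    class CachedConfigUser(object):
        # snapshots the reference; later rebinding of wiki.config is missed
        def __init__(self, wiki):
            self.config = wiki.config

    class WikiConfigUser(object):
        # holds the wiki; every lookup sees whatever config is current
        def __init__(self, wiki):
            self.wiki = wiki

    wiki = Wiki(Config())
    cached = CachedConfigUser(wiki)
    live = WikiConfigUser(wiki)
    newconf = Config()
    newconf.php = "/opt/php/bin/php"
    wiki.config = newconf         # e.g. per-project values applied
    print cached.config.php       # stale: /usr/bin/php
    print live.wiki.config.php    # current: /opt/php/bin/php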

[MediaWiki-CVS] SVN: [92427] branches/ariel/xmldumps-backup/WikiDump.py

2011-07-18 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92427

Revision: 92427
Author:   ariel
Date: 2011-07-18 12:48:58 + (Mon, 18 Jul 2011)
Log Message:
---
class name before method invocation

Modified Paths:
--
branches/ariel/xmldumps-backup/WikiDump.py

Modified: branches/ariel/xmldumps-backup/WikiDump.py
===
--- branches/ariel/xmldumps-backup/WikiDump.py  2011-07-18 12:29:53 UTC (rev 92426)
+++ branches/ariel/xmldumps-backup/WikiDump.py  2011-07-18 12:48:58 UTC (rev 92427)
@@ -326,7 +326,7 @@
try:
# tack on the file mtime so that if we have multiple wikis
# dumped on the same day, they get ordered properly
-   date = int(today()) - int(last)
+   date = int(TimeUtils.today()) - int(last)
age = FileUtils.fileAge(dumpStatus)
status = FileUtils.readFile(dumpStatus)
except:


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [92370] branches/ariel/xmldumps-backup

2011-07-16 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92370

Revision: 92370
Author:   ariel
Date: 2011-07-16 17:15:10 + (Sat, 16 Jul 2011)
Log Message:
---
allow per project items in conf file; formatting cleanup; bug from previous commit, wrong indentation

Modified Paths:
--
branches/ariel/xmldumps-backup/WikiDump.py
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/WikiDump.py
===
--- branches/ariel/xmldumps-backup/WikiDump.py  2011-07-16 16:56:09 UTC (rev 92369)
+++ branches/ariel/xmldumps-backup/WikiDump.py  2011-07-16 17:15:10 UTC (rev 92370)
@@ -127,10 +127,12 @@
 
 class Config(object):
def __init__(self, configFile=False):
+   self.projectName = False
+
home = os.path.dirname(sys.argv[0])
if (not configFile):
configFile = "wikidump.conf"
-   files = [
+   self.files = [
os.path.join(home,configFile),
"/etc/wikidump.conf",
os.path.join(os.getenv("HOME"), ".wikidump.conf")]
@@ -193,97 +195,123 @@
# whether or not to recombine the history pieces
"recombineHistory" : "1",
}
-   conf = ConfigParser.SafeConfigParser(defaults)
-   conf.read(files)
+   self.conf = ConfigParser.SafeConfigParser(defaults)
+   self.conf.read(self.files)

-   if not conf.has_section("wiki"):
+   if not self.conf.has_section("wiki"):
print "The mandatory configuration section 'wiki' was not defined."
raise ConfigParser.NoSectionError('wiki')
 
-   if not conf.has_option("wiki","dir"):
+   if not self.conf.has_option("wiki","dir"):
print "The mandatory setting 'dir' in the section 'wiki' was not defined."
raise ConfigParser.NoOptionError('wiki','dir')
 
-   self.dbList = MiscUtils.dbList(conf.get("wiki", "dblist"))
-   self.skipDbList = MiscUtils.dbList(conf.get("wiki", "skipdblist"))
-   self.privateList = MiscUtils.dbList(conf.get("wiki", "privatelist"))
-   self.bigList = MiscUtils.dbList(conf.get("wiki", "biglist"))
-   self.flaggedRevsList = MiscUtils.dbList(conf.get("wiki", "flaggedrevslist"))
-   self.wikiDir = conf.get("wiki", "dir")
-   self.forceNormal = conf.getint("wiki", "forceNormal")
-   self.halt = conf.getint("wiki", "halt")
+   self.parseConfFileGlobally()
+   self.parseConfFilePerProject()
 
+   def parseConfFileGlobally(self):
+   self.dbList = MiscUtils.dbList(self.conf.get("wiki", "dblist"))
+   self.skipDbList = MiscUtils.dbList(self.conf.get("wiki", "skipdblist"))
+   self.privateList = MiscUtils.dbList(self.conf.get("wiki", "privatelist"))
+   self.bigList = MiscUtils.dbList(self.conf.get("wiki", "biglist"))
+   self.flaggedRevsList = MiscUtils.dbList(self.conf.get("wiki", "flaggedrevslist"))
+   self.wikiDir = self.conf.get("wiki", "dir")
+   self.forceNormal = self.conf.getint("wiki", "forceNormal")
+   self.halt = self.conf.getint("wiki", "halt")
+
self.dbList = list(set(self.dbList) - set(self.skipDbList))
 
-   if not conf.has_section('output'):
-   conf.add_section('output')
-   self.publicDir = conf.get("output", "public")
-   self.privateDir = conf.get("output", "private")
-   self.webRoot = conf.get("output", "webroot")
-   self.index = conf.get("output", "index")
-   self.templateDir = conf.get("output", "templateDir")
-   self.perDumpIndex = conf.get("output", "perdumpindex")
-   self.logFile = conf.get("output", "logfile")
-   self.fileperms = conf.get("output", "fileperms")
+   if not self.conf.has_section('output'):
+   self.conf.add_section('output')
+   self.publicDir = self.conf.get("output", "public")
+   self.privateDir = self.conf.get("output", "private")
+   self.webRoot = self.conf.get("output", "webroot")
+   self.index = self.conf.get("output", "index")
+   self.templateDir = self.conf.get("output", "templateDir")
+   self.perDumpIndex = self.conf.get("output", "perdumpindex")
+   self.logFile = self.conf.get("output", "logfile")
+   self.fileperms = self.conf.get("output", "fileperms")
self.fileperms = int(self.fileperms,0)
-   if not conf.has_section('reporting'):
-   conf.add_section('reporting')
-   self.adminMail = conf.get("reporting", "adminmail")
-   self.mailFrom = conf.get("reporting
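
The parseConfFileGlobally/parseConfFilePerProject split suggests plain
ConfigParser semantics: read the shared sections once, then let a section
named after the project override individual keys. The per-project half of
the diff is cut off above, so the following is only a sketch of the likely
shape (get_opt_for_project and the sample sections are illustrative, not
the committed code):

    import ConfigParser
    import StringIO

    def get_opt_for_project(conf, project_name, section, option):
        # prefer a per-project section (e.g. [enwiki]) when it overrides
        # the option, otherwise fall back to the global section
        if project_name and conf.has_section(project_name):
            if conf.has_option(project_name, option):
                return conf.get(project_name, option)
        return conf.get(section, option)

    sample = StringIO.StringIO(
        "[wiki]\n"
        "dir = /backups\n"
        "halt = 0\n"
        "\n"
        "[enwiki]\n"
        "dir = /backups-big\n")
    conf = ConfigParser.SafeConfigParser()
    conf.readfp(sample)
    print get_opt_for_project(conf, "enwiki", "wiki", "dir")    # /backups-big
    print get_opt_for_project(conf, "enwiki", "wiki", "halt")   # 0
    print get_opt_for_project(conf, "frwiki", "wiki", "dir")    # /backups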

[MediaWiki-CVS] SVN: [92230] branches/ariel/xmldumps-backup/worker.py

2011-07-15 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92230

Revision: 92230
Author:   ariel
Date: 2011-07-15 07:53:31 + (Fri, 15 Jul 2011)
Log Message:
---
check for existence of various commands, exception if missing (thanks to reeve of irc for noticing the issue)

Modified Paths:
--
branches/ariel/xmldumps-backup/worker.py

Modified: branches/ariel/xmldumps-backup/worker.py
===
--- branches/ariel/xmldumps-backup/worker.py 2011-07-15 07:42:20 UTC (rev 92229)
+++ branches/ariel/xmldumps-backup/worker.py 2011-07-15 07:53:31 UTC (rev 92230)
@@ -160,6 +160,8 @@
 
def defaultServer(self):
# if this fails what do we do about it? Not a bleeping thing. *ugh* FIXME!!
+   if (not exists( self.config.php ) ):
+   raise BackupError("php command %s not found" % self.config.php);
command = "%s -q %s/maintenance/getSlaveServer.php --wiki=%s --group=dump" % MiscUtils.shellEscape((
self.config.php, self.config.wikiDir, self.dbName))
return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip()
@@ -169,6 +171,8 @@
 
def buildSqlCommand(self, query, pipeto = None):
"""Put together a command to execute an sql query to the server for this DB."""
+   if (not exists( self.config.mysql ) ):
+   raise BackupError("mysql command %s not found" % self.config.mysql);
command = [ [ "/bin/echo", "%s" % query ],
[ "%s" % self.config.mysql, "-h",
  "%s" % self.dbServer,
@@ -183,6 +187,8 @@
def buildSqlDumpCommand(self, table, pipeto = None):
"""Put together a command to dump a table from the current DB with mysqldump
and save to a gzipped sql file."""
+   if (not exists( self.config.mysqldump ) ):
+   raise BackupError("mysqldump command %s not found" % self.config.mysqldump);
command = [ [ "%s" % self.config.mysqldump, "-h",
   "%s" % self.dbServer, "-u",
   "%s" % self.config.dbUser,
@@ -215,6 +221,8 @@
def getDBTablePrefix(self):
"""Get the prefix for all tables for the specific wiki ($wgDBprefix)"""
# FIXME later full path
+   if (not exists( self.config.php ) ):
+   raise BackupError("php command %s not found" % self.config.php);
command = "echo 'print $wgDBprefix; ' | %s -q %s/maintenance/eval.php --wiki=%s" % MiscUtils.shellEscape((
self.config.php, self.config.wikiDir, self.dbName))
return RunSimpleCommand.runAndReturn(command, self.errorCallback).strip()
@@ -953,11 +961,15 @@
# returns 0 on success, 1 on error
def saveTable(self, table, outfile):
"""Dump a table from the current DB with mysqldump, save to a gzipped sql file."""
+   if (not exists( self.config.gzip ) ):
+   raise BackupError("gzip command %s not found" % self.config.gzip);
commands = self.dbServerInfo.buildSqlDumpCommand(table, self.config.gzip)
return self.saveCommand(commands, outfile)
 
def saveSql(self, query, outfile):
"""Pass some SQL commands to the server for this DB and save output to a gzipped file."""
+   if (not exists( self.config.gzip ) ):
+   raise BackupError("gzip command %s not found" % self.config.gzip);
command = self.dbServerInfo.buildSqlCommand(query, self.config.gzip)
return self.saveCommand(command, outfile)
 
@@ -1354,8 +1366,14 @@
 outputFilename = runner.dumpDir.publicPath(outputFileBasename)
chunkNum = 0
recombines = []
+   if (not exists( runner.config.head ) ):
+   raise BackupError("head command %s not found" % runner.config.head);
head = runner.config.head
+   if (not exists( runner.config.tail ) ):
+   raise BackupError("tail command %s not found" % runner.config.tail);
tail = runner.config.tail
+   if (not exists( runner.config.grep ) ):
+   raise BackupError("grep command %s not found" % runner.config.grep);
grep = runner.config.grep
 
# we assume the result is always going to be run in a subshell.
@@ -1486,6 +1504,8 @@
current = self.buildCurrentOutputFilename(runner, chunk)
articles = self.buildArticlesOutputFilename(runner, chunk)
 
+   if (not exists( runner.config.php ) ):
+   raise BackupError("php command %s not found" % runner.config.php);
command = [ "%s" % runner.config.php
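
Every guard added here repeats the same two-line pattern, so the checks
could equally be gathered into one helper that runs before any dump step
starts. A sketch (check_commands is illustrative; worker.py keeps the
checks inline at each call site, as above):

    from os.path import exists

    class BackupError(Exception):
        pass

    def check_commands(config, names):
        # fail early, before a missing binary can break a half-finished
        # command pipeline partway through a dump
        for name in names:
            path = getattr(config, name)
            if not exists(path):
                raise BackupError("%s command %s not found" % (name, path))

    # e.g.:
    # check_commands(runner.config,
    #                ["php", "mysql", "mysqldump", "gzip", "head", "tail", "grep"])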

[MediaWiki-CVS] SVN: [92144] branches/ariel/xmldumps-backup/mwbzutils

2011-07-14 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92144

Revision: 92144
Author:   ariel
Date: 2011-07-14 08:35:29 + (Thu, 14 Jul 2011)
Log Message:
---
version bump; for finding pageid in xml file, workaround for pages with giant cumulative rev text (*cough en pedia pageid 3976790), uses api (relatively fast) with fallback to stub file (much slower but not nearly as slow as a straight decompress and read)

Modified Paths:
--
branches/ariel/xmldumps-backup/mwbzutils/Makefile
branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c
branches/ariel/xmldumps-backup/mwbzutils/mwbzlib.c
branches/ariel/xmldumps-backup/mwbzutils/mwbzutils.h

Added Paths:
---
branches/ariel/xmldumps-backup/mwbzutils/httptiny.c

Modified: branches/ariel/xmldumps-backup/mwbzutils/Makefile
===
--- branches/ariel/xmldumps-backup/mwbzutils/Makefile   2011-07-14 07:00:25 UTC (rev 92143)
+++ branches/ariel/xmldumps-backup/mwbzutils/Makefile   2011-07-14 08:35:29 UTC (rev 92144)
@@ -34,8 +34,8 @@
 dumplastbz2block: $(OBJSBZ) mwbzlib.o dumplastbz2block.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o dumplastbz2block dumplastbz2block.o mwbzlib.o  $(OBJSBZ) -lbz2
 
-findpageidinbz2xml: $(OBJSBZ) mwbzlib.o findpageidinbz2xml.o
-	$(CC) $(CFLAGS) $(LDFLAGS) -o findpageidinbz2xml findpageidinbz2xml.o mwbzlib.o $(OBJSBZ) -lbz2
+findpageidinbz2xml: $(OBJSBZ) mwbzlib.o httptiny.o findpageidinbz2xml.o
+	$(CC) $(CFLAGS) $(LDFLAGS) -o findpageidinbz2xml findpageidinbz2xml.o httptiny.o mwbzlib.o $(OBJSBZ) -lbz2 -lz
 
 checkforbz2footer: $(OBJSBZ) mwbzlib.o checkforbz2footer.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o checkforbz2footer checkforbz2footer.o mwbzlib.o $(OBJSBZ) -lbz2
@@ -62,6 +62,8 @@
$(CC) $(CFLAGS) -c bzlibfuncs.c
 mwbzlib.o: mwbzlib.c bzlib.h bzlib_private.h mwbzutils.h
$(CC) $(CFLAGS) -c mwbzlib.c
+httptiny.o: httptiny.c
+   $(CC) $(CFLAGS) -c httptiny.c
 dumplastbz2block.o: dumplastbz2block.c
$(CC) $(CFLAGS) -c dumplastbz2block.c
 findpageidinbz2xml.o: findpageidinbz2xml.c
@@ -73,7 +75,7 @@
 
 distclean: clean
 
-DISTNAME=mwbzutils-0.0.1
+DISTNAME=mwbzutils-0.0.2
 dist: 
rm -f $(DISTNAME)
ln -s -f . $(DISTNAME)
@@ -82,6 +84,7 @@
   $(DISTNAME)/findpageidinbz2xml.c \
   $(DISTNAME)/checkforbz2footer.c \
   $(DISTNAME)/dumpbz2filefromoffset.c \
+  $(DISTNAME)/httptiny.c \
   $(DISTNAME)/mwbzlib.c \
   $(DISTNAME)/mwbzutils.h \
   $(DISTNAME)/bzlibfuncs.c \

Modified: branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c
===
--- branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c   2011-07-14 07:00:25 UTC (rev 92143)
+++ branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c   2011-07-14 08:35:29 UTC (rev 92144)
@@ -1,4 +1,5 @@
 #include <unistd.h>
+#include <getopt.h>
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
@@ -9,9 +10,9 @@
 #include <sys/types.h>
 #include <regex.h>
 #include <inttypes.h>
+#include <zlib.h>
 #include "mwbzutils.h"
 
-
 /* 
find the first bz2 block marker in the file, 
from its current position,
@@ -23,6 +24,13 @@
 int init_and_read_first_buffer_bz2_file(bz_info_t *bfile, int fin) {
   int res;
 
+  bfile->bufin_size = BUFINSIZE;
+  bfile->marker = init_marker();
+  bfile->bytes_read = 0;
+  bfile->bytes_written = 0;
+  bfile->eof = 0;
+  bfile->file_size = get_file_size(fin);
+
   bfile->initialized++;
 
   res = find_next_bz2_block_marker(fin, bfile, FORWARD);
@@ -32,35 +40,244 @@
 setup_first_buffer_to_decompress(fin, bfile);
 return(0);
   }
+  else {
+    fprintf(stderr,"failed to find the next frigging block marker\n");
+    return(-1);
+  }
+}
+
+extern char * geturl(char *hostname, int port, char *url);
+
+char *get_hostname_from_xml_header(int fin) {
+  int res;
+  regmatch_t *match_base_expr;
+  regex_t compiled_base_expr;
+  /* <base>http://el.wiktionary.org/wiki/...</base> */
+  /*  <base>http://trouble.localdomain/wiki/ */
+  char *base_expr = "<base>http://([^/]+)/";
+  int length=5000; /* output buffer size */
+
+  buf_info_t *b;
+  bz_info_t bfile;
+
+  int hostname_length = 0;
+
+  off_t old_position, seek_result;
+  static char hostname[256];
+
+  bfile.initialized = 0;
+
+  res = regcomp(&compiled_base_expr, base_expr, REG_EXTENDED);
+  match_base_expr = (regmatch_t *)malloc(sizeof(regmatch_t)*2);
+
+  b = init_buffer(length);
+  bfile.bytes_read = 0;
+
+  bfile.position = (off_t)0;
+  old_position = lseek(fin,(off_t)0,SEEK_CUR);
+  seek_result = lseek(fin,(off_t)0,SEEK_SET);
+
+  while ((get_buffer_of_uncompressed_data(b, fin, &bfile, FORWARD)>=0) && (! bfile.eof)) {
+    /* so someday the header might grow enough that base isn't in the first 1000 characters but we'll ignore that for now */
+    if (bfile.bytes_read && b->bytes_avail > 1000) {
+      /* get project name
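
The api route mentioned in the log message works because MediaWiki can map
a revision id straight to its owning page, which is far cheaper than
decompressing the stream until the enclosing page element turns up. Roughly
what the lookup amounts to, seen from Python (the C tool builds its request
from the <base> hostname extracted above; the /w/api.php path and the
hostname in the example are assumptions that hold for Wikimedia wikis):

    import urllib2
    import json

    def page_id_from_rev_id(hostname, rev_id):
        # action=query with revids=... returns the owning page; the page id
        # is the key of the "pages" map (bad revision ids come back under
        # "badrevids" instead)
        url = ("http://%s/w/api.php?action=query&revids=%d&format=json"
               % (hostname, rev_id))
        data = json.loads(urllib2.urlopen(url).read())
        pages = data.get("query", {}).get("pages", {})
        for page_id in pages:
            return int(page_id)
        return None

    # e.g. page_id_from_rev_id("en.wikipedia.org", 12345)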

[MediaWiki-CVS] SVN: [92149] trunk/extensions/Renameuser/renameUserCleanup.php

2011-07-14 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92149

Revision: 92149
Author:   ariel
Date: 2011-07-14 11:07:41 + (Thu, 14 Jul 2011)
Log Message:
---
search for older format log entries in case the new format ones aren't there, clean up check for existence

Modified Paths:
--
trunk/extensions/Renameuser/renameUserCleanup.php

Modified: trunk/extensions/Renameuser/renameUserCleanup.php
===
--- trunk/extensions/Renameuser/renameUserCleanup.php   2011-07-14 10:47:56 UTC (rev 92148)
+++ trunk/extensions/Renameuser/renameUserCleanup.php   2011-07-14 11:07:41 UTC (rev 92149)
@@ -67,12 +67,29 @@
 ),
__METHOD__
);
-   if (! $result ) {
-   print("No log entry found for a rename of ".$olduser->getName()." to ".$newuser->getName().", giving up\n");
-   exit(1);
+   if (! $result || ! $result->numRows() ) {
+   // try the old format
+   $result = $dbr->select( 'logging', '*',
+   array( 'log_type' => 'renameuser',
+   'log_action' => 'renameuser',
+   'log_title' => $olduser->getName(),
+   ),
+   __METHOD__
+   );
+   if (! $result ||  ! $result->numRows() ) {
+   print("No log entry found for a rename of ".$olduser->getName()." to ".$newuser->getName().", giving up\n");
+   exit(1);
+   }
+   else {
+   foreach ( $result as $row ) {
+   print("Found possible log entry of the rename, please check: ".$row->log_title." with comment ".$row->log_comment." on $row->log_timestamp\n");
+   }
+   }
}
-   foreach ( $result as $row ) {
-   print("Found log entry of the rename: ".$olduser->getName()." to ".$newuser->getName()." on $row->log_timestamp\n");
+   else {
+   foreach ( $result as $row ) {
+   print("Found log entry of the rename: ".$olduser->getName()." to ".$newuser->getName()." on $row->log_timestamp\n");
+   }
}
if ($result->numRows() > 1) {
print("More than one rename entry found in the log, not sure what to do. Continue anyways? [N/y] ");
@@ -91,7 +108,7 @@
$this->updateTable('logging', 'log_user_text', 'log_user', 'log_timestamp', $olduser, $newuser, $dbw);
$this->updateTable('image', 'img_user_text', 'img_user', 'img_timestamp', $olduser, $newuser, $dbw);
$this->updateTable('oldimage', 'oi_user_text', 'oi_user', 'oi_timestamp', $olduser, $newuser, $dbw);
-# FIXME: updateTable('filearchive', 'fa_user_text','fa_user', 'fa_timestamp', $olduser, $newuser, $dbw);  (not indexed yet)
+   $this->updateTable('filearchive', 'fa_user_text','fa_user', 'fa_timestamp', $olduser, $newuser, $dbw);
print "Done!\n";
exit(0);
}


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [92154] branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c

2011-07-14 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/92154

Revision: 92154
Author:   ariel
Date: 2011-07-14 11:48:42 + (Thu, 14 Jul 2011)
Log Message:
---
this time without the debugging values for frequency of api calls

Modified Paths:
--
branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c

Modified: branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c
===
--- branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c   2011-07-14 11:42:14 UTC (rev 92153)
+++ branches/ariel/xmldumps-backup/mwbzutils/findpageidinbz2xml.c   2011-07-14 11:48:42 UTC (rev 92154)
@@ -336,8 +336,7 @@
   at least one rev id in there.  20 million / 5000 or whatever it is, is 4000 buffers full of crap
   hopefully that doesn't take forever.
*/
-   /*  if (buffer_count>(2000/BUFINSIZE) && rev_id) { */
-   if (buffer_count>3 && rev_id) {
+   if (buffer_count>(2000/BUFINSIZE) && rev_id) {
  if (use_api) {
page_id_found = get_page_id_from_rev_id_via_api(rev_id, fin);
  }


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs


[MediaWiki-CVS] SVN: [91967] trunk/phase3

2011-07-12 Thread ariel
http://www.mediawiki.org/wiki/Special:Code/MediaWiki/91967

Revision: 91967
Author:   ariel
Date: 2011-07-12 15:01:58 + (Tue, 12 Jul 2011)
Log Message:
---
libxml >= 2.7.3 has a 10mb cap on the size of a text node and the LIBXML_PARSEHUGE flag lets us override that, needed for lucid since there are a few revs in the db larger than that limit

Modified Paths:
--
trunk/phase3/includes/Import.php
trunk/phase3/maintenance/backupPrefetch.inc

Modified: trunk/phase3/includes/Import.php
===
--- trunk/phase3/includes/Import.php 2011-07-12 14:58:58 UTC (rev 91966)
+++ trunk/phase3/includes/Import.php 2011-07-12 15:01:58 UTC (rev 91967)
@@ -45,7 +45,12 @@
 
stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
$id = UploadSourceAdapter::registerSource( $source );
-   $this->reader->open( "uploadsource://$id" );
+   if (defined( 'LIBXML_PARSEHUGE' ) ) {
+   $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
+   }
+   else {
+   $this->reader->open( "uploadsource://$id" );
+   }
 
// Default callbacks
$this->setRevisionCallback( array( $this, "importRevision" ) );

Modified: trunk/phase3/maintenance/backupPrefetch.inc
===
--- trunk/phase3/maintenance/backupPrefetch.inc 2011-07-12 14:58:58 UTC (rev 91966)
+++ trunk/phase3/maintenance/backupPrefetch.inc 2011-07-12 15:01:58 UTC (rev 91967)
@@ -51,7 +51,12 @@
$this->infiles = explode(';',$infile);
$this->reader = new XMLReader();
$infile = array_shift($this->infiles);
-   $this->reader->open( $infile );
+   if (defined( 'LIBXML_PARSEHUGE' ) ) {
+   $this->reader->open( $infile, null, LIBXML_PARSEHUGE );
+   }
+   else {
+   $this->reader->open( $infile );
+   }
}
 
/**
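
The libxml2 cap is not specific to PHP's XMLReader; any binding hits it and
each exposes its own spelling of the "huge" parser option. For comparison,
a sketch of the same escape hatch from Python's lxml (the dump scripts do
not use lxml; this only shows the equivalent knob elsewhere):

    from lxml import etree

    # libxml2 >= 2.7.3 rejects text nodes over 10mb unless the parser is
    # created with XML_PARSE_HUGE; lxml spells that huge_tree=True
    parser = etree.XMLParser(huge_tree=True)

    def longest_text(path):
        tree = etree.parse(path, parser)
        return max(len(elem.text or "") for elem in tree.iter())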


___
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

