MarkAHershberger has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/164896

Change subject: Create CLI utility for ReplaceText jobs
......................................................................

Create CLI utility for ReplaceText jobs

This creates a new class (ReplaceTextSearch) to move the logic from
the special page so that it is available for CLI access.  Also creates
replace.php which can be used for search-replace tasks that need to be
automated.

Change-Id: I8ccec61f570f33043d8a8d00c52b40acd9d6894a
---
M ReplaceText.php
A ReplaceTextSearch.php
M SpecialReplaceText.php
A replace.php
4 files changed, 326 insertions(+), 58 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ReplaceText 
refs/changes/96/164896/1

diff --git a/ReplaceText.php b/ReplaceText.php
index 20df86a..8fdd798 100644
--- a/ReplaceText.php
+++ b/ReplaceText.php
@@ -47,6 +47,7 @@
 $wgSpecialPageGroups['ReplaceText'] = 'wiki';
 $wgAutoloadClasses['ReplaceText'] = $rtgIP . 'SpecialReplaceText.php';
 $wgAutoloadClasses['ReplaceTextJob'] = $rtgIP . 'ReplaceTextJob.php';
+$wgAutoloadClasses['ReplaceTextSearch'] = $rtgIP . 'ReplaceTextSearch.php';
 
 /**
  * This function should really go into a "ReplaceText_body.php" file.
diff --git a/ReplaceTextSearch.php b/ReplaceTextSearch.php
new file mode 100644
index 0000000..73a9f62
--- /dev/null
+++ b/ReplaceTextSearch.php
@@ -0,0 +1,59 @@
+<?php
+
+/**
+ * Database search helpers for the ReplaceText extension, kept in their own
+ * class so that callers other than the special page can run the same query.
+ */
+class ReplaceTextSearch {
+       /**
+        * Select the latest-revision text of every page matching $search.
+        *
+        * @param string $search Text to look for; a regular expression when
+        *   $use_regex is true
+        * @param int|int[] $namespaces Value(s) for the page_namespace condition
+        * @param string|null $category If non-empty, only pages in this category
+        * @param string|null $prefix If non-empty, only titles with this prefix
+        * @param bool $use_regex Use the database regex operator instead of LIKE
+        * @return mixed Result of $dbr->select(); the selected columns are
+        *   page_id, page_namespace, page_title and old_text
+        */
+       public static function doSearchQuery(
+               $search, $namespaces, $category, $prefix, $use_regex = false
+       ) {
+               $dbr = wfGetDB( DB_SLAVE );
+               $tables = array( 'page', 'revision', 'text' );
+               $vars = array(
+                       'page_id', 'page_namespace', 'page_title', 'old_text'
+               );
+               if ( $use_regex ) {
+                       $comparisonCond = self::regexCond( $dbr, 'old_text', $search );
+               } else {
+                       $any = $dbr->anyString();
+                       $comparisonCond = 'old_text '
+                               . $dbr->buildLike( $any, $search, $any );
+               }
+               $conds = array(
+                       $comparisonCond,
+                       'page_namespace' => $namespaces,
+                       // Join page -> latest revision -> text row.
+                       'rev_id = page_latest',
+                       'rev_text_id = old_id'
+               );
+
+               self::categoryCondition( $category, $tables, $conds );
+               self::prefixCondition( $prefix, $conds );
+               $sort = array( 'ORDER BY' => 'page_namespace, page_title' );
+
+               return $dbr->select( $tables, $vars, $conds, __METHOD__, $sort );
+       }
+
+       /**
+        * Restrict the query to members of $category, if one was given.
+        *
+        * @param string|null $category Category name; empty means no restriction
+        * @param array &$tables Table list; 'categorylinks' is appended
+        * @param array &$conds Condition list; the join and cl_to are appended
+        */
+       static protected function categoryCondition(
+               $category, &$tables, &$conds
+       ) {
+               if ( strval( $category ) === '' ) {
+                       return;
+               }
+
+               // Title::newFromText() yields null for an invalid title. Fall back
+               // to the raw string, as prefixCondition() does, instead of calling
+               // a method on null.
+               $title = Title::newFromText( $category );
+               if ( !is_null( $title ) ) {
+                       $category = $title->getDbKey();
+               }
+               $tables[] = 'categorylinks';
+               $conds[] = 'page_id = cl_from';
+               $conds['cl_to'] = $category;
+       }
+
+       /**
+        * Restrict the query to titles starting with $prefix, if one was given.
+        *
+        * @param string|null $prefix Title prefix; empty means no restriction
+        * @param array &$conds Condition list; a page_title LIKE is appended
+        */
+       static protected function prefixCondition( $prefix, &$conds ) {
+               if ( strval( $prefix ) === '' ) {
+                       return;
+               }
+
+               $dbr = wfGetDB( DB_SLAVE );
+               $title = Title::newFromText( $prefix );
+               if ( !is_null( $title ) ) {
+                       $prefix = $title->getDbKey();
+               }
+               $any = $dbr->anyString();
+               $conds[] = 'page_title ' . $dbr->buildLike( $prefix, $any );
+       }
+
+       /**
+        * Build a "<column> <regex-operator> <quoted regex>" condition.
+        *
+        * @param object $dbr Database handle; '~' is used for DatabasePostgres,
+        *   'REGEXP' for everything else
+        * @param string $column Column name, inserted as-is
+        * @param string $regex Pattern; quoted with $dbr->addQuotes()
+        * @return string SQL condition
+        */
+       static private function regexCond( $dbr, $column, $regex ) {
+               $op = ( $dbr instanceof DatabasePostgres ) ? '~' : 'REGEXP';
+               return "$column $op " . $dbr->addQuotes( $regex );
+       }
+}
\ No newline at end of file
diff --git a/SpecialReplaceText.php b/SpecialReplaceText.php
index 9499712..8b70dcf 100644
--- a/SpecialReplaceText.php
+++ b/SpecialReplaceText.php
@@ -128,7 +128,7 @@
 
                        // if user is replacing text within pages...
                        if ( $this->edit_pages ) {
-                               $res = $this->doSearchQuery(
+                               $res = ReplaceTextSearch::doSearchQuery(
                                        $this->target,
                                        $this->selected_namespaces,
                                        $this->category,
@@ -209,7 +209,7 @@
                                if ( $this->replacement === '' ) {
                                        $warning_msg = 
$this->msg('replacetext_blankwarning')->text();
                                } elseif ( count( $titles_for_edit ) > 0 ) {
-                                       $res = $this->doSearchQuery( 
$this->replacement, $this->selected_namespaces, $this->category, $this->prefix, 
$this->use_regex );
+                                       $res = 
ReplaceTextSearch::doSearchQuery( $this->replacement, 
$this->selected_namespaces, $this->category, $this->prefix, $this->use_regex );
                                        $count = $res->numRows();
                                        if ( $count > 0 ) {
                                                $warning_msg = $this->msg( 
'replacetext_warning' )->numParams( $count )
@@ -594,61 +594,5 @@
                $sort = array( 'ORDER BY' => 'page_namespace, page_title' );
 
                return $dbr->select( $tables, $vars, $conds, __METHOD__ , $sort 
);
-       }
-
-       function doSearchQuery( $search, $namespaces, $category, $prefix, 
$use_regex = false ) {
-               $dbr = wfGetDB( DB_SLAVE );
-               $tables = array( 'page', 'revision', 'text' );
-               $vars = array( 'page_id', 'page_namespace', 'page_title', 
'old_text' );
-               if ( $use_regex ) {
-                       $comparisonCond = $this->regexCond( $dbr, 'old_text', 
$search );
-               } else {
-                       $any = $dbr->anyString();
-                       $comparisonCond = 'old_text ' . $dbr->buildLike( $any, 
$search, $any );
-               }
-               $conds = array(
-                       $comparisonCond,
-                       'page_namespace' => $namespaces,
-                       'rev_id = page_latest',
-                       'rev_text_id = old_id'
-               );
-
-               $this->categoryCondition( $category, $tables, $conds );
-               $this->prefixCondition( $prefix, $conds );
-               $sort = array( 'ORDER BY' => 'page_namespace, page_title' );
-
-               return $dbr->select( $tables, $vars, $conds, __METHOD__ , $sort 
);
-       }
-
-       protected function categoryCondition( $category, &$tables, &$conds ) {
-               if ( strval( $category ) !== '' ) {
-                       $category = Title::newFromText( $category )->getDbKey();
-                       $tables[] = 'categorylinks';
-                       $conds[] = 'page_id = cl_from';
-                       $conds['cl_to'] = $category;
-               }
-       }
-
-       protected function prefixCondition( $prefix, &$conds ) {
-               if ( strval( $prefix ) === '' ) {
-                       return;
-               }
-
-               $dbr = wfGetDB( DB_SLAVE );
-               $title = Title::newFromText( $prefix );
-               if ( !is_null( $title ) ) {
-                       $prefix = $title->getDbKey();
-               }
-               $any = $dbr->anyString();
-               $conds[] = 'page_title ' . $dbr->buildLike( $prefix, $any );
-       }
-
-       private function regexCond( $dbr, $column, $regex ) {
-               if ( $dbr instanceof DatabasePostgres ) {
-                       $op = '~';
-               } else {
-                       $op = 'REGEXP';
-               }
-               return "$column $op " . $dbr->addQuotes( $regex );
        }
 }
diff --git a/replace.php b/replace.php
new file mode 100755
index 0000000..c01311e
--- /dev/null
+++ b/replace.php
@@ -0,0 +1,264 @@
+#!/usr/bin/php
+<?php
+/**
+ * Insert jobs into the jobqueue to replace text bits.
+ * Or execute immediately... your choice.
+ *
+ * Copyright © 2014 Mark A. Hershberger <m...@nichework.com>
+ * https://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+// Locate MediaWiki core without a machine-specific path: honour the
+// MW_INSTALL_PATH environment variable when it is set, otherwise assume this
+// file lives in <core>/extensions/ReplaceText/.
+$IP = getenv( 'MW_INSTALL_PATH' );
+if ( $IP === false ) {
+       $IP = __DIR__ . '/../..';
+}
+require_once "$IP/maintenance/Maintenance.php";
+
+/**
+ * Maintenance script that finds text in wiki pages and replaces it by running
+ * a ReplaceTextJob for every matching page.
+ *
+ * NOTE(review): ReplaceText.php also maps the class name "ReplaceText" to
+ * SpecialReplaceText.php in $wgAutoloadClasses; confirm the two classes can
+ * never be loaded in the same process, or rename this one.
+ *
+ * @ingroup Maintenance
+ */
+class ReplaceText extends Maintenance {
+       private $userReplacing;
+       private $userId;
+       private $target;
+       private $replacement;
+       private $summaryMsg;
+       private $namespaces;
+       private $category;
+       private $prefix;
+       private $useRegex;
+       /** @var Title[]|null Matching titles; null until getTitles() has run */
+       private $titles;
+
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "CLI utility to replace text wherever it is ".
+                       "found in the wiki.";
+
+               $this->addArg( "find", "Text to find.", false );
+               $this->addArg( "replace", "Text to replace.", false );
+
+               $this->addOption( "dry-run",
+                       "Only find the texts, don't replace.",
+                       false, false, 'n' );
+               $this->addOption( "regex", "This is a regex (false).",
+                       false, false, 'r' );
+               $this->addOption( "user",
+                       "The user to attribute this to (uid 1).",
+                       false, true, 'u' );
+               $this->addOption( "yes",
+                       "Skip all prompts with an assumed 'yes'.",
+                       false, false, 'y' );
+               $this->addOption( "summary",
+                       "Alternate edit summary. (%r is where to ".
+                       " place the replacement text, %f the text to look for.)",
+                       false, true, 's' );
+               // The namespace list is a value, so the option must be declared
+               // as taking an argument; otherwise "--ns Talk" is read as the
+               // flag value 1 and "Talk" becomes the first positional argument.
+               $this->addOption( "ns",
+                       "Comma separated namespaces to search in. ".
+                       "(Main)",
+                       false, true );
+
+               $this->addOption( "listns",
+                       "List out the namespaces on this wiki.",
+                       false, false );
+       }
+
+       /**
+        * Resolve the --user option (numeric id or user name) to a user id.
+        *
+        * @return int|string User id; 1 when --user was not given
+        */
+       protected function getUser() {
+               $userReplacing = $this->getOption( "user", null );
+               if ( $userReplacing === null ) {
+                       return 1;
+               }
+
+               $user = is_numeric( $userReplacing )
+                       ? User::newFromId( $userReplacing )
+                       : User::newFromName( $userReplacing );
+
+               // Reject anything that is not a User object (instanceof is safe
+               // on non-objects, get_class() is not) and anything without a
+               // usable id.
+               if ( !( $user instanceof User ) || !$user->getId() ) {
+                       $this->error(
+                               "Couldn't translate '$userReplacing' to a user.", 1
+                       );
+               }
+
+               return $user->getId();
+       }
+
+       /**
+        * @return string The text to search for (first positional argument)
+        */
+       protected function getTarget() {
+               $ret = $this->getArg( 0 );
+               // Compare explicitly so that a target of "0" is accepted.
+               if ( $ret === null || $ret === '' ) {
+                       $this->error( "You have to specify a target.", true );
+               }
+               return $ret;
+       }
+
+       /**
+        * @return string The replacement text (second positional argument);
+        *   may be "" or "0", only a missing argument is an error
+        */
+       protected function getReplacement() {
+               $ret = $this->getArg( 1 );
+               if ( $ret === null ) {
+                       $this->error( "You have to specify replacement text.", true );
+               }
+               return $ret;
+       }
+
+       /**
+        * Build the edit summary: the --summary text with %f and %r expanded
+        * when given, otherwise the 'replacetext_editsummary' message.
+        *
+        * @return string
+        */
+       protected function getSummary() {
+               $custom = $this->getOption( "summary" );
+               if ( $custom !== null ) {
+                       return str_replace( array( '%f', '%r' ),
+                               array( $this->target, $this->replacement ),
+                               $custom );
+               }
+               return wfMessage(
+                       'replacetext_editsummary',
+                       $this->target, $this->replacement
+               )->inContentLanguage()->plain();
+       }
+
+       /**
+        * Print a table of namespace indexes and canonical names.
+        */
+       protected function listNamespaces() {
+               echo "Index\tNamespace\n";
+               foreach ( MWNamespace::getCanonicalNamespaces() as $int => $val ) {
+                       if ( $val == "" ) {
+                               $val = "(main)";
+                       }
+                       echo " $int\t$val\n";
+               }
+       }
+
+       /**
+        * Translate the comma-separated --ns option into namespace indexes.
+        * Each entry may be a numeric index or a canonical name in any case.
+        *
+        * @return int[] Namespace indexes; array( NS_MAIN ) when --ns is absent
+        */
+       protected function getNamespaces() {
+               $names = $this->getOption( "ns" );
+               if ( $names === null || $names === '' ) {
+                       return array( NS_MAIN );
+               }
+
+               $ns = MWNamespace::getCanonicalNamespaces();
+               $ns[0] = "main";
+               // The input is lower-cased before the lookup, so the keys must
+               // be lower-cased as well or nothing except "main" can match.
+               $nsflip = array_flip( array_map( 'strtolower', $ns ) );
+
+               $namespaces = array();
+               foreach ( explode( ",", $names ) as $namespace ) {
+                       $namespace = trim( $namespace );
+                       if ( is_numeric( $namespace ) && isset( $ns[ $namespace ] ) ) {
+                               $namespaces[] = intval( $namespace );
+                               continue;
+                       }
+                       $key = str_replace( ' ', '_', strtolower( $namespace ) );
+                       if ( isset( $nsflip[ $key ] ) ) {
+                               $namespaces[] = $nsflip[ $key ];
+                       } else {
+                               $this->error( "Ignoring unknown namespace '$namespace'." );
+                       }
+               }
+
+               if ( count( $namespaces ) === 0 ) {
+                       // Never hand an empty namespace list to the query.
+                       $this->error( "No valid namespaces in '$names'.", 1 );
+               }
+               return $namespaces;
+       }
+
+       /**
+        * @return null Category filtering is not exposed on the command line
+        */
+       protected function getCategory() {
+               return null;
+       }
+
+       /**
+        * @return null Prefix filtering is not exposed on the command line
+        */
+       protected function getPrefix() {
+               return null;
+       }
+
+       /**
+        * @return mixed Value of the --regex option
+        */
+       protected function useRegex() {
+               return $this->getOption( "regex" );
+       }
+
+       /**
+        * Convert the result rows to Title objects, once; later calls return
+        * the cached list.
+        *
+        * @param object $res Result of ReplaceTextSearch::doSearchQuery()
+        * @return Title[]
+        */
+       protected function getTitles( $res ) {
+               if ( $this->titles === null ) {
+                       $this->titles = array();
+                       while ( $row = $res->fetchObject() ) {
+                               $title = Title::makeTitleSafe(
+                                       $row->page_namespace,
+                                       $row->page_title
+                               );
+                               // makeTitleSafe() gives null for an invalid title;
+                               // such rows cannot be edited, so skip them.
+                               if ( $title !== null ) {
+                                       $this->titles[] = $title;
+                               }
+                       }
+               }
+               return $this->titles;
+       }
+
+       /**
+        * Print every matching title, one per line.
+        *
+        * @param object $res Result of ReplaceTextSearch::doSearchQuery()
+        * @return bool True if at least one title was printed
+        */
+       protected function listTitles( $res ) {
+               $ret = false;
+               foreach ( $this->getTitles( $res ) as $title ) {
+                       $ret = true;
+                       echo "$title\n";
+               }
+               return $ret;
+       }
+
+       /**
+        * Run a ReplaceTextJob immediately for every matching title.
+        *
+        * @param object $res Result of ReplaceTextSearch::doSearchQuery()
+        */
+       protected function replaceTitles( $res ) {
+               // The job parameters are the same for every page.
+               $param = array(
+                       'target_str'      => $this->target,
+                       'replacement_str' => $this->replacement,
+                       'use_regex'       => $this->useRegex,
+                       'user_id'         => $this->userId,
+                       'edit_summary'    => $this->summaryMsg,
+               );
+               foreach ( $this->getTitles( $res ) as $title ) {
+                       echo "Replacing on $title... ";
+                       $job = new ReplaceTextJob( $title, $param, 0 );
+                       if ( $job->run() !== true ) {
+                               echo "failed.\n";
+                               $this->error( "Trouble on the page '$title'." );
+                       } else {
+                               echo "done.\n";
+                       }
+               }
+       }
+
+       /**
+        * Ask a yes/no question on the console until it is answered.
+        *
+        * @param string $q Question text
+        * @return bool True for yes; false for no or when input has ended
+        */
+       protected function getReply( $q ) {
+               $reply = "";
+
+               while ( $reply !== "y" && $reply !== "n" ) {
+                       $reply = $this->readconsole( "$q (Y/N) " );
+                       if ( $reply === false ) {
+                               // End of input: treat as "no" rather than asking
+                               // again forever.
+                               return false;
+                       }
+                       $reply = substr( strtolower( $reply ), 0, 1 );
+               }
+               return $reply === "y";
+       }
+
+       public function execute() {
+               global $wgShowExceptionDetails;
+               $wgShowExceptionDetails = true;
+
+               if ( $this->getOption( "listns" ) ) {
+                       $this->listNamespaces();
+                       return;
+               }
+               $this->userId = $this->getUser();
+               $this->target = $this->getTarget();
+               $this->replacement = $this->getReplacement();
+               $this->summaryMsg = $this->getSummary();
+               $this->namespaces = $this->getNamespaces();
+               $this->category = $this->getCategory();
+               $this->prefix = $this->getPrefix();
+               $this->useRegex = $this->useRegex();
+
+               $res = ReplaceTextSearch::doSearchQuery( $this->target,
+                       $this->namespaces, $this->category, $this->prefix,
+                       $this->useRegex );
+
+               if ( $res->numRows() < 1 ) {
+                       return;
+               }
+
+               $dryRun = $this->getOption( "dry-run" );
+               $assumeYes = $this->getOption( "yes" );
+
+               if ( $dryRun || !$assumeYes ) {
+                       $listed = $this->listTitles( $res );
+               } else {
+                       $listed = false;
+               }
+               if ( $dryRun ) {
+                       // --dry-run only reports the matches; it never edits.
+                       return;
+               }
+               if ( $listed
+                       && !$this->getReply( "Replace instances on these pages?" )
+               ) {
+                       return;
+               }
+               $this->replaceTitles( $res );
+       }
+}
+
+// Name the Maintenance subclass defined in this file, then include the file
+// named by RUN_MAINTENANCE_IF_MAIN (not defined here; presumably provided by
+// Maintenance.php, which is required at the top of this script).
+$maintClass = "ReplaceText";
+require_once RUN_MAINTENANCE_IF_MAIN;

-- 
To view, visit https://gerrit.wikimedia.org/r/164896
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I8ccec61f570f33043d8a8d00c52b40acd9d6894a
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ReplaceText
Gerrit-Branch: master
Gerrit-Owner: MarkAHershberger <m...@nichework.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to