MarkAHershberger has uploaded a new change for review. https://gerrit.wikimedia.org/r/164896
Change subject: Create CLI utility for ReplaceText jobs ...................................................................... Create CLI utility for ReplaceText jobs This creates a new class (ReplaceTextSearch) to move the logic from the special page so that it is available for CLI access. Also creates replace.php which can be used for search-replace tasks that need to be automated. Change-Id: I8ccec61f570f33043d8a8d00c52b40acd9d6894a --- M ReplaceText.php A ReplaceTextSearch.php M SpecialReplaceText.php A replace.php 4 files changed, 326 insertions(+), 58 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ReplaceText refs/changes/96/164896/1 diff --git a/ReplaceText.php b/ReplaceText.php index 20df86a..8fdd798 100644 --- a/ReplaceText.php +++ b/ReplaceText.php @@ -47,6 +47,7 @@ $wgSpecialPageGroups['ReplaceText'] = 'wiki'; $wgAutoloadClasses['ReplaceText'] = $rtgIP . 'SpecialReplaceText.php'; $wgAutoloadClasses['ReplaceTextJob'] = $rtgIP . 'ReplaceTextJob.php'; +$wgAutoloadClasses['ReplaceTextSearch'] = $rtgIP . 'ReplaceTextSearch.php'; /** * This function should really go into a "ReplaceText_body.php" file. diff --git a/ReplaceTextSearch.php b/ReplaceTextSearch.php new file mode 100644 index 0000000..73a9f62 --- /dev/null +++ b/ReplaceTextSearch.php @@ -0,0 +1,59 @@ +<?php + +class ReplaceTextSearch { + public static function doSearchQuery( $search, $namespaces, $category, $prefix, $use_regex = false ) { + $dbr = wfGetDB( DB_SLAVE ); + $tables = array( 'page', 'revision', 'text' ); + $vars = array( 'page_id', 'page_namespace', 'page_title', 'old_text' ); + if ( $use_regex ) { + $comparisonCond = self::regexCond( $dbr, 'old_text', $search ); + } else { + $any = $dbr->anyString(); + $comparisonCond = 'old_text ' . 
$dbr->buildLike( $any, $search, $any ); + } + $conds = array( + $comparisonCond, + 'page_namespace' => $namespaces, + 'rev_id = page_latest', + 'rev_text_id = old_id' + ); + + self::categoryCondition( $category, $tables, $conds ); + self::prefixCondition( $prefix, $conds ); + $sort = array( 'ORDER BY' => 'page_namespace, page_title' ); + + return $dbr->select( $tables, $vars, $conds, __METHOD__ , $sort ); + } + + static protected function categoryCondition( $category, &$tables, &$conds ) { + if ( strval( $category ) !== '' ) { + $category = Title::newFromText( $category )->getDbKey(); + $tables[] = 'categorylinks'; + $conds[] = 'page_id = cl_from'; + $conds['cl_to'] = $category; + } + } + + static protected function prefixCondition( $prefix, &$conds ) { + if ( strval( $prefix ) === '' ) { + return; + } + + $dbr = wfGetDB( DB_SLAVE ); + $title = Title::newFromText( $prefix ); + if ( !is_null( $title ) ) { + $prefix = $title->getDbKey(); + } + $any = $dbr->anyString(); + $conds[] = 'page_title ' . $dbr->buildLike( $prefix, $any ); + } + + static private function regexCond( $dbr, $column, $regex ) { + if ( $dbr instanceof DatabasePostgres ) { + $op = '~'; + } else { + $op = 'REGEXP'; + } + return "$column $op " . $dbr->addQuotes( $regex ); + } +} \ No newline at end of file diff --git a/SpecialReplaceText.php b/SpecialReplaceText.php index 9499712..8b70dcf 100644 --- a/SpecialReplaceText.php +++ b/SpecialReplaceText.php @@ -128,7 +128,7 @@ // if user is replacing text within pages... 
if ( $this->edit_pages ) { - $res = $this->doSearchQuery( + $res = ReplaceTextSearch::doSearchQuery( $this->target, $this->selected_namespaces, $this->category, @@ -209,7 +209,7 @@ if ( $this->replacement === '' ) { $warning_msg = $this->msg('replacetext_blankwarning')->text(); } elseif ( count( $titles_for_edit ) > 0 ) { - $res = $this->doSearchQuery( $this->replacement, $this->selected_namespaces, $this->category, $this->prefix, $this->use_regex ); + $res = ReplaceTextSearch::doSearchQuery( $this->replacement, $this->selected_namespaces, $this->category, $this->prefix, $this->use_regex ); $count = $res->numRows(); if ( $count > 0 ) { $warning_msg = $this->msg( 'replacetext_warning' )->numParams( $count ) @@ -594,61 +594,5 @@ $sort = array( 'ORDER BY' => 'page_namespace, page_title' ); return $dbr->select( $tables, $vars, $conds, __METHOD__ , $sort ); - } - - function doSearchQuery( $search, $namespaces, $category, $prefix, $use_regex = false ) { - $dbr = wfGetDB( DB_SLAVE ); - $tables = array( 'page', 'revision', 'text' ); - $vars = array( 'page_id', 'page_namespace', 'page_title', 'old_text' ); - if ( $use_regex ) { - $comparisonCond = $this->regexCond( $dbr, 'old_text', $search ); - } else { - $any = $dbr->anyString(); - $comparisonCond = 'old_text ' . 
$dbr->buildLike( $any, $search, $any ); - } - $conds = array( - $comparisonCond, - 'page_namespace' => $namespaces, - 'rev_id = page_latest', - 'rev_text_id = old_id' - ); - - $this->categoryCondition( $category, $tables, $conds ); - $this->prefixCondition( $prefix, $conds ); - $sort = array( 'ORDER BY' => 'page_namespace, page_title' ); - - return $dbr->select( $tables, $vars, $conds, __METHOD__ , $sort ); - } - - protected function categoryCondition( $category, &$tables, &$conds ) { - if ( strval( $category ) !== '' ) { - $category = Title::newFromText( $category )->getDbKey(); - $tables[] = 'categorylinks'; - $conds[] = 'page_id = cl_from'; - $conds['cl_to'] = $category; - } - } - - protected function prefixCondition( $prefix, &$conds ) { - if ( strval( $prefix ) === '' ) { - return; - } - - $dbr = wfGetDB( DB_SLAVE ); - $title = Title::newFromText( $prefix ); - if ( !is_null( $title ) ) { - $prefix = $title->getDbKey(); - } - $any = $dbr->anyString(); - $conds[] = 'page_title ' . $dbr->buildLike( $prefix, $any ); - } - - private function regexCond( $dbr, $column, $regex ) { - if ( $dbr instanceof DatabasePostgres ) { - $op = '~'; - } else { - $op = 'REGEXP'; - } - return "$column $op " . $dbr->addQuotes( $regex ); } } diff --git a/replace.php b/replace.php new file mode 100755 index 0000000..c01311e --- /dev/null +++ b/replace.php @@ -0,0 +1,264 @@ +#!/usr/bin/php +<?php +/** + * Insert jobs into the jobqueue to replace text bits. + * Or execute immediately... your choice. + * + * Copyright © 2014 Mark A. Hershberger <m...@nichework.com> + * https://www.mediawiki.org/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Maintenance + */ + +$IP="/home/mah-public/wiki/core"; +require_once "$IP/maintenance/Maintenance.php"; + +/** + * Maintenance script that finds and replaces text in wiki pages from the command line. + * + * @ingroup Maintenance + */ +class ReplaceText extends Maintenance { + private $userReplacing; + private $userId; + private $target; + private $replacement; + private $summaryMsg; + private $namespaces; + private $category; + private $prefix; + private $useRegex; + private $titles; + + public function __construct() { + parent::__construct(); + $this->mDescription = "CLI utility to replace text wherever it is ". + "found in the wiki."; + + $this->addArg( "find", "Text to find.", false ); + $this->addArg( "replace", "Text to replace.", false ); + + $this->addOption( "dry-run", "Only find the texts, don't replace.", + false, false, 'n' ); + $this->addOption( "regex", "This is a regex (false).", + false, false, 'r' ); + $this->addOption( "user", "The user to attribute this to (uid 1).", + false, true, 'u' ); + $this->addOption( "yes", "Skip all prompts with an assumed 'yes'.", + false, false, 'y' ); + $this->addOption( "summary", "Alternate edit summary. (%r is where to ". + " place the replacement text, %f the text to look for.)", + false, true, 's' ); + $this->addOption( "ns", "Comma separated namespaces to search in. ". 
+ "(Main)" ); + + $this->addOption( "listns", "List out the namespaces on this wiki.", + false, false ); + } + + protected function getUser() { + $userId = 1; + $userReplacing = $this->getOption( "user", null ); + + if ( $userReplacing !== null ) { + if ( is_numeric( $userReplacing ) ) { + $user = User::newFromId( $userReplacing ); + } else { + $user = User::newFromName( $userReplacing ); + } + + if ( get_class( $user ) !== 'User' ) { + $this->error( + "Couldn't translate '$userReplacing' to a user.", 1 + ); + } + $userId = $user->getId(); + } + + return $userId; + } + + protected function getTarget() { + $ret = $this->getArg( 0 ); + if ( !$ret ) { + $this->error( "You have to specify a target.", true ); + } + return $ret; + } + + protected function getReplacement() { + $ret = $this->getArg( 1 ); + if ( !$ret ) { + $this->error( "You have to specify replacement text.", true ); + } + return $ret; + } + + protected function getSummary() { + $msg = wfMessage( + 'replacetext_editsummary', + $this->target, $this->replacement + )->inContentLanguage()->plain(); + if ( $this->getOption( "summary" ) !== null ) { + $msg = str_replace( array( '%f', '%r' ), + array( $this->target, $this->replacement ), + $this->getOption( "summary" ) ); + } + return $msg; + } + + protected function listNamespaces() { + echo "Index\tNamespace\n"; + foreach( MWNamespace::getCanonicalNamespaces() as $int => $val ) { + if($val == "") { + $val = "(main)"; + } + echo " $int\t$val\n"; + } + } + + protected function getNamespaces() { + $namespaces = array( NS_MAIN ); + $names = $this->getOption( "ns" ); + $ns = MWNamespace::getCanonicalNamespaces(); + $ns[0] = "main"; + $nsflip = array_flip( $ns ); + if( $names ) { + $namespaces = + array_filter( + array_map( + function( $namespace ) use ( $ns, $nsflip ) { + if( is_numeric( $namespace ) + && isset( $ns[ $namespace ] ) ) { + return intval( $namespace ); + } + $namespace = strtolower( $namespace ); + var_dump($nsflip[$namespace]); + if( isset( $nsflip[ 
$namespace ] ) ) { + return $nsflip[ $namespace ]; + } + return null; + }, explode( ",", $names ) ), + function( $val ) { return $val !== null; } + ); + } + return $namespaces; + } + + protected function getCategory() { + $cat = null; + return $cat; + } + + protected function getPrefix() { + $prefix = null; + return $prefix; + } + + protected function useRegex() { + return $this->getOption( "regex" ); + } + + protected function getTitles( $res ) { + if( count( $this->titles ) == 0 ) { + $this->titles = array(); + while( $row = $res->fetchObject() ) { + $this->titles[] = Title::makeTitleSafe( + $row->page_namespace, + $row->page_title + ); + } + } + return $this->titles; + } + + protected function listTitles( $res ) { + $ret = false; + foreach( $this->getTitles( $res ) as $title ) { + $ret = true; + echo "$title\n"; + } + return $ret; + } + + protected function replaceTitles( $res ) { + foreach( $this->getTitles( $res ) as $title ) { + $param = array( + 'target_str' => $this->target, + 'replacement_str' => $this->replacement, + 'use_regex' => $this->useRegex, + 'user_id' => $this->userId, + 'edit_summary' => $this->summaryMsg, + ); + echo "Replacing on $title... "; + $job = new ReplaceTextJob( $title, $param, 0 ); + if ( $job->run() !== true ) { + $this->error( "Trouble on the page '$title'." 
); + } + echo "done.\n"; + + $c = Revision::newFromTitle( $title, false, Revision::READ_LATEST ) + ->getSerializedData(); + } + } + + protected function getReply( $q ) { + $reply = ""; + + while( $reply !== "y" && $reply !== "n" ) { + $reply = $this->readconsole( "$q (Y/N) " ); + $reply = substr( strtolower( $reply ), 0, 1 ); + } + return $reply === "y"; + } + + public function execute() { + global $wgShowExceptionDetails; + $wgShowExceptionDetails = true; + + if ( $this->getOption( "listns" ) ) { + $this->listNamespaces(); + exit(0); + } + $this->userId = $this->getUser(); + $this->target = $this->getTarget(); + $this->replacement = $this->getReplacement(); + $this->summaryMsg = $this->getSummary(); + $this->namespaces = $this->getNamespaces(); + $this->category = $this->getCategory(); + $this->prefix = $this->getPrefix(); + $this->useRegex = $this->useRegex(); + + $res = ReplaceTextSearch::doSearchQuery( $this->target, + $this->namespaces, $this->category, $this->prefix, + $this->useRegex ); + + if ( !$this->getOption( "yes" ) && $this->listTitles( $res ) ) { + if ( ! $this->getReply( "Replace instances on these pages?" ) ) { + exit(0); + } + } + if ( $res->numRows() > 0 ) { + $this->replaceTitles( $res ); + } + } +} + +$maintClass = "ReplaceText"; +require_once RUN_MAINTENANCE_IF_MAIN; -- To view, visit https://gerrit.wikimedia.org/r/164896 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I8ccec61f570f33043d8a8d00c52b40acd9d6894a Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/ReplaceText Gerrit-Branch: master Gerrit-Owner: MarkAHershberger <m...@nichework.com> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits