jenkins-bot has submitted this change and it was merged.

Change subject: (bug 51621) Make SBL aware of ContentHandler.
......................................................................


(bug 51621) Make SBL aware of ContentHandler.

This changes SpamBlacklist to make use of the new
ContentHandler-aware hooks.

This change also includes some refactoring and cleanup which made
the migration to the new hooks easier.

Change-Id: I21e9cc8479f2b95fb53c502f6e279c8a1ea378a5
---
M SpamBlacklist.php
M SpamBlacklistHooks.php
M SpamBlacklist_body.php
3 files changed, 134 insertions(+), 71 deletions(-)

Approvals:
  Hoo man: Looks good to me, approved
  Aude: Looks good to me, but someone else must approve
  jenkins-bot: Verified



diff --git a/SpamBlacklist.php b/SpamBlacklist.php
index 5791801..5be78df 100644
--- a/SpamBlacklist.php
+++ b/SpamBlacklist.php
@@ -10,7 +10,7 @@
 $wgExtensionCredits['antispam'][] = array(
        'path'           => __FILE__,
        'name'           => 'SpamBlacklist',
-       'author'         => array( 'Tim Starling', 'John Du Hart' ),
+       'author'         => array( 'Tim Starling', 'John Du Hart', 'Daniel 
Kinzler' ),
        'url'            => 
'https://www.mediawiki.org/wiki/Extension:SpamBlacklist',
        'descriptionmsg' => 'spam-blacklist-desc',
 );
@@ -37,10 +37,19 @@
  */
 $wgSpamBlacklistSettings =& $wgBlacklistSettings['spam'];
 
-$wgHooks['EditFilterMerged'][] = 'SpamBlacklistHooks::filterMerged';
+if ( !defined( 'MW_SUPPORTS_CONTENTHANDLER' ) ) {
+       die( "This version of SpamBlacklist requires a version of MediaWiki 
that supports the ContentHandler facility (supported since MW 1.21)." );
+}
+
+// filter pages on save
+$wgHooks['EditFilterMergedContent'][] = 
'SpamBlacklistHooks::filterMergedContent';
 $wgHooks['APIEditBeforeSave'][] = 
'SpamBlacklistHooks::filterAPIEditBeforeSave';
+
+// editing filter rules
 $wgHooks['EditFilter'][] = 'SpamBlacklistHooks::validate';
-$wgHooks['ArticleSaveComplete'][] = 'SpamBlacklistHooks::articleSave';
+$wgHooks['PageContentSaveComplete'][] = 'SpamBlacklistHooks::pageSaveContent';
+
+// email filters
 $wgHooks['UserCanSendEmail'][] = 'SpamBlacklistHooks::userCanSendEmail';
 $wgHooks['AbortNewAccount'][] = 'SpamBlacklistHooks::abortNewAccount';
 
diff --git a/SpamBlacklistHooks.php b/SpamBlacklistHooks.php
index 530df16..54f833c 100644
--- a/SpamBlacklistHooks.php
+++ b/SpamBlacklistHooks.php
@@ -5,34 +5,53 @@
  */
 class SpamBlacklistHooks {
        /**
-        * Hook function for EditFilterMerged
+        * Hook function for EditFilterMergedContent
         *
-        * @param $editPage EditPage
-        * @param $text string
-        * @param $hookErr string
-        * @param $editSummary string
+        * @param IContextSource $context
+        * @param Content        $content
+        * @param Status         $status
+        * @param string         $summary
+        * @param User           $user
+        * @param bool           $minoredit
+        *
         * @return bool
         */
-       static function filterMerged( $editPage, $text, &$hookErr, $editSummary 
) {
-               global $wgTitle;
-               if( is_null( $wgTitle ) ) {
-                       # API mode
-                       # wfSpamBlacklistFilterAPIEditBeforeSave already 
checked the blacklist
+       static function filterMergedContent( IContextSource $context, Content 
$content, Status $status, $summary, User $user, $minoredit ) {
+               $title = $context->getTitle();
+
+               if ( isset( $title->spamBlackListFiltered ) && 
$title->spamBlackListFiltered ) {
+                       // already filtered
                        return true;
                }
 
-               $spamObj = BaseBlacklist::getInstance( 'spam' );
-               $title = $editPage->mArticle->getTitle();
-               $ret = $spamObj->filter( $title, $text, '', $editSummary, 
$editPage );
-               if ( $ret !== false ) {
-                       $editPage->spamPageWithContent( $ret );
+               // get the link from the not-yet-saved page content.
+               $pout = $content->getParserOutput( $title );
+               $links = array_keys( $pout->getExternalLinks() );
+
+               // HACK: treat the edit summary as a link
+               if ( $summary !== '' ) {
+                       $links[] = $summary;
                }
-               // Return convention for hooks is the inverse of 
$wgFilterCallback
-               return ( $ret === false );
+
+               $spamObj = BaseBlacklist::getInstance( 'spam' );
+               $matches = $spamObj->filter( $links, $title );
+
+               if ( $matches !== false ) {
+                       $status->fatal( 'spamprotectiontext' );
+
+                       foreach ( $matches as $match ) {
+                               $status->fatal( 'spamprotectionmatch', $match );
+                       }
+               }
+
+               // Always return true, EditPage will look at $status->isOk().
+               return true;
        }
 
        /**
-        * Hook function for APIEditBeforeSave
+        * Hook function for APIEditBeforeSave.
+        * This allows blacklist matches to be reported directly in the result 
structure
+        * of the API call.
         *
         * @param $editPage EditPage
         * @param $text string
@@ -40,14 +59,37 @@
         * @return bool
         */
        static function filterAPIEditBeforeSave( $editPage, $text, &$resultArr 
) {
-               $spamObj = BaseBlacklist::getInstance( 'spam' );
                $title = $editPage->mArticle->getTitle();
-               $ret = $spamObj->filter( $title, $text, '', '', $editPage );
-               if ( $ret!==false ) {
-                       $resultArr['spamblacklist'] = implode( '|', $ret );
+
+               // get the links from the not-yet-saved page content.
+               $content = ContentHandler::makeContent(
+                       $text,
+                       $editPage->getTitle(),
+                       $editPage->contentModel,
+                       $editPage->contentFormat
+               );
+               $editInfo = $editPage->mArticle->prepareContentForEdit( 
$content, null, null, $editPage->contentFormat );
+               $pout = $editInfo->output;
+               $links = array_keys( $pout->getExternalLinks() );
+
+               // HACK: treat the edit summary as a link
+               $summary = $editPage->summary;
+               if ( $summary !== '' ) {
+                       $links[] = $summary;
                }
-               // Return convention for hooks is the inverse of 
$wgFilterCallback
-               return ( $ret === false );
+
+               $spamObj = BaseBlacklist::getInstance( 'spam' );
+               $matches = $spamObj->filter( $links, $title );
+
+               if ( $matches !== false ) {
+                       $resultArr['spamblacklist'] = implode( '|', $matches );
+               }
+
+               // mark the title, so filterMergedContent can skip it.
+               $title->spamBlackListFiltered = true;
+
+               // return convention for hooks is the inverse of 
$wgFilterCallback
+               return ( $matches === false );
        }
 
        /**
@@ -136,20 +178,37 @@
        }
 
        /**
-        * Hook function for ArticleSaveComplete
+        * Hook function for PageContentSaveComplete
         * Clear local spam blacklist caches on page save.
         *
-        * @param $article Article
-        * @param $user User
-        * @param $text string
-        * @param $summary string
-        * @param $isminor
-        * @param $iswatch
-        * @param $section
+        * @param Page $wikiPage
+        * @param User     $user
+        * @param Content  $content
+        * @param string   $summary
+        * @param bool     $isMinor
+        * @param bool     $isWatch
+        * @param string   $section
+        * @param int      $flags
+        * @param int      $revision
+        * @param Status   $status
+        * @param int      $baseRevId
+        *
         * @return bool
         */
-       static function articleSave( &$article, &$user, $text, $summary, 
$isminor, $iswatch, $section ) {
-               if( !BaseBlacklist::isLocalSource( $article->getTitle() ) ) {
+       static function pageSaveContent(
+               Page $wikiPage,
+               User $user,
+               Content $content,
+               $summary,
+               $isMinor,
+               $isWatch,
+               $section,
+               $flags,
+               $revision,
+               Status $status,
+               $baseRevId
+       ) {
+               if( !BaseBlacklist::isLocalSource( $wikiPage->getTitle() ) ) {
                        return true;
                }
                global $wgMemc, $wgDBname;
diff --git a/SpamBlacklist_body.php b/SpamBlacklist_body.php
index 6b6b9cd..5ca7487 100644
--- a/SpamBlacklist_body.php
+++ b/SpamBlacklist_body.php
@@ -5,7 +5,6 @@
 }
 
 class SpamBlacklist extends BaseBlacklist {
-       var $ignoreEditSummary = false;
 
        /**
         * Returns the code for the blacklist implementation
@@ -17,49 +16,45 @@
        }
 
        /**
-        * @param Title $title
-        * @param string $text Text of section, or entire text if 
$editPage!=false
-        * @param string $section Section number or name
-        * @param string $editsummary Edit summary if one exists, some people 
use urls there too
-        * @param EditPage $editPage EditPage if EditFilterMerged was called, 
null otherwise
+        * Apply some basic anti-spoofing to the links before they get filtered,
+        * see @bug 12896
+        *
+        * @param string $text
+        *
+        * @return string
+        */
+       protected function antiSpoof( $text ) {
+               $text = str_replace( '．', '.', $text );
+               return $text;
+       }
+
+       /**
+        * @param string[] $links An array of links to check against the 
blacklist
+        * @param Title  $title The title of the page to which the filter shall 
be applied.
+        *               This is used to load the old links already on the 
page, so
+        *               the filter is only applied to links that got added. If 
not given,
+        *               the filter is applied to all $links.
+        *
         * @return Array Matched text(s) if the edit should not be allowed, 
false otherwise
         */
-       function filter( &$title, $text, $section, $editsummary = '', EditPage 
&$editPage = null ) {
-               /**
-                * @var $wgParser Parser
-                */
-               global $wgParser, $wgUser;
-
+       function filter( array $links, Title $title = null ) {
                $fname = 'wfSpamBlacklistFilter';
                wfProfileIn( $fname );
-
-               # These don't do anything, commenting out...
-               #$this->title = $title;
-               #$this->text = $text;
-               #$this->section = $section;
-               $text = str_replace( '．', '.', $text ); //@bug 12896
 
                $blacklists = $this->getBlacklists();
                $whitelists = $this->getWhitelists();
 
                if ( count( $blacklists ) ) {
-                       # Run parser to strip SGML comments and such out of the 
markup
-                       # This was being used to circumvent the filter (see bug 
5185)
-                       if ( $editPage ) {
-                               $editInfo = 
$editPage->mArticle->prepareTextForEdit( $text );
-                               $out = $editInfo->output;
-                       } else {
-                               $options = new ParserOptions();
-                               $text = $wgParser->preSaveTransform( $text, 
$title, $wgUser, $options );
-                               $out = $wgParser->parse( $text, $title, 
$options );
-                       }
-                       $newLinks = array_keys( $out->getExternalLinks() );
-                       $oldLinks = $this->getCurrentLinks( $title );
-                       $addedLinks = array_diff( $newLinks, $oldLinks );
+                       // poor man's anti-spoof, see bug 12896
+                       $newLinks = array_map( array( $this, 'antiSpoof' ), 
$links );
 
-                       // We add the edit summary if one exists
-                       if ( !$this->ignoreEditSummary && !empty( $editsummary 
) ) {
-                               $addedLinks[] = $editsummary;
+                       $oldLinks = array();
+                       if ( $title !== null ) {
+                               $oldLinks = $this->getCurrentLinks( $title );
+                               $addedLinks = array_diff( $newLinks, $oldLinks 
);
+                       } else {
+                               // can't load old links, so treat all links as 
added.
+                               $addedLinks = $newLinks;
                        }
 
                        wfDebugLog( 'SpamBlacklist', "Old URLs: " . implode( ', 
', $oldLinks ) );

-- 
To view, visit https://gerrit.wikimedia.org/r/75867
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I21e9cc8479f2b95fb53c502f6e279c8a1ea378a5
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/SpamBlacklist
Gerrit-Branch: master
Gerrit-Owner: Daniel Kinzler <daniel.kinz...@wikimedia.de>
Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org>
Gerrit-Reviewer: Aude <aude.w...@gmail.com>
Gerrit-Reviewer: CSteipp <cste...@wikimedia.org>
Gerrit-Reviewer: Demon <ch...@wikimedia.org>
Gerrit-Reviewer: Denny Vrandecic <denny.vrande...@wikimedia.de>
Gerrit-Reviewer: Hoo man <h...@online.de>
Gerrit-Reviewer: Jforrester <jforres...@wikimedia.org>
Gerrit-Reviewer: Johnduhart <j...@compwhizii.net>
Gerrit-Reviewer: Krinkle <krinklem...@gmail.com>
Gerrit-Reviewer: Lydia Pintscher <lydia.pintsc...@wikimedia.de>
Gerrit-Reviewer: Platonides <platoni...@gmail.com>
Gerrit-Reviewer: Reedy <re...@wikimedia.org>
Gerrit-Reviewer: Tim Starling <tstarl...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to