Tim Starling has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/296370

Change subject: [WIP] Use the pure-PHP Balancer as the default Tidy implementation
......................................................................

[WIP] Use the pure-PHP Balancer as the default Tidy implementation

* Remove the concept of tidy being disabled
* Remove the non-tidy hacks from Sanitizer and Parser
* Use Balancer in parser tests
* Update parserTests.txt as appropriate

Change-Id: Ifea448c3de708a94768a1861a993c2c3c6292759
---
M includes/Sanitizer.php
M includes/parser/MWTidy.php
M includes/parser/Parser.php
M tests/TestsAutoLoader.php
M tests/parser/parserTest.inc
M tests/parser/parserTests.txt
M tests/phpunit/MediaWikiTestCase.php
M tests/phpunit/includes/SanitizerTest.php
M tests/phpunit/includes/parser/NewParserTest.php
M tests/phpunit/includes/parser/TidyTest.php
M tests/testHelpers.inc
11 files changed, 132 insertions(+), 491 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/70/296370/1

diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index 8f1fc99..f104502 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -468,188 +468,47 @@
                $text = Sanitizer::removeHTMLcomments( $text );
                $bits = explode( '<', $text );
                $text = str_replace( '>', '&gt;', array_shift( $bits ) );
-               if ( !MWTidy::isEnabled() ) {
-                       $tagstack = $tablestack = [];
-                       foreach ( $bits as $x ) {
-                               $regs = [];
-                               # $slash: Does the current element start with a 
'/'?
-                               # $t: Current element name
-                               # $params: String between element name and >
-                               # $brace: Ending '>' or '/>'
-                               # $rest: Everything until the next element of 
$bits
-                               if ( preg_match( self::ELEMENT_BITS_REGEX, $x, 
$regs ) ) {
-                                       list( /* $qbar */, $slash, $t, $params, 
$brace, $rest ) = $regs;
-                               } else {
-                                       $slash = $t = $params = $brace = $rest 
= null;
-                               }
+               foreach ( $bits as $x ) {
+                       if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs ) 
) {
+                               list( /* $qbar */, $slash, $t, $params, $brace, 
$rest ) = $regs;
 
                                $badtag = false;
                                $t = strtolower( $t );
                                if ( isset( $htmlelements[$t] ) ) {
-                                       # Check our stack
-                                       if ( $slash && isset( 
$htmlsingleonly[$t] ) ) {
-                                               $badtag = true;
-                                       } elseif ( $slash ) {
-                                               # Closing a tag... is it the 
one we just opened?
-                                               MediaWiki\suppressWarnings();
-                                               $ot = array_pop( $tagstack );
-                                               MediaWiki\restoreWarnings();
-
-                                               if ( $ot != $t ) {
-                                                       if ( isset( 
$htmlsingleallowed[$ot] ) ) {
-                                                               # Pop all 
elements with an optional close tag
-                                                               # and see if we 
find a match below them
-                                                               $optstack = [];
-                                                               array_push( 
$optstack, $ot );
-                                                               
MediaWiki\suppressWarnings();
-                                                               $ot = 
array_pop( $tagstack );
-                                                               
MediaWiki\restoreWarnings();
-                                                               while ( $ot != 
$t && isset( $htmlsingleallowed[$ot] ) ) {
-                                                                       
array_push( $optstack, $ot );
-                                                                       
MediaWiki\suppressWarnings();
-                                                                       $ot = 
array_pop( $tagstack );
-                                                                       
MediaWiki\restoreWarnings();
-                                                               }
-                                                               if ( $t != $ot 
) {
-                                                                       # No 
match. Push the optional elements back again
-                                                                       $badtag 
= true;
-                                                                       
MediaWiki\suppressWarnings();
-                                                                       $ot = 
array_pop( $optstack );
-                                                                       
MediaWiki\restoreWarnings();
-                                                                       while ( 
$ot ) {
-                                                                               
array_push( $tagstack, $ot );
-                                                                               
MediaWiki\suppressWarnings();
-                                                                               
$ot = array_pop( $optstack );
-                                                                               
MediaWiki\restoreWarnings();
-                                                                       }
-                                                               }
-                                                       } else {
-                                                               
MediaWiki\suppressWarnings();
-                                                               array_push( 
$tagstack, $ot );
-                                                               
MediaWiki\restoreWarnings();
-
-                                                               # <li> can be 
nested in <ul> or <ol>, skip those cases:
-                                                               if ( !isset( 
$htmllist[$ot] ) || !isset( $listtags[$t] ) ) {
-                                                                       $badtag 
= true;
-                                                               }
-                                                       }
-                                               } else {
-                                                       if ( $t == 'table' ) {
-                                                               $tagstack = 
array_pop( $tablestack );
-                                                       }
-                                               }
-                                               $newparams = '';
-                                       } else {
-                                               # Keep track for later
-                                               if ( isset( $tabletags[$t] ) && 
!in_array( 'table', $tagstack ) ) {
-                                                       $badtag = true;
-                                               } elseif ( in_array( $t, 
$tagstack ) && !isset( $htmlnest[$t] ) ) {
-                                                       $badtag = true;
-                                               #  Is it a self closed htmlpair 
? (bug 5487)
-                                               } elseif ( $brace == '/>' && 
isset( $htmlpairs[$t] ) ) {
-                                                       // Eventually we'll 
just remove the self-closing
-                                                       // slash, in order to 
be consistent with HTML5
-                                                       // semantics.
-                                                       // $brace = '>';
-                                                       // For now, let's just 
warn authors to clean up.
-                                                       if ( is_callable( 
$warnCallback ) ) {
-                                                               
call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] );
-                                                       }
-                                                       $badtag = true;
-                                               } elseif ( isset( 
$htmlsingleonly[$t] ) ) {
-                                                       # Hack to force empty 
tag for unclosable elements
-                                                       $brace = '/>';
-                                               } elseif ( isset( 
$htmlsingle[$t] ) ) {
-                                                       # Hack to not close 
$htmlsingle tags
-                                                       $brace = null;
-                                                       # Still need to push 
this optionally-closed tag to
-                                                       # the tag stack so that 
we can match end tags
-                                                       # instead of marking 
them as bad.
-                                                       array_push( $tagstack, 
$t );
-                                               } elseif ( isset( 
$tabletags[$t] ) && in_array( $t, $tagstack ) ) {
-                                                       // New table tag but 
forgot to close the previous one
-                                                       $text .= "</$t>";
-                                               } else {
-                                                       if ( $t == 'table' ) {
-                                                               array_push( 
$tablestack, $tagstack );
-                                                               $tagstack = [];
-                                                       }
-                                                       array_push( $tagstack, 
$t );
-                                               }
-
-                                               # Replace any variables or 
template parameters with
-                                               # plaintext results.
-                                               if ( is_callable( 
$processCallback ) ) {
-                                                       call_user_func_array( 
$processCallback, [ &$params, $args ] );
-                                               }
-
-                                               if ( !Sanitizer::validateTag( 
$params, $t ) ) {
-                                                       $badtag = true;
-                                               }
-
-                                               # Strip non-approved attributes 
from the tag
-                                               $newparams = 
Sanitizer::fixTagAttributes( $params, $t );
+                                       if ( is_callable( $processCallback ) ) {
+                                               call_user_func_array( 
$processCallback, [ &$params, $args ] );
                                        }
+
+                                       if ( $brace == '/>' && !( isset( 
$htmlsingle[$t] ) || isset( $htmlsingleonly[$t] ) ) ) {
+                                               // Eventually we'll just remove 
the self-closing
+                                               // slash, in order to be 
consistent with HTML5
+                                               // semantics.
+                                               // $brace = '>';
+                                               // For now, let's just warn 
authors to clean up.
+                                               if ( is_callable( $warnCallback 
) ) {
+                                                       call_user_func_array( 
$warnCallback, [ 'deprecated-self-close-category' ] );
+                                               }
+                                       }
+                                       if ( !Sanitizer::validateTag( $params, 
$t ) ) {
+                                               $badtag = true;
+                                       }
+
+                                       $newparams = 
Sanitizer::fixTagAttributes( $params, $t );
                                        if ( !$badtag ) {
+                                               if ( $brace === '/>' && !isset( 
$htmlsingleonly[$t] ) ) {
+                                                       # Interpret 
self-closing tags as empty tags even when
+                                                       # HTML 5 would 
interpret them as start tags. Such input
+                                                       # is commonly seen on 
Wikimedia wikis with this intention.
+                                                       $brace = "></$t>";
+                                               }
+
                                                $rest = str_replace( '>', 
'&gt;', $rest );
-                                               $close = ( $brace == '/>' && 
!$slash ) ? ' /' : '';
-                                               $text .= 
"<$slash$t$newparams$close>$rest";
+                                               $text .= 
"<$slash$t$newparams$brace$rest";
                                                continue;
                                        }
                                }
-                               $text .= '&lt;' . str_replace( '>', '&gt;', $x 
);
                        }
-                       # Close off any remaining tags
-                       while ( is_array( $tagstack ) && ( $t = array_pop( 
$tagstack ) ) ) {
-                               $text .= "</$t>\n";
-                               if ( $t == 'table' ) {
-                                       $tagstack = array_pop( $tablestack );
-                               }
-                       }
-               } else {
-                       # this might be possible using tidy itself
-                       foreach ( $bits as $x ) {
-                               if ( preg_match( self::ELEMENT_BITS_REGEX, $x, 
$regs ) ) {
-                                       list( /* $qbar */, $slash, $t, $params, 
$brace, $rest ) = $regs;
-
-                                       $badtag = false;
-                                       $t = strtolower( $t );
-                                       if ( isset( $htmlelements[$t] ) ) {
-                                               if ( is_callable( 
$processCallback ) ) {
-                                                       call_user_func_array( 
$processCallback, [ &$params, $args ] );
-                                               }
-
-                                               if ( $brace == '/>' && !( 
isset( $htmlsingle[$t] ) || isset( $htmlsingleonly[$t] ) ) ) {
-                                                       // Eventually we'll 
just remove the self-closing
-                                                       // slash, in order to 
be consistent with HTML5
-                                                       // semantics.
-                                                       // $brace = '>';
-                                                       // For now, let's just 
warn authors to clean up.
-                                                       if ( is_callable( 
$warnCallback ) ) {
-                                                               
call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] );
-                                                       }
-                                               }
-                                               if ( !Sanitizer::validateTag( 
$params, $t ) ) {
-                                                       $badtag = true;
-                                               }
-
-                                               $newparams = 
Sanitizer::fixTagAttributes( $params, $t );
-                                               if ( !$badtag ) {
-                                                       if ( $brace === '/>' && 
!isset( $htmlsingleonly[$t] ) ) {
-                                                               # Interpret 
self-closing tags as empty tags even when
-                                                               # HTML 5 would 
interpret them as start tags. Such input
-                                                               # is commonly 
seen on Wikimedia wikis with this intention.
-                                                               $brace = 
"></$t>";
-                                                       }
-
-                                                       $rest = str_replace( 
'>', '&gt;', $rest );
-                                                       $text .= 
"<$slash$t$newparams$brace$rest";
-                                                       continue;
-                                               }
-                                       }
-                               }
-                               $text .= '&lt;' . str_replace( '>', '&gt;', $x 
);
-                       }
+                       $text .= '&lt;' . str_replace( '>', '&gt;', $x );
                }
                return $text;
        }
diff --git a/includes/parser/MWTidy.php b/includes/parser/MWTidy.php
index f281c25..a56f2c2 100644
--- a/includes/parser/MWTidy.php
+++ b/includes/parser/MWTidy.php
@@ -22,11 +22,7 @@
  */
 
 /**
- * Class to interact with HTML tidy
- *
- * Either the external tidy program or the in-process tidy extension
- * will be used depending on availability. Override the default
- * $wgTidyInternal setting to disable the internal if it's not working.
+ * Class to interact with various HTML tidy implementations
  *
  * @ingroup Parser
  */
@@ -43,12 +39,7 @@
         * @return string Corrected HTML output
         */
        public static function tidy( $text ) {
-               $driver = self::singleton();
-               if ( !$driver ) {
-                       throw new MWException( __METHOD__ .
-                               ': tidy is disabled, caller should have checked 
MWTidy::isEnabled()' );
-               }
-               return $driver->tidy( $text );
+               return self::singleton()->tidy( $text );
        }
 
        /**
@@ -78,10 +69,6 @@
         */
        public static function checkErrors( $text, &$errorStr = null ) {
                $driver = self::singleton();
-               if ( !$driver ) {
-                       throw new MWException( __METHOD__ .
-                               ': tidy is disabled, caller should have checked 
MWTidy::isEnabled()' );
-               }
                if ( $driver->supportsValidate() ) {
                        return $driver->validate( $text, $errorStr );
                } else {
@@ -89,8 +76,11 @@
                }
        }
 
+       /**
+        * @deprecated since 1.28
+        */
        public static function isEnabled() {
-               return self::singleton() !== false;
+               return true;
        }
 
        protected static function singleton() {
@@ -117,34 +107,45 @@
                                        $config['driver'] = 'RaggettExternal';
                                }
                        } else {
-                               return false;
+                               $config = [ 'driver' => 'Html5Internal' ];
                        }
-                       switch ( $config['driver'] ) {
-                               case 'RaggettInternalHHVM':
-                                       self::$instance = new 
MediaWiki\Tidy\RaggettInternalHHVM( $config );
-                                       break;
-                               case 'RaggettInternalPHP':
-                                       self::$instance = new 
MediaWiki\Tidy\RaggettInternalPHP( $config );
-                                       break;
-                               case 'RaggettExternal':
-                                       self::$instance = new 
MediaWiki\Tidy\RaggettExternal( $config );
-                                       break;
-                               case 'Html5Depurate':
-                                       self::$instance = new 
MediaWiki\Tidy\Html5Depurate( $config );
-                                       break;
-                               case 'Html5Internal':
-                                       self::$instance = new 
MediaWiki\Tidy\Html5Internal( $config );
-                                       break;
-                               default:
-                                       throw new MWException( "Invalid tidy 
driver: \"{$config['driver']}\"" );
-                       }
+                       self::$instance = self::factory( $config );
                }
                return self::$instance;
        }
 
        /**
+        * Create a new Tidy driver object from configuration.
+        * @see $wgTidyConfig
+        * @param array $config
+        * @return TidyDriverBase
+        */
+       public static function factory( array $config ) {
+               switch ( $config['driver'] ) {
+                       case 'RaggettInternalHHVM':
+                               $instance = new 
MediaWiki\Tidy\RaggettInternalHHVM( $config );
+                               break;
+                       case 'RaggettInternalPHP':
+                               $instance = new 
MediaWiki\Tidy\RaggettInternalPHP( $config );
+                               break;
+                       case 'RaggettExternal':
+                               $instance = new MediaWiki\Tidy\RaggettExternal( 
$config );
+                               break;
+                       case 'Html5Depurate':
+                               $instance = new MediaWiki\Tidy\Html5Depurate( 
$config );
+                               break;
+                       case 'Html5Internal':
+                               $instance = new MediaWiki\Tidy\Html5Internal( 
$config );
+                               break;
+                       default:
+                               throw new MWException( "Invalid tidy driver: 
\"{$config['driver']}\"" );
+               }
+               return $instance;
+       }
+
+       /**
         * Set the driver to be used. This is for testing.
-        * @param TidyDriverBase|false|null $instance
+        * @param TidyDriverBase|null $instance
         */
        public static function setInstance( $instance ) {
                self::$instance = $instance;
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 55b5fc3..5c954f8 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -1355,35 +1355,9 @@
 
                $text = Sanitizer::normalizeCharReferences( $text );
 
-               if ( MWTidy::isEnabled() && $this->mOptions->getTidy() ) {
+               if ( $this->mOptions->getTidy() ) {
                        $text = MWTidy::tidy( $text );
                        $this->mOutput->addModuleStyles( 
MWTidy::getModuleStyles() );
-               } else {
-                       # attempt to sanitize at least some nesting problems
-                       # (bug #2702 and quite a few others)
-                       $tidyregs = [
-                               # ''Something [http://www.cool.com cool''] -->
-                               # <i>Something</i><a 
href="http://www.cool.com";..><i>cool></i></a>
-                               
'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
-                               '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
-                               # fix up an anchor inside another anchor, only
-                               # at least for a single single nested link (bug 
3695)
-                               
'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
-                               '\\1\\2</a>\\3</a>\\1\\4</a>',
-                               # fix div inside inline elements- doBlockLevels 
won't wrap a line which
-                               # contains a div, so fix it up here; replace
-                               # div with escaped text
-                               '/(<([aib]) 
[^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
-                               '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
-                               # remove empty italic or bold tag pairs, some
-                               # introduced by rules above
-                               '/<([bi])><\/\\1>/' => '',
-                       ];
-
-                       $text = preg_replace(
-                               array_keys( $tidyregs ),
-                               array_values( $tidyregs ),
-                               $text );
                }
 
                if ( $isMain ) {
diff --git a/tests/TestsAutoLoader.php b/tests/TestsAutoLoader.php
index 8b100a2..e430c6e 100644
--- a/tests/TestsAutoLoader.php
+++ b/tests/TestsAutoLoader.php
@@ -36,7 +36,6 @@
        'TestRecorder' => "$testDir/testHelpers.inc",
        'ITestRecorder' => "$testDir/testHelpers.inc",
        'DjVuSupport' => "$testDir/testHelpers.inc",
-       'TidySupport' => "$testDir/testHelpers.inc",
 
        # tests/phpunit
        'MediaWikiTestCase' => "$testDir/phpunit/MediaWikiTestCase.php",
diff --git a/tests/parser/parserTest.inc b/tests/parser/parserTest.inc
index e519f59..f5b668c 100644
--- a/tests/parser/parserTest.inc
+++ b/tests/parser/parserTest.inc
@@ -70,11 +70,6 @@
         */
        private $djVuSupport;
 
-       /**
-        * @var TidySupport
-        */
-       private $tidySupport;
-
        private $maxFuzzTestLength = 300;
        private $fuzzSeed = 0;
        private $memoryLimit = 50;
@@ -149,10 +144,6 @@
                $this->runParsoid = isset( $options['run-parsoid'] );
 
                $this->djVuSupport = new DjVuSupport();
-               $this->tidySupport = new TidySupport();
-               if ( !$this->tidySupport->isEnabled() ) {
-                       echo "Warning: tidy is not installed, skipping some 
tests\n";
-               }
 
                if ( !extension_loaded( 'gd' ) ) {
                        echo "Warning: GD extension is not present, 
thumbnailing tests will probably fail\n";
@@ -625,11 +616,7 @@
                }
 
                if ( isset( $opts['tidy'] ) ) {
-                       if ( !$this->tidySupport->isEnabled() ) {
-                               return $this->showSkipped();
-                       } else {
-                               $options->setTidy( true );
-                       }
+                       $options->setTidy( true );
                }
 
                if ( isset( $opts['title'] ) ) {
@@ -919,12 +906,9 @@
                        'wgDisableLangConversion' => false,
                        'wgDisableTitleConversion' => false,
                        // Tidy options.
-                       'wgUseTidy' => isset( $opts['tidy'] ),
+                       'wgUseTidy' => false,
                        'wgTidyConfig' => null,
                        'wgDebugTidy' => false,
-                       'wgTidyConf' => $IP . '/includes/tidy/tidy.conf',
-                       'wgTidyOpts' => '',
-                       'wgTidyInternal' => $this->tidySupport->isInternal(),
                ];
 
                if ( $config ) {
@@ -1280,7 +1264,6 @@
                FileBackendGroup::destroySingleton();
                LockManagerGroup::destroySingletons();
                LinkCache::singleton()->clear();
-               MWTidy::destroySingleton();
 
                foreach ( $this->savedGlobals as $var => $val ) {
                        $GLOBALS[$var] = $val;
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 2e059d7..ae9f436 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -650,30 +650,22 @@
 </p>
 !! end
 
-# The PHP parser strips the empty tags out for giggles; parsoid doesn't.
 !! test
 Italics and bold: 2-quote opening sequence: (2,5)
 !! options
 parsoid=wt2html
 !! wikitext
 ''foo'''''
-!! html/php
-<p><i>foo</i>
-</p>
-!! html/parsoid
+!! html
 <p><i>foo</i><b></b>
 </p>
 !!end
 
-# same html as previous, but wikitext adjusted to match parsoid html2wt
 !! test
 Italics and bold: 2-quote opening sequence: (2,5+3) w/ nowiki
 !! wikitext
 ''foo'''''<nowiki/>'''
-!! html/php
-<p><i>foo</i>
-</p>
-!! html/parsoid
+!! html
 <p><i>foo</i><b></b>
 </p>
 !! end
@@ -710,30 +702,22 @@
 </p>
 !!end
 
-# The PHP parser strips the empty tags out for giggles; parsoid doesn't.
 !! test
 Italics and bold: 3-quote opening sequence: (3,5)
 !! options
 parsoid=wt2html
 !! wikitext
 '''foo'''''
-!! html/php
-<p><b>foo</b>
-</p>
-!! html/parsoid
+!! html
 <p><b>foo</b><i></i>
 </p>
 !!end
 
-# same html as previous, but wikitext adjusted to match parsoid html2wt
 !! test
 Italics and bold: 3-quote opening sequence: (3,5+2) w/ nowiki
 !! wikitext
 '''foo'''''<nowiki/>''
-!! html/php
-<p><b>foo</b>
-</p>
-!! html/parsoid
+!! html
 <p><b>foo</b><i></i>
 </p>
 !! end
@@ -794,17 +778,13 @@
 </p>
 !! end
 
-# The PHP parser strips the empty tags out for giggles; parsoid doesn't.
 !! test
 Italics and bold: 4-quote opening sequence: (4,5)
 !! options
 parsoid=wt2html
 !! wikitext
 ''''foo'''''
-!! html/php
-<p>'<b>foo</b>
-</p>
-!! html/parsoid
+!! html
 <p>'<b>foo</b><i></i>
 </p>
 !!end
@@ -814,10 +794,7 @@
 Italics and bold: 4-quote opening sequence: (4,5+2) w/ nowiki
 !! wikitext
 '<nowiki/>'''foo'''''<nowiki/>''
-!! html/php
-<p>'<b>foo</b>
-</p>
-!! html/parsoid
+!! html
 <p>'<b>foo</b><i></i>
 </p>
 !! end
@@ -907,7 +884,10 @@
 parsoid=wt2html
 !! wikitext
 ''foo''''bar''
-!! html/*
+!! html/php
+<p><i>foo'<b>bar</b></i><b></b>
+</p>
+!! html/parsoid
 <p><i>foo'<b>bar</b></i>
 </p>
 !!end
@@ -970,17 +950,13 @@
 !! end
 
 
-# The PHP parser strips the empty tags out for giggles; parsoid doesn't.
 !! test
 Italics and bold: multiple quote sequences: (3,4,2)
 !! options
 parsoid=wt2html
 !! wikitext
 '''foo''''bar''
-!! html/php
-<p><b>foo'</b>bar
-</p>
-!! html/parsoid
+!! html
 <p><b>foo'</b>bar<i></i>
 </p>
 !!end
@@ -993,7 +969,7 @@
 !! wikitext
 '''<nowiki>foo'</nowiki>'''bar''<nowiki/>''
 !! html/php
-<p><b>foo'</b>bar
+<p><b>foo'</b>bar<i></i>
 </p>
 !! html/parsoid
 <p><b><span typeof="mw:Nowiki">foo'</span></b>bar<i></i>
@@ -1001,17 +977,13 @@
 !! end
 
 
-# The PHP parser strips the empty tags out for giggles; parsoid doesn't.
 !! test
 Italics and bold: multiple quote sequences: (3,4,3)
 !! options
 parsoid=wt2html
 !! wikitext
 '''foo''''bar'''
-!! html/php
-<p><b>foo'</b>bar
-</p>
-!! html/parsoid
+!! html
 <p><b>foo'</b>bar<b></b>
 </p>
 !!end
@@ -1022,7 +994,7 @@
 !! wikitext
 '''<nowiki>foo'</nowiki>'''bar'''<nowiki/>'''
 !! html/php
-<p><b>foo'</b>bar
+<p><b>foo'</b>bar<b></b>
 </p>
 !! html/parsoid
 <p><b><span typeof="mw:Nowiki">foo'</span></b>bar<b></b>
@@ -1118,13 +1090,13 @@
 |}
 !! html/php+tidy
 <table>
-<tr>
+<tbody><tr>
 <th><i>a</i></th>
-<th><i>b</i></th>
+<th><i>b</i>
+</th>
 <td><i>a</i></td>
-<td><i>b</i></td>
-</tr>
-</table>
+<td><i>b</i>
+</td></tr></tbody></table>
 !! html/parsoid
 <table>
 <tbody><tr><th><i>a</i></th><th><i>b</i></th>
@@ -1157,7 +1129,7 @@
 <wbr> is valid wikitext (bug 52468)
 !! wikitext
 <wbr>
-!! html
+!! html+tidy
 <p><wbr />
 </p>
 !! end
@@ -1267,7 +1239,6 @@
 </p>
 !! end
 
-# The next two test different paths in the sanitizer.
 !! test
 Non-word characters don't terminate tag names (bug 17663, 40670, 52022)
 !! wikitext
@@ -1282,40 +1253,14 @@
 <s.foo> doesn't terminate </s.foo>
 
 <sub-ID#1>
-!! html
-<p>&lt;blockquote|&gt;a&lt;/blockquote&gt;
+!! html+tidy
+<p>&lt;blockquote|&gt;a
 </p><p>&lt;b→&gt; doesn't terminate &lt;/b→&gt;
 </p><p>&lt;bä&gt; doesn't terminate &lt;/bä&gt;
 </p><p>&lt;boo&gt; doesn't terminate &lt;/boo&gt;
 </p><p>&lt;s.foo&gt; doesn't terminate &lt;/s.foo&gt;
 </p><p>&lt;sub-ID#1&gt;
 </p>
-!! end
-
-# There is a tidy bug here: http://sourceforge.net/p/tidy/bugs/946/
-# If the non-word-character tag made it through the sanitizer, tidy
-# would munge it up.
-!! test
-Non-word characters don't terminate tag names + tidy
-!! wikitext
-<blockquote|>a</blockquote>
-
-<b→> doesn't terminate </b→>
-
-<bä> doesn't terminate </bä>
-
-<boo> doesn't terminate </boo>
-
-<s.foo> doesn't terminate </s.foo>
-
-<sub-ID#1>
-!! html+tidy
-<p>&lt;blockquote|&gt;a</p>
-<p>&lt;b→&gt; doesn't terminate &lt;/b→&gt;</p>
-<p>&lt;bä&gt; doesn't terminate &lt;/bä&gt;</p>
-<p>&lt;boo&gt; doesn't terminate &lt;/boo&gt;</p>
-<p>&lt;s.foo&gt; doesn't terminate &lt;/s.foo&gt;</p>
-<p>&lt;sub-ID#1&gt;</p>
 !! end
 
 ###
@@ -1859,11 +1804,8 @@
 <p>b
 </p>
 !! html+tidy
-<p>a</p>
-<blockquote>
-<p>foo</p>
-</blockquote>
-<p>b</p>
+<p>a </p><blockquote><p>foo</p></blockquote><p>b
+</p>
 !! end
 
 !! test
@@ -1894,14 +1836,8 @@
 b <blockquote>foo</blockquote>
 
 !! html+tidy
-<p>a</p>
-<blockquote>
-<p>foo</p>
-</blockquote>
-<p>b</p>
-<blockquote>
-<p>foo</p>
-</blockquote>
+<p>a </p><blockquote><p>foo</p></blockquote><p>
+b </p><blockquote><p>foo</p></blockquote>
 !! end
 
 !! test
@@ -2194,9 +2130,7 @@
 </blockquote>
 
 !! html+tidy
-<blockquote>
-<p>Foo</p>
-<del>bar</del> <ins>baz</ins> quux</blockquote>
+<blockquote><p>Foo </p><del>bar</del><ins>baz</ins><p> quux</p></blockquote>
 !! end
 
 !! test
@@ -2292,7 +2226,7 @@
 </nowiki>
 </pre>
 
-!! html/php
+!! html/php+tidy
 <pre>
 &lt;nowiki&gt;
 </pre>
@@ -5142,7 +5076,7 @@
 parsoid=wt2html,html2html
 !! wikitext
 [http://example.com [[wikilink]] embedded in ext link]
-!! html/php
+!! html/php+tidy
 <p><a rel="nofollow" class="external text" href="http://example.com"></a><a href="/index.php?title=Wikilink&amp;action=edit&amp;redlink=1" class="new" title="Wikilink (page does not exist)">wikilink</a><a rel="nofollow" class="external text" href="http://example.com"> embedded in ext link</a>
 </p>
 !! html/parsoid
@@ -5332,7 +5266,7 @@
 External link containing double-single-quotes in text embedded in italics (bug 4598 sanity check)
 !! wikitext
 ''Some [http://example.com/ pretty ''italics'' and stuff]!''
-!! html
+!! html/php+tidy
 <p><i>Some </i><a rel="nofollow" class="external text" href="http://example.com/"><i>pretty </i>italics<i> and stuff</i></a><i>!</i>
 </p>
 !! end
@@ -5754,7 +5688,7 @@
 <p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i>
 </p><p><b><i>Bold italic text </i>with italic deactivated<i> in 
between.</i></b>
 </p><p><b>Bold text..</b>
-</p><p>..spanning two paragraphs (should not work).
+</p><p>..spanning two paragraphs (should not work).<b></b>
 </p><p><b>Bold tag left open</b>
 </p><p><i>Italic tag left open</i>
 </p><p>Normal text.
@@ -8818,7 +8752,7 @@
 !! wikitext
 <br style="clear:both;" />
 !! html
-<p><br style="clear:both;" />
+<p><br style="clear:both;"/>
 </p>
 !! end
 
@@ -8828,7 +8762,7 @@
 <br style="clear: left;">
 <br style="clear: right;">
 <br style="clear: both;">
-!! html
+!! html+tidy
 <p><br style="clear: left;" />
 <br style="clear: right;" />
 <br style="clear: both;" />
@@ -11931,12 +11865,9 @@
 <tr><td></td></tr></table>
 
 !! html+tidy
-<p>a</p>
-<table>
-<tr>
-<td></td>
-</tr>
-</table>
+<p>
+a
+</p><table><tbody><tr><td></td></tr></tbody></table>
 !! end
 
 !!test
@@ -16861,7 +16792,7 @@
 Sanitizer: Closing of open but not closed tags
 !! wikitext
 <s>foo
-!! html
+!! html+tidy
 <p><s>foo</s>
 </p>
 !! end
@@ -16931,7 +16862,7 @@
        <link rel="stylesheet" href="{{SERVER}}">
        <link rel="stylesheet" itemprop="hello" href="{{SERVER}}">
 </div>
-!! html
+!! html+tidy
 <div itemscope="">
 <p>    <meta itemprop="hello" content="world" />
        &lt;meta http-equiv="refresh" content="5"&gt;
@@ -17515,15 +17446,14 @@
     <td> And yet som tabular data</td>
   </tr>
 </table>
-!! html
+!! html+tidy
 <table>
-  <tr>
+  <tbody><tr>
     <td> Some tabular data</td>
     <td> More tabular data ...
     </td><td> And yet som tabular data</td>
   </tr>
-</table>
-
+</tbody></table>
 !! end
 
 !! test
@@ -18547,7 +18477,7 @@
 !! wikitext
 '''''<nowiki/>'''''
 !! html/php
-<p><i></i>
+<p><i><b></b></i>
 </p>
 !! html/parsoid
 <p><b><i></i></b></p>
@@ -18559,7 +18489,7 @@
 !! wikitext
 '''''<nowiki/>'''''
 !! html/php
-<p><i></i>
+<p><i><b></b></i>
 </p>
 !! html/parsoid
 <p><i><b></b></i></p>
@@ -19322,9 +19252,8 @@
 Don't fall for the self-closing div
 !! wikitext
 <div>hello world</div/>
-!! html
+!! html+tidy
 <div>hello world</div>
-
 !! end
 
 !! test
@@ -20349,9 +20278,8 @@
 Line two</blockquote>
 
 !! html+tidy
-<blockquote>
-<p>Line one Line two</p>
-</blockquote>
+<blockquote><p>Line one
+Line two</p></blockquote>
 !! end
 
 !! test
@@ -20368,9 +20296,9 @@
 Line two</blockquote>
 
 !! html+tidy
-<blockquote>
-<p>Line one</p>
-Line two</blockquote>
+<blockquote><p>Line one
+</p><p>Line two
+</p></blockquote>
 !! end
 
 !! test
@@ -20387,10 +20315,9 @@
 </blockquote>
 
 !! html+tidy
-<blockquote>
-<p>Line one</p>
-<p>Line two</p>
-</blockquote>
+<blockquote><p>Line one
+</p><p>Line two
+</p></blockquote>
 !! end
 
 !! test
@@ -20946,7 +20873,7 @@
 <indicator name="10">Two
 
 paragraphs</indicator>
-!! html
+!! html+tidy
 01=hello world
 02=<a href="/wiki/Main_Page" title="Main Page">Main Page</a>
 03=<img alt="Foobar.jpg" src="http://example.com/images/thumb/3/3a/Foobar.jpg/25px-Foobar.jpg" width="25" height="3" srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/38px-Foobar.jpg 1.5x, http://example.com/images/thumb/3/3a/Foobar.jpg/50px-Foobar.jpg 2x" />
@@ -21287,21 +21214,13 @@
 <h2><span class="mw-headline" 
id="Quote"><blockquote>Quote</blockquote></span><span 
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a 
href="/index.php?title=Main_Page&amp;action=edit&amp;section=1" title="Edit 
section: Quote">edit</a><span 
class="mw-editsection-bracket">]</span></span></h2>
 
 !! html+tidy
-<p></p>
-<div id="toc" class="toc">
-<div id="toctitle">
-<h2>Contents</h2>
-</div>
+<div id="toc" class="toc"><div id="toctitle"><h2>Contents</h2></div>
 <ul>
 <li class="toclevel-1 tocsection-1"><a href="#Quote"><span 
class="tocnumber">1</span> <span class="toctext">Quote</span></a></li>
 </ul>
 </div>
 <p></p>
-<h2><span class="mw-headline" id="Quote"></span></h2>
-<blockquote>
-<p><span class="mw-headline" id="Quote">Quote</span></p>
-</blockquote>
-<p><span class="mw-editsection"><span 
class="mw-editsection-bracket">[</span><a 
href="/index.php?title=Main_Page&amp;action=edit&amp;section=1" title="Edit 
section: Quote">edit</a><span class="mw-editsection-bracket">]</span></span></p>
+<h2><span class="mw-headline" id="Quote"></span></h2><blockquote><p><span 
class="mw-headline" id="Quote">Quote</span></p></blockquote><p><span 
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a 
href="/index.php?title=Main_Page&amp;action=edit&amp;section=1" title="Edit 
section: Quote">edit</a><span class="mw-editsection-bracket">]</span></span></p>
 !! end
 
 !! test
@@ -23119,16 +23038,14 @@
 |x<div><nowiki>a|b</nowiki></div>
 |}
 !! html/php+tidy
-<table>
+<table><tbody>
 <tr>
 <td>foo|bar</td>
 </tr>
 <tr>
 <td>x
 <div>a|b</div>
-</td>
-</tr>
-</table>
+</td></tr></tbody></table>
 !! end
 
 !! test
@@ -26078,7 +25995,7 @@
 |}
 !! html/php+tidy
 <table>
-<tr>
+<tbody><tr>
 <th>-</th>
 <th>-</th>
 </tr>
@@ -26095,7 +26012,7 @@
 <p>-</p>
 </td>
 </tr>
-</table>
+</tbody></table>
 !! end
 
 !! test
diff --git a/tests/phpunit/MediaWikiTestCase.php 
b/tests/phpunit/MediaWikiTestCase.php
index 8dfe628..af132e6 100644
--- a/tests/phpunit/MediaWikiTestCase.php
+++ b/tests/phpunit/MediaWikiTestCase.php
@@ -1703,7 +1703,7 @@
                // of tidy. In that case however, we can not reliably detect 
whether a failing validation
                // is due to malformed HTML, or caused by tidy not being 
installed as a command line tool.
                // That would cause all HTML assertions to fail on a system 
that has no tidy installed.
-               if ( !$GLOBALS['wgTidyInternal'] || !MWTidy::isEnabled() ) {
+               if ( !( MWTidy::singleton() instanceof 
MediaWiki\Tidy\RaggettInternalPHP ) ) {
                        $this->markTestSkipped( 'Tidy extension not installed' 
);
                }
 
diff --git a/tests/phpunit/includes/SanitizerTest.php 
b/tests/phpunit/includes/SanitizerTest.php
index 72d7166..93ed19b 100644
--- a/tests/phpunit/includes/SanitizerTest.php
+++ b/tests/phpunit/includes/SanitizerTest.php
@@ -91,27 +91,6 @@
        }
 
        /**
-        * @covers Sanitizer::removeHTMLtags
-        * @dataProvider provideHtml5Tags
-        *
-        * @param string $tag Name of an HTML5 element (ie: 'video')
-        * @param bool $escaped Whether sanitizer let the tag in or escape it 
(ie: '&lt;video&gt;')
-        */
-       public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
-               MWTidy::setInstance( false );
-
-               if ( $escaped ) {
-                       $this->assertEquals( "&lt;$tag&gt;",
-                               Sanitizer::removeHTMLtags( "<$tag>" )
-                       );
-               } else {
-                       $this->assertEquals( "<$tag></$tag>\n",
-                               Sanitizer::removeHTMLtags( "<$tag>" )
-                       );
-               }
-       }
-
-       /**
         * Provide HTML5 tags
         */
        public static function provideHtml5Tags() {
@@ -153,15 +132,6 @@
                                '<abbr> inside <dfn>',
                        ],
                ];
-       }
-
-       /**
-        * @dataProvider dataRemoveHTMLtags
-        * @covers Sanitizer::removeHTMLtags
-        */
-       public function testRemoveHTMLtags( $input, $output, $msg = null ) {
-               MWTidy::setInstance( false );
-               $this->assertEquals( $output, Sanitizer::removeHTMLtags( $input 
), $msg );
        }
 
        /**
diff --git a/tests/phpunit/includes/parser/NewParserTest.php 
b/tests/phpunit/includes/parser/NewParserTest.php
index 8512572..eb0a888 100644
--- a/tests/phpunit/includes/parser/NewParserTest.php
+++ b/tests/phpunit/includes/parser/NewParserTest.php
@@ -35,10 +35,6 @@
         * @var DjVuSupport
         */
        private $djVuSupport;
-       /**
-        * @var TidySupport
-        */
-       private $tidySupport;
 
        protected $file = false;
 
@@ -162,13 +158,8 @@
                // DjVu support
                $this->djVuSupport = new DjVuSupport();
                // Tidy support
-               $this->tidySupport = new TidySupport();
                $tmpGlobals['wgTidyConfig'] = null;
                $tmpGlobals['wgUseTidy'] = false;
-               $tmpGlobals['wgDebugTidy'] = false;
-               $tmpGlobals['wgTidyConf'] = $IP . '/includes/tidy/tidy.conf';
-               $tmpGlobals['wgTidyOpts'] = '';
-               $tmpGlobals['wgTidyInternal'] = 
$this->tidySupport->isInternal();
 
                $this->setMwGlobals( $tmpGlobals );
 
@@ -452,7 +443,6 @@
                        'wgMathDirectory' => $uploadDir . '/math',
                        'wgDefaultLanguageVariant' => $variant,
                        'wgLinkHolderBatchSize' => $linkHolderBatchSize,
-                       'wgUseTidy' => isset( $opts['tidy'] ),
                ];
 
                if ( $config ) {
@@ -772,11 +762,7 @@
                }
 
                if ( isset( $opts['tidy'] ) ) {
-                       if ( !$this->tidySupport->isEnabled() ) {
-                               $this->markTestSkipped( "SKIPPED: tidy 
extension is not installed.\n" );
-                       } else {
-                               $options->setTidy( true );
-                       }
+                       $options->setTidy( true );
                }
 
                if ( isset( $opts['pst'] ) ) {
diff --git a/tests/phpunit/includes/parser/TidyTest.php 
b/tests/phpunit/includes/parser/TidyTest.php
index 62b84aa..ea8c4b9 100644
--- a/tests/phpunit/includes/parser/TidyTest.php
+++ b/tests/phpunit/includes/parser/TidyTest.php
@@ -4,14 +4,6 @@
  * @group Parser
  */
 class TidyTest extends MediaWikiTestCase {
-
-       protected function setUp() {
-               parent::setUp();
-               if ( !MWTidy::isEnabled() ) {
-                       $this->markTestSkipped( 'Tidy not found' );
-               }
-       }
-
        /**
         * @dataProvider provideTestWrapping
         */
diff --git a/tests/testHelpers.inc b/tests/testHelpers.inc
index d04e0fc..049362b 100644
--- a/tests/testHelpers.inc
+++ b/tests/testHelpers.inc
@@ -832,43 +832,3 @@
                        && is_executable( $wgDjvuTxt );
        }
 }
-
-/**
- * Initialize and detect the tidy support
- */
-class TidySupport {
-       private $internalTidy;
-       private $externalTidy;
-
-       /**
-        * Determine if there is a usable tidy.
-        */
-       public function __construct() {
-               global $wgTidyBin;
-
-               $this->internalTidy = extension_loaded( 'tidy' ) &&
-                       class_exists( 'tidy' ) && !wfIsHHVM();
-
-               $this->externalTidy = is_executable( $wgTidyBin ) ||
-                       Installer::locateExecutableInDefaultPaths( [ $wgTidyBin 
] )
-                       !== false;
-       }
-
-       /**
-        * Returns true if we should use internal tidy.
-        *
-        * @return bool
-        */
-       public function isInternal() {
-               return $this->internalTidy;
-       }
-
-       /**
-        * Returns true if tidy is usable
-        *
-        * @return bool
-        */
-       public function isEnabled() {
-               return $this->internalTidy || $this->externalTidy;
-       }
-}

-- 
To view, visit https://gerrit.wikimedia.org/r/296370
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifea448c3de708a94768a1861a993c2c3c6292759
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Tim Starling <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to