Tim Starling has uploaded a new change for review.
https://gerrit.wikimedia.org/r/296370
Change subject: [WIP] Use the pure-PHP Balancer as the default Tidy
implementation
......................................................................
[WIP] Use the pure-PHP Balancer as the default Tidy implementation
* Remove the concept of tidy being disabled
* Remove the non-tidy hacks from Sanitizer and Parser
* Use Balancer in parser tests
* Update parserTests.txt as appropriate
Change-Id: Ifea448c3de708a94768a1861a993c2c3c6292759
---
M includes/Sanitizer.php
M includes/parser/MWTidy.php
M includes/parser/Parser.php
M tests/TestsAutoLoader.php
M tests/parser/parserTest.inc
M tests/parser/parserTests.txt
M tests/phpunit/MediaWikiTestCase.php
M tests/phpunit/includes/SanitizerTest.php
M tests/phpunit/includes/parser/NewParserTest.php
M tests/phpunit/includes/parser/TidyTest.php
M tests/testHelpers.inc
11 files changed, 132 insertions(+), 491 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/70/296370/1
diff --git a/includes/Sanitizer.php b/includes/Sanitizer.php
index 8f1fc99..f104502 100644
--- a/includes/Sanitizer.php
+++ b/includes/Sanitizer.php
@@ -468,188 +468,47 @@
$text = Sanitizer::removeHTMLcomments( $text );
$bits = explode( '<', $text );
$text = str_replace( '>', '&gt;', array_shift( $bits ) );
- if ( !MWTidy::isEnabled() ) {
- $tagstack = $tablestack = [];
- foreach ( $bits as $x ) {
- $regs = [];
- # $slash: Does the current element start with a
'/'?
- # $t: Current element name
- # $params: String between element name and >
- # $brace: Ending '>' or '/>'
- # $rest: Everything until the next element of
$bits
- if ( preg_match( self::ELEMENT_BITS_REGEX, $x,
$regs ) ) {
- list( /* $qbar */, $slash, $t, $params,
$brace, $rest ) = $regs;
- } else {
- $slash = $t = $params = $brace = $rest
= null;
- }
+ foreach ( $bits as $x ) {
+ if ( preg_match( self::ELEMENT_BITS_REGEX, $x, $regs )
) {
+ list( /* $qbar */, $slash, $t, $params, $brace,
$rest ) = $regs;
$badtag = false;
$t = strtolower( $t );
if ( isset( $htmlelements[$t] ) ) {
- # Check our stack
- if ( $slash && isset(
$htmlsingleonly[$t] ) ) {
- $badtag = true;
- } elseif ( $slash ) {
- # Closing a tag... is it the
one we just opened?
- MediaWiki\suppressWarnings();
- $ot = array_pop( $tagstack );
- MediaWiki\restoreWarnings();
-
- if ( $ot != $t ) {
- if ( isset(
$htmlsingleallowed[$ot] ) ) {
- # Pop all
elements with an optional close tag
- # and see if we
find a match below them
- $optstack = [];
- array_push(
$optstack, $ot );
-
MediaWiki\suppressWarnings();
- $ot =
array_pop( $tagstack );
-
MediaWiki\restoreWarnings();
- while ( $ot !=
$t && isset( $htmlsingleallowed[$ot] ) ) {
-
array_push( $optstack, $ot );
-
MediaWiki\suppressWarnings();
- $ot =
array_pop( $tagstack );
-
MediaWiki\restoreWarnings();
- }
- if ( $t != $ot
) {
- # No
match. Push the optional elements back again
- $badtag
= true;
-
MediaWiki\suppressWarnings();
- $ot =
array_pop( $optstack );
-
MediaWiki\restoreWarnings();
- while (
$ot ) {
-
array_push( $tagstack, $ot );
-
MediaWiki\suppressWarnings();
-
$ot = array_pop( $optstack );
-
MediaWiki\restoreWarnings();
- }
- }
- } else {
-
MediaWiki\suppressWarnings();
- array_push(
$tagstack, $ot );
-
MediaWiki\restoreWarnings();
-
- # <li> can be
nested in <ul> or <ol>, skip those cases:
- if ( !isset(
$htmllist[$ot] ) || !isset( $listtags[$t] ) ) {
- $badtag
= true;
- }
- }
- } else {
- if ( $t == 'table' ) {
- $tagstack =
array_pop( $tablestack );
- }
- }
- $newparams = '';
- } else {
- # Keep track for later
- if ( isset( $tabletags[$t] ) &&
!in_array( 'table', $tagstack ) ) {
- $badtag = true;
- } elseif ( in_array( $t,
$tagstack ) && !isset( $htmlnest[$t] ) ) {
- $badtag = true;
- # Is it a self closed htmlpair
? (bug 5487)
- } elseif ( $brace == '/>' &&
isset( $htmlpairs[$t] ) ) {
- // Eventually we'll
just remove the self-closing
- // slash, in order to
be consistent with HTML5
- // semantics.
- // $brace = '>';
- // For now, let's just
warn authors to clean up.
- if ( is_callable(
$warnCallback ) ) {
-
call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] );
- }
- $badtag = true;
- } elseif ( isset(
$htmlsingleonly[$t] ) ) {
- # Hack to force empty
tag for unclosable elements
- $brace = '/>';
- } elseif ( isset(
$htmlsingle[$t] ) ) {
- # Hack to not close
$htmlsingle tags
- $brace = null;
- # Still need to push
this optionally-closed tag to
- # the tag stack so that
we can match end tags
- # instead of marking
them as bad.
- array_push( $tagstack,
$t );
- } elseif ( isset(
$tabletags[$t] ) && in_array( $t, $tagstack ) ) {
- // New table tag but
forgot to close the previous one
- $text .= "</$t>";
- } else {
- if ( $t == 'table' ) {
- array_push(
$tablestack, $tagstack );
- $tagstack = [];
- }
- array_push( $tagstack,
$t );
- }
-
- # Replace any variables or
template parameters with
- # plaintext results.
- if ( is_callable(
$processCallback ) ) {
- call_user_func_array(
$processCallback, [ &$params, $args ] );
- }
-
- if ( !Sanitizer::validateTag(
$params, $t ) ) {
- $badtag = true;
- }
-
- # Strip non-approved attributes
from the tag
- $newparams =
Sanitizer::fixTagAttributes( $params, $t );
+ if ( is_callable( $processCallback ) ) {
+ call_user_func_array(
$processCallback, [ &$params, $args ] );
}
+
+ if ( $brace == '/>' && !( isset(
$htmlsingle[$t] ) || isset( $htmlsingleonly[$t] ) ) ) {
+ // Eventually we'll just remove
the self-closing
+ // slash, in order to be
consistent with HTML5
+ // semantics.
+ // $brace = '>';
+ // For now, let's just warn
authors to clean up.
+ if ( is_callable( $warnCallback
) ) {
+ call_user_func_array(
$warnCallback, [ 'deprecated-self-close-category' ] );
+ }
+ }
+ if ( !Sanitizer::validateTag( $params,
$t ) ) {
+ $badtag = true;
+ }
+
+ $newparams =
Sanitizer::fixTagAttributes( $params, $t );
if ( !$badtag ) {
+ if ( $brace === '/>' && !isset(
$htmlsingleonly[$t] ) ) {
+ # Interpret
self-closing tags as empty tags even when
+ # HTML 5 would
interpret them as start tags. Such input
+ # is commonly seen on
Wikimedia wikis with this intention.
+ $brace = "></$t>";
+ }
+
$rest = str_replace( '>',
'&gt;', $rest );
- $close = ( $brace == '/>' &&
!$slash ) ? ' /' : '';
- $text .=
"<$slash$t$newparams$close>$rest";
+ $text .=
"<$slash$t$newparams$brace$rest";
continue;
}
}
- $text .= '&lt;' . str_replace( '>', '&gt;', $x
);
}
- # Close off any remaining tags
- while ( is_array( $tagstack ) && ( $t = array_pop(
$tagstack ) ) ) {
- $text .= "</$t>\n";
- if ( $t == 'table' ) {
- $tagstack = array_pop( $tablestack );
- }
- }
- } else {
- # this might be possible using tidy itself
- foreach ( $bits as $x ) {
- if ( preg_match( self::ELEMENT_BITS_REGEX, $x,
$regs ) ) {
- list( /* $qbar */, $slash, $t, $params,
$brace, $rest ) = $regs;
-
- $badtag = false;
- $t = strtolower( $t );
- if ( isset( $htmlelements[$t] ) ) {
- if ( is_callable(
$processCallback ) ) {
- call_user_func_array(
$processCallback, [ &$params, $args ] );
- }
-
- if ( $brace == '/>' && !(
isset( $htmlsingle[$t] ) || isset( $htmlsingleonly[$t] ) ) ) {
- // Eventually we'll
just remove the self-closing
- // slash, in order to
be consistent with HTML5
- // semantics.
- // $brace = '>';
- // For now, let's just
warn authors to clean up.
- if ( is_callable(
$warnCallback ) ) {
-
call_user_func_array( $warnCallback, [ 'deprecated-self-close-category' ] );
- }
- }
- if ( !Sanitizer::validateTag(
$params, $t ) ) {
- $badtag = true;
- }
-
- $newparams =
Sanitizer::fixTagAttributes( $params, $t );
- if ( !$badtag ) {
- if ( $brace === '/>' &&
!isset( $htmlsingleonly[$t] ) ) {
- # Interpret
self-closing tags as empty tags even when
- # HTML 5 would
interpret them as start tags. Such input
- # is commonly
seen on Wikimedia wikis with this intention.
- $brace =
"></$t>";
- }
-
- $rest = str_replace(
'>', '&gt;', $rest );
- $text .=
"<$slash$t$newparams$brace$rest";
- continue;
- }
- }
- }
- $text .= '&lt;' . str_replace( '>', '&gt;', $x
);
- }
+ $text .= '&lt;' . str_replace( '>', '&gt;', $x );
}
return $text;
}
diff --git a/includes/parser/MWTidy.php b/includes/parser/MWTidy.php
index f281c25..a56f2c2 100644
--- a/includes/parser/MWTidy.php
+++ b/includes/parser/MWTidy.php
@@ -22,11 +22,7 @@
*/
/**
- * Class to interact with HTML tidy
- *
- * Either the external tidy program or the in-process tidy extension
- * will be used depending on availability. Override the default
- * $wgTidyInternal setting to disable the internal if it's not working.
+ * Class to interact with various HTML tidy implementations
*
* @ingroup Parser
*/
@@ -43,12 +39,7 @@
* @return string Corrected HTML output
*/
public static function tidy( $text ) {
- $driver = self::singleton();
- if ( !$driver ) {
- throw new MWException( __METHOD__ .
- ': tidy is disabled, caller should have checked
MWTidy::isEnabled()' );
- }
- return $driver->tidy( $text );
+ return self::singleton()->tidy( $text );
}
/**
@@ -78,10 +69,6 @@
*/
public static function checkErrors( $text, &$errorStr = null ) {
$driver = self::singleton();
- if ( !$driver ) {
- throw new MWException( __METHOD__ .
- ': tidy is disabled, caller should have checked
MWTidy::isEnabled()' );
- }
if ( $driver->supportsValidate() ) {
return $driver->validate( $text, $errorStr );
} else {
@@ -89,8 +76,11 @@
}
}
+ /**
+ * @deprecated since 1.28
+ */
public static function isEnabled() {
- return self::singleton() !== false;
+ return true;
}
protected static function singleton() {
@@ -117,34 +107,45 @@
$config['driver'] = 'RaggettExternal';
}
} else {
- return false;
+ $config = [ 'driver' => 'Html5Internal' ];
}
- switch ( $config['driver'] ) {
- case 'RaggettInternalHHVM':
- self::$instance = new
MediaWiki\Tidy\RaggettInternalHHVM( $config );
- break;
- case 'RaggettInternalPHP':
- self::$instance = new
MediaWiki\Tidy\RaggettInternalPHP( $config );
- break;
- case 'RaggettExternal':
- self::$instance = new
MediaWiki\Tidy\RaggettExternal( $config );
- break;
- case 'Html5Depurate':
- self::$instance = new
MediaWiki\Tidy\Html5Depurate( $config );
- break;
- case 'Html5Internal':
- self::$instance = new
MediaWiki\Tidy\Html5Internal( $config );
- break;
- default:
- throw new MWException( "Invalid tidy
driver: \"{$config['driver']}\"" );
- }
+ self::$instance = self::factory( $config );
}
return self::$instance;
}
/**
+ * Create a new Tidy driver object from configuration.
+ * @see $wgTidyConfig
+ * @param array $config
+ * @return TidyDriverBase
+ */
+ public static function factory( array $config ) {
+ switch ( $config['driver'] ) {
+ case 'RaggettInternalHHVM':
+ $instance = new
MediaWiki\Tidy\RaggettInternalHHVM( $config );
+ break;
+ case 'RaggettInternalPHP':
+ $instance = new
MediaWiki\Tidy\RaggettInternalPHP( $config );
+ break;
+ case 'RaggettExternal':
+ $instance = new MediaWiki\Tidy\RaggettExternal(
$config );
+ break;
+ case 'Html5Depurate':
+ $instance = new MediaWiki\Tidy\Html5Depurate(
$config );
+ break;
+ case 'Html5Internal':
+ $instance = new MediaWiki\Tidy\Html5Internal(
$config );
+ break;
+ default:
+ throw new MWException( "Invalid tidy driver:
\"{$config['driver']}\"" );
+ }
+ return $instance;
+ }
+
+ /**
* Set the driver to be used. This is for testing.
- * @param TidyDriverBase|false|null $instance
+ * @param TidyDriverBase|null $instance
*/
public static function setInstance( $instance ) {
self::$instance = $instance;
diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php
index 55b5fc3..5c954f8 100644
--- a/includes/parser/Parser.php
+++ b/includes/parser/Parser.php
@@ -1355,35 +1355,9 @@
$text = Sanitizer::normalizeCharReferences( $text );
- if ( MWTidy::isEnabled() && $this->mOptions->getTidy() ) {
+ if ( $this->mOptions->getTidy() ) {
$text = MWTidy::tidy( $text );
$this->mOutput->addModuleStyles(
MWTidy::getModuleStyles() );
- } else {
- # attempt to sanitize at least some nesting problems
- # (bug #2702 and quite a few others)
- $tidyregs = [
- # ''Something [http://www.cool.com cool''] -->
- # <i>Something</i><a
href="http://www.cool.com"..><i>cool></i></a>
-
'/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
- '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
- # fix up an anchor inside another anchor, only
- # at least for a single single nested link (bug
3695)
-
'/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
- '\\1\\2</a>\\3</a>\\1\\4</a>',
- # fix div inside inline elements- doBlockLevels
won't wrap a line which
- # contains a div, so fix it up here; replace
- # div with escaped text
- '/(<([aib])
[^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
- '\\1\\3<div\\5>\\6</div>\\8\\9',
- # remove empty italic or bold tag pairs, some
- # introduced by rules above
- '/<([bi])><\/\\1>/' => '',
- ];
-
- $text = preg_replace(
- array_keys( $tidyregs ),
- array_values( $tidyregs ),
- $text );
}
if ( $isMain ) {
diff --git a/tests/TestsAutoLoader.php b/tests/TestsAutoLoader.php
index 8b100a2..e430c6e 100644
--- a/tests/TestsAutoLoader.php
+++ b/tests/TestsAutoLoader.php
@@ -36,7 +36,6 @@
'TestRecorder' => "$testDir/testHelpers.inc",
'ITestRecorder' => "$testDir/testHelpers.inc",
'DjVuSupport' => "$testDir/testHelpers.inc",
- 'TidySupport' => "$testDir/testHelpers.inc",
# tests/phpunit
'MediaWikiTestCase' => "$testDir/phpunit/MediaWikiTestCase.php",
diff --git a/tests/parser/parserTest.inc b/tests/parser/parserTest.inc
index e519f59..f5b668c 100644
--- a/tests/parser/parserTest.inc
+++ b/tests/parser/parserTest.inc
@@ -70,11 +70,6 @@
*/
private $djVuSupport;
- /**
- * @var TidySupport
- */
- private $tidySupport;
-
private $maxFuzzTestLength = 300;
private $fuzzSeed = 0;
private $memoryLimit = 50;
@@ -149,10 +144,6 @@
$this->runParsoid = isset( $options['run-parsoid'] );
$this->djVuSupport = new DjVuSupport();
- $this->tidySupport = new TidySupport();
- if ( !$this->tidySupport->isEnabled() ) {
- echo "Warning: tidy is not installed, skipping some
tests\n";
- }
if ( !extension_loaded( 'gd' ) ) {
echo "Warning: GD extension is not present,
thumbnailing tests will probably fail\n";
@@ -625,11 +616,7 @@
}
if ( isset( $opts['tidy'] ) ) {
- if ( !$this->tidySupport->isEnabled() ) {
- return $this->showSkipped();
- } else {
- $options->setTidy( true );
- }
+ $options->setTidy( true );
}
if ( isset( $opts['title'] ) ) {
@@ -919,12 +906,9 @@
'wgDisableLangConversion' => false,
'wgDisableTitleConversion' => false,
// Tidy options.
- 'wgUseTidy' => isset( $opts['tidy'] ),
+ 'wgUseTidy' => false,
'wgTidyConfig' => null,
'wgDebugTidy' => false,
- 'wgTidyConf' => $IP . '/includes/tidy/tidy.conf',
- 'wgTidyOpts' => '',
- 'wgTidyInternal' => $this->tidySupport->isInternal(),
];
if ( $config ) {
@@ -1280,7 +1264,6 @@
FileBackendGroup::destroySingleton();
LockManagerGroup::destroySingletons();
LinkCache::singleton()->clear();
- MWTidy::destroySingleton();
foreach ( $this->savedGlobals as $var => $val ) {
$GLOBALS[$var] = $val;
diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt
index 2e059d7..ae9f436 100644
--- a/tests/parser/parserTests.txt
+++ b/tests/parser/parserTests.txt
@@ -650,30 +650,22 @@
</p>
!! end
-# The PHP parser strips the empty tags out for giggles; parsoid doesn't.
!! test
Italics and bold: 2-quote opening sequence: (2,5)
!! options
parsoid=wt2html
!! wikitext
''foo'''''
-!! html/php
-<p><i>foo</i>
-</p>
-!! html/parsoid
+!! html
<p><i>foo</i><b></b>
</p>
!!end
-# same html as previous, but wikitext adjusted to match parsoid html2wt
!! test
Italics and bold: 2-quote opening sequence: (2,5+3) w/ nowiki
!! wikitext
''foo'''''<nowiki/>'''
-!! html/php
-<p><i>foo</i>
-</p>
-!! html/parsoid
+!! html
<p><i>foo</i><b></b>
</p>
!! end
@@ -710,30 +702,22 @@
</p>
!!end
-# The PHP parser strips the empty tags out for giggles; parsoid doesn't.
!! test
Italics and bold: 3-quote opening sequence: (3,5)
!! options
parsoid=wt2html
!! wikitext
'''foo'''''
-!! html/php
-<p><b>foo</b>
-</p>
-!! html/parsoid
+!! html
<p><b>foo</b><i></i>
</p>
!!end
-# same html as previous, but wikitext adjusted to match parsoid html2wt
!! test
Italics and bold: 3-quote opening sequence: (3,5+2) w/ nowiki
!! wikitext
'''foo'''''<nowiki/>''
-!! html/php
-<p><b>foo</b>
-</p>
-!! html/parsoid
+!! html
<p><b>foo</b><i></i>
</p>
!! end
@@ -794,17 +778,13 @@
</p>
!! end
-# The PHP parser strips the empty tags out for giggles; parsoid doesn't.
!! test
Italics and bold: 4-quote opening sequence: (4,5)
!! options
parsoid=wt2html
!! wikitext
''''foo'''''
-!! html/php
-<p>'<b>foo</b>
-</p>
-!! html/parsoid
+!! html
<p>'<b>foo</b><i></i>
</p>
!!end
@@ -814,10 +794,7 @@
Italics and bold: 4-quote opening sequence: (4,5+2) w/ nowiki
!! wikitext
'<nowiki/>'''foo'''''<nowiki/>''
-!! html/php
-<p>'<b>foo</b>
-</p>
-!! html/parsoid
+!! html
<p>'<b>foo</b><i></i>
</p>
!! end
@@ -907,7 +884,10 @@
parsoid=wt2html
!! wikitext
''foo''''bar''
-!! html/*
+!! html/php
+<p><i>foo'<b>bar</b></i><b></b>
+</p>
+!! html/parsoid
<p><i>foo'<b>bar</b></i>
</p>
!!end
@@ -970,17 +950,13 @@
!! end
-# The PHP parser strips the empty tags out for giggles; parsoid doesn't.
!! test
Italics and bold: multiple quote sequences: (3,4,2)
!! options
parsoid=wt2html
!! wikitext
'''foo''''bar''
-!! html/php
-<p><b>foo'</b>bar
-</p>
-!! html/parsoid
+!! html
<p><b>foo'</b>bar<i></i>
</p>
!!end
@@ -993,7 +969,7 @@
!! wikitext
'''<nowiki>foo'</nowiki>'''bar''<nowiki/>''
!! html/php
-<p><b>foo'</b>bar
+<p><b>foo'</b>bar<i></i>
</p>
!! html/parsoid
<p><b><span typeof="mw:Nowiki">foo'</span></b>bar<i></i>
@@ -1001,17 +977,13 @@
!! end
-# The PHP parser strips the empty tags out for giggles; parsoid doesn't.
!! test
Italics and bold: multiple quote sequences: (3,4,3)
!! options
parsoid=wt2html
!! wikitext
'''foo''''bar'''
-!! html/php
-<p><b>foo'</b>bar
-</p>
-!! html/parsoid
+!! html
<p><b>foo'</b>bar<b></b>
</p>
!!end
@@ -1022,7 +994,7 @@
!! wikitext
'''<nowiki>foo'</nowiki>'''bar'''<nowiki/>'''
!! html/php
-<p><b>foo'</b>bar
+<p><b>foo'</b>bar<b></b>
</p>
!! html/parsoid
<p><b><span typeof="mw:Nowiki">foo'</span></b>bar<b></b>
@@ -1118,13 +1090,13 @@
|}
!! html/php+tidy
<table>
-<tr>
+<tbody><tr>
<th><i>a</i></th>
-<th><i>b</i></th>
+<th><i>b</i>
+</th>
<td><i>a</i></td>
-<td><i>b</i></td>
-</tr>
-</table>
+<td><i>b</i>
+</td></tr></tbody></table>
!! html/parsoid
<table>
<tbody><tr><th><i>a</i></th><th><i>b</i></th>
@@ -1157,7 +1129,7 @@
<wbr> is valid wikitext (bug 52468)
!! wikitext
<wbr>
-!! html
+!! html+tidy
<p><wbr />
</p>
!! end
@@ -1267,7 +1239,6 @@
</p>
!! end
-# The next two test different paths in the sanitizer.
!! test
Non-word characters don't terminate tag names (bug 17663, 40670, 52022)
!! wikitext
@@ -1282,40 +1253,14 @@
<s.foo> doesn't terminate </s.foo>
<sub-ID#1>
-!! html
-<p><blockquote|>a</blockquote>
+!! html+tidy
+<p><blockquote|>a
</p><p><b→> doesn't terminate </b→>
</p><p><bä> doesn't terminate </bä>
</p><p><boo> doesn't terminate </boo>
</p><p><s.foo> doesn't terminate </s.foo>
</p><p><sub-ID#1>
</p>
-!! end
-
-# There is a tidy bug here: http://sourceforge.net/p/tidy/bugs/946/
-# If the non-word-character tag made it through the sanitizer, tidy
-# would munge it up.
-!! test
-Non-word characters don't terminate tag names + tidy
-!! wikitext
-<blockquote|>a</blockquote>
-
-<b→> doesn't terminate </b→>
-
-<bä> doesn't terminate </bä>
-
-<boo> doesn't terminate </boo>
-
-<s.foo> doesn't terminate </s.foo>
-
-<sub-ID#1>
-!! html+tidy
-<p><blockquote|>a</p>
-<p><b→> doesn't terminate </b→></p>
-<p><bä> doesn't terminate </bä></p>
-<p><boo> doesn't terminate </boo></p>
-<p><s.foo> doesn't terminate </s.foo></p>
-<p><sub-ID#1></p>
!! end
###
@@ -1859,11 +1804,8 @@
<p>b
</p>
!! html+tidy
-<p>a</p>
-<blockquote>
-<p>foo</p>
-</blockquote>
-<p>b</p>
+<p>a </p><blockquote><p>foo</p></blockquote><p>b
+</p>
!! end
!! test
@@ -1894,14 +1836,8 @@
b <blockquote>foo</blockquote>
!! html+tidy
-<p>a</p>
-<blockquote>
-<p>foo</p>
-</blockquote>
-<p>b</p>
-<blockquote>
-<p>foo</p>
-</blockquote>
+<p>a </p><blockquote><p>foo</p></blockquote><p>
+b </p><blockquote><p>foo</p></blockquote>
!! end
!! test
@@ -2194,9 +2130,7 @@
</blockquote>
!! html+tidy
-<blockquote>
-<p>Foo</p>
-<del>bar</del> <ins>baz</ins> quux</blockquote>
+<blockquote><p>Foo </p><del>bar</del><ins>baz</ins><p> quux</p></blockquote>
!! end
!! test
@@ -2292,7 +2226,7 @@
</nowiki>
</pre>
-!! html/php
+!! html/php+tidy
<pre>
<nowiki>
</pre>
@@ -5142,7 +5076,7 @@
parsoid=wt2html,html2html
!! wikitext
[http://example.com [[wikilink]] embedded in ext link]
-!! html/php
+!! html/php+tidy
<p><a rel="nofollow" class="external text" href="http://example.com"></a><a
href="/index.php?title=Wikilink&action=edit&redlink=1" class="new"
title="Wikilink (page does not exist)">wikilink</a><a rel="nofollow"
class="external text" href="http://example.com"> embedded in ext link</a>
</p>
!! html/parsoid
@@ -5332,7 +5266,7 @@
External link containing double-single-quotes in text embedded in italics (bug
4598 sanity check)
!! wikitext
''Some [http://example.com/ pretty ''italics'' and stuff]!''
-!! html
+!! html/php+tidy
<p><i>Some </i><a rel="nofollow" class="external text"
href="http://example.com/"><i>pretty </i>italics<i> and stuff</i></a><i>!</i>
</p>
!! end
@@ -5754,7 +5688,7 @@
<p><i><b>Bold italic text </b>with bold deactivated<b> in between.</b></i>
</p><p><b><i>Bold italic text </i>with italic deactivated<i> in
between.</i></b>
</p><p><b>Bold text..</b>
-</p><p>..spanning two paragraphs (should not work).
+</p><p>..spanning two paragraphs (should not work).<b></b>
</p><p><b>Bold tag left open</b>
</p><p><i>Italic tag left open</i>
</p><p>Normal text.
@@ -8818,7 +8752,7 @@
!! wikitext
<br style="clear:both;" />
!! html
-<p><br style="clear:both;" />
+<p><br style="clear:both;"/>
</p>
!! end
@@ -8828,7 +8762,7 @@
<br style="clear: left;">
<br style="clear: right;">
<br style="clear: both;">
-!! html
+!! html+tidy
<p><br style="clear: left;" />
<br style="clear: right;" />
<br style="clear: both;" />
@@ -11931,12 +11865,9 @@
<tr><td></td></tr></table>
!! html+tidy
-<p>a</p>
-<table>
-<tr>
-<td></td>
-</tr>
-</table>
+<p>
+a
+</p><table><tbody><tr><td></td></tr></tbody></table>
!! end
!!test
@@ -16861,7 +16792,7 @@
Sanitizer: Closing of open but not closed tags
!! wikitext
<s>foo
-!! html
+!! html+tidy
<p><s>foo</s>
</p>
!! end
@@ -16931,7 +16862,7 @@
<link rel="stylesheet" href="{{SERVER}}">
<link rel="stylesheet" itemprop="hello" href="{{SERVER}}">
</div>
-!! html
+!! html+tidy
<div itemscope="">
<p> <meta itemprop="hello" content="world" />
<meta http-equiv="refresh" content="5">
@@ -17515,15 +17446,14 @@
<td> And yet som tabular data</td>
</tr>
</table>
-!! html
+!! html+tidy
<table>
- <tr>
+ <tbody><tr>
<td> Some tabular data</td>
<td> More tabular data ...
</td><td> And yet som tabular data</td>
</tr>
-</table>
-
+</tbody></table>
!! end
!! test
@@ -18547,7 +18477,7 @@
!! wikitext
'''''<nowiki/>'''''
!! html/php
-<p><i></i>
+<p><i><b></b></i>
</p>
!! html/parsoid
<p><b><i></i></b></p>
@@ -18559,7 +18489,7 @@
!! wikitext
'''''<nowiki/>'''''
!! html/php
-<p><i></i>
+<p><i><b></b></i>
</p>
!! html/parsoid
<p><i><b></b></i></p>
@@ -19322,9 +19252,8 @@
Don't fall for the self-closing div
!! wikitext
<div>hello world</div/>
-!! html
+!! html+tidy
<div>hello world</div>
-
!! end
!! test
@@ -20349,9 +20278,8 @@
Line two</blockquote>
!! html+tidy
-<blockquote>
-<p>Line one Line two</p>
-</blockquote>
+<blockquote><p>Line one
+Line two</p></blockquote>
!! end
!! test
@@ -20368,9 +20296,9 @@
Line two</blockquote>
!! html+tidy
-<blockquote>
-<p>Line one</p>
-Line two</blockquote>
+<blockquote><p>Line one
+</p><p>Line two
+</p></blockquote>
!! end
!! test
@@ -20387,10 +20315,9 @@
</blockquote>
!! html+tidy
-<blockquote>
-<p>Line one</p>
-<p>Line two</p>
-</blockquote>
+<blockquote><p>Line one
+</p><p>Line two
+</p></blockquote>
!! end
!! test
@@ -20946,7 +20873,7 @@
<indicator name="10">Two
paragraphs</indicator>
-!! html
+!! html+tidy
01=hello world
02=<a href="/wiki/Main_Page" title="Main Page">Main Page</a>
03=<img alt="Foobar.jpg"
src="http://example.com/images/thumb/3/3a/Foobar.jpg/25px-Foobar.jpg"
width="25" height="3"
srcset="http://example.com/images/thumb/3/3a/Foobar.jpg/38px-Foobar.jpg 1.5x,
http://example.com/images/thumb/3/3a/Foobar.jpg/50px-Foobar.jpg 2x" />
@@ -21287,21 +21214,13 @@
<h2><span class="mw-headline"
id="Quote"><blockquote>Quote</blockquote></span><span
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a
href="/index.php?title=Main_Page&action=edit&section=1" title="Edit
section: Quote">edit</a><span
class="mw-editsection-bracket">]</span></span></h2>
!! html+tidy
-<p></p>
-<div id="toc" class="toc">
-<div id="toctitle">
-<h2>Contents</h2>
-</div>
+<div id="toc" class="toc"><div id="toctitle"><h2>Contents</h2></div>
<ul>
<li class="toclevel-1 tocsection-1"><a href="#Quote"><span
class="tocnumber">1</span> <span class="toctext">Quote</span></a></li>
</ul>
</div>
<p></p>
-<h2><span class="mw-headline" id="Quote"></span></h2>
-<blockquote>
-<p><span class="mw-headline" id="Quote">Quote</span></p>
-</blockquote>
-<p><span class="mw-editsection"><span
class="mw-editsection-bracket">[</span><a
href="/index.php?title=Main_Page&action=edit&section=1" title="Edit
section: Quote">edit</a><span class="mw-editsection-bracket">]</span></span></p>
+<h2><span class="mw-headline" id="Quote"></span></h2><blockquote><p><span
class="mw-headline" id="Quote">Quote</span></p></blockquote><p><span
class="mw-editsection"><span class="mw-editsection-bracket">[</span><a
href="/index.php?title=Main_Page&action=edit&section=1" title="Edit
section: Quote">edit</a><span class="mw-editsection-bracket">]</span></span></p>
!! end
!! test
@@ -23119,16 +23038,14 @@
|x<div><nowiki>a|b</nowiki></div>
|}
!! html/php+tidy
-<table>
+<table><tbody>
<tr>
<td>foo|bar</td>
</tr>
<tr>
<td>x
<div>a|b</div>
-</td>
-</tr>
-</table>
+</td></tr></tbody></table>
!! end
!! test
@@ -26078,7 +25995,7 @@
|}
!! html/php+tidy
<table>
-<tr>
+<tbody><tr>
<th>-</th>
<th>-</th>
</tr>
@@ -26095,7 +26012,7 @@
<p>-</p>
</td>
</tr>
-</table>
+</tbody></table>
!! end
!! test
diff --git a/tests/phpunit/MediaWikiTestCase.php
b/tests/phpunit/MediaWikiTestCase.php
index 8dfe628..af132e6 100644
--- a/tests/phpunit/MediaWikiTestCase.php
+++ b/tests/phpunit/MediaWikiTestCase.php
@@ -1703,7 +1703,7 @@
// of tidy. In that case however, we can not reliably detect
whether a failing validation
// is due to malformed HTML, or caused by tidy not being
installed as a command line tool.
// That would cause all HTML assertions to fail on a system
that has no tidy installed.
- if ( !$GLOBALS['wgTidyInternal'] || !MWTidy::isEnabled() ) {
+ if ( !( MWTidy::singleton() instanceof
MediaWiki\Tidy\RaggettInternalPHP ) ) {
$this->markTestSkipped( 'Tidy extension not installed'
);
}
diff --git a/tests/phpunit/includes/SanitizerTest.php
b/tests/phpunit/includes/SanitizerTest.php
index 72d7166..93ed19b 100644
--- a/tests/phpunit/includes/SanitizerTest.php
+++ b/tests/phpunit/includes/SanitizerTest.php
@@ -91,27 +91,6 @@
}
/**
- * @covers Sanitizer::removeHTMLtags
- * @dataProvider provideHtml5Tags
- *
- * @param string $tag Name of an HTML5 element (ie: 'video')
- * @param bool $escaped Whether sanitizer let the tag in or escape it
(ie: '<video>')
- */
- public function testRemovehtmltagsOnHtml5Tags( $tag, $escaped ) {
- MWTidy::setInstance( false );
-
- if ( $escaped ) {
- $this->assertEquals( "&lt;$tag&gt;",
- Sanitizer::removeHTMLtags( "<$tag>" )
- );
- } else {
- $this->assertEquals( "<$tag></$tag>\n",
- Sanitizer::removeHTMLtags( "<$tag>" )
- );
- }
- }
-
- /**
* Provide HTML5 tags
*/
public static function provideHtml5Tags() {
@@ -153,15 +132,6 @@
'<abbr> inside <dfn>',
],
];
- }
-
- /**
- * @dataProvider dataRemoveHTMLtags
- * @covers Sanitizer::removeHTMLtags
- */
- public function testRemoveHTMLtags( $input, $output, $msg = null ) {
- MWTidy::setInstance( false );
- $this->assertEquals( $output, Sanitizer::removeHTMLtags( $input
), $msg );
}
/**
diff --git a/tests/phpunit/includes/parser/NewParserTest.php
b/tests/phpunit/includes/parser/NewParserTest.php
index 8512572..eb0a888 100644
--- a/tests/phpunit/includes/parser/NewParserTest.php
+++ b/tests/phpunit/includes/parser/NewParserTest.php
@@ -35,10 +35,6 @@
* @var DjVuSupport
*/
private $djVuSupport;
- /**
- * @var TidySupport
- */
- private $tidySupport;
protected $file = false;
@@ -162,13 +158,8 @@
// DjVu support
$this->djVuSupport = new DjVuSupport();
// Tidy support
- $this->tidySupport = new TidySupport();
$tmpGlobals['wgTidyConfig'] = null;
$tmpGlobals['wgUseTidy'] = false;
- $tmpGlobals['wgDebugTidy'] = false;
- $tmpGlobals['wgTidyConf'] = $IP . '/includes/tidy/tidy.conf';
- $tmpGlobals['wgTidyOpts'] = '';
- $tmpGlobals['wgTidyInternal'] =
$this->tidySupport->isInternal();
$this->setMwGlobals( $tmpGlobals );
@@ -452,7 +443,6 @@
'wgMathDirectory' => $uploadDir . '/math',
'wgDefaultLanguageVariant' => $variant,
'wgLinkHolderBatchSize' => $linkHolderBatchSize,
- 'wgUseTidy' => isset( $opts['tidy'] ),
];
if ( $config ) {
@@ -772,11 +762,7 @@
}
if ( isset( $opts['tidy'] ) ) {
- if ( !$this->tidySupport->isEnabled() ) {
- $this->markTestSkipped( "SKIPPED: tidy
extension is not installed.\n" );
- } else {
- $options->setTidy( true );
- }
+ $options->setTidy( true );
}
if ( isset( $opts['pst'] ) ) {
diff --git a/tests/phpunit/includes/parser/TidyTest.php
b/tests/phpunit/includes/parser/TidyTest.php
index 62b84aa..ea8c4b9 100644
--- a/tests/phpunit/includes/parser/TidyTest.php
+++ b/tests/phpunit/includes/parser/TidyTest.php
@@ -4,14 +4,6 @@
* @group Parser
*/
class TidyTest extends MediaWikiTestCase {
-
- protected function setUp() {
- parent::setUp();
- if ( !MWTidy::isEnabled() ) {
- $this->markTestSkipped( 'Tidy not found' );
- }
- }
-
/**
* @dataProvider provideTestWrapping
*/
diff --git a/tests/testHelpers.inc b/tests/testHelpers.inc
index d04e0fc..049362b 100644
--- a/tests/testHelpers.inc
+++ b/tests/testHelpers.inc
@@ -832,43 +832,3 @@
&& is_executable( $wgDjvuTxt );
}
}
-
-/**
- * Initialize and detect the tidy support
- */
-class TidySupport {
- private $internalTidy;
- private $externalTidy;
-
- /**
- * Determine if there is a usable tidy.
- */
- public function __construct() {
- global $wgTidyBin;
-
- $this->internalTidy = extension_loaded( 'tidy' ) &&
- class_exists( 'tidy' ) && !wfIsHHVM();
-
- $this->externalTidy = is_executable( $wgTidyBin ) ||
- Installer::locateExecutableInDefaultPaths( [ $wgTidyBin
] )
- !== false;
- }
-
- /**
- * Returns true if we should use internal tidy.
- *
- * @return bool
- */
- public function isInternal() {
- return $this->internalTidy;
- }
-
- /**
- * Returns true if tidy is usable
- *
- * @return bool
- */
- public function isEnabled() {
- return $this->internalTidy || $this->externalTidy;
- }
-}
--
To view, visit https://gerrit.wikimedia.org/r/296370
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ifea448c3de708a94768a1861a993c2c3c6292759
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Tim Starling <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits