Tim Starling has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/337771 )

Change subject: [WIP] RemexHtml tidy driver with p-wrapping
......................................................................

[WIP] RemexHtml tidy driver with p-wrapping

Some tests still fail. Requires non-existent version of RemexHtml.

Change-Id: I900155b7dd199b0ae2a3b9cdb6db5136fc4f35a8
---
M autoload.php
M composer.json
A includes/tidy/RemexCompatFormatter.php
A includes/tidy/RemexCompatMunger.php
A includes/tidy/RemexDriver.php
A includes/tidy/RemexMungerData.php
A tests/phpunit/includes/tidy/RemexDriverTest.php
7 files changed, 720 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/71/337771/1

diff --git a/autoload.php b/autoload.php
index 0e719ae..595b6c0 100644
--- a/autoload.php
+++ b/autoload.php
@@ -914,6 +914,10 @@
        'MediaWiki\\Tidy\\RaggettInternalHHVM' => __DIR__ . 
'/includes/tidy/RaggettInternalHHVM.php',
        'MediaWiki\\Tidy\\RaggettInternalPHP' => __DIR__ . 
'/includes/tidy/RaggettInternalPHP.php',
        'MediaWiki\\Tidy\\RaggettWrapper' => __DIR__ . 
'/includes/tidy/RaggettWrapper.php',
+       'MediaWiki\\Tidy\\RemexDriver' => __DIR__ . 
'/includes/tidy/RemexDriver.php',
+       'MediaWiki\\Tidy\\RemexCompatMunger' => __DIR__ . 
'/includes/tidy/RemexCompatMunger.php',
+       'MediaWiki\\Tidy\\RemexCompatFormatter' => __DIR__ . 
'/includes/tidy/RemexCompatFormatter.php',
+       'MediaWiki\\Tidy\\RemexMungerData' => __DIR__ . 
'/includes/tidy/RemexMungerData.php',
        'MediaWiki\\Tidy\\TidyDriverBase' => __DIR__ . 
'/includes/tidy/TidyDriverBase.php',
        'MediaWiki\\Widget\\ComplexNamespaceInputWidget' => __DIR__ . 
'/includes/widget/ComplexNamespaceInputWidget.php',
        'MediaWiki\\Widget\\ComplexTitleInputWidget' => __DIR__ . 
'/includes/widget/ComplexTitleInputWidget.php',
diff --git a/composer.json b/composer.json
index d41492e..a3d5546 100644
--- a/composer.json
+++ b/composer.json
@@ -38,6 +38,7 @@
                "wikimedia/ip-set": "1.1.0",
                "wikimedia/php-session-serializer": "1.0.4",
                "wikimedia/relpath": "1.0.3",
+               "wikimedia/remex-html": "~1.0",
                "wikimedia/running-stat": "1.1.0",
                "wikimedia/scoped-callback": "1.0.0",
                "wikimedia/utfnormal": "1.1.0",
diff --git a/includes/tidy/RemexCompatFormatter.php 
b/includes/tidy/RemexCompatFormatter.php
new file mode 100644
index 0000000..2183432
--- /dev/null
+++ b/includes/tidy/RemexCompatFormatter.php
@@ -0,0 +1,68 @@
+<?php
+
+namespace MediaWiki\Tidy;
+
+use RemexHtml\HTMLData;
+use RemexHtml\Serializer\HtmlFormatter;
+use RemexHtml\Serializer\SerializerNode;
+use RemexHtml\Tokenizer\PlainAttributes;
+
+/**
+ * HTML formatter used together with RemexCompatMunger.
+ *
+ * Nodes flagged as p-wrappers are written out as <p> elements, or as their
+ * bare contents when the wrapper is disabled or blank. Attribute-less li, p
+ * and tr elements whose contents are only whitespace are written with
+ * class="mw-empty-elt". Everything else is serialized as a normal start tag,
+ * contents and end tag.
+ */
+class RemexCompatFormatter extends HtmlFormatter {
+       /** @var bool[] Element names which are marked when blank and attribute-less */
+       private static $markedEmptyElements = [
+               'li' => true,
+               'p' => true,
+               'tr' => true,
+       ];
+
+       /**
+        * @param array $options Passed through to the HtmlFormatter constructor
+        */
+       public function __construct( $options = [] ) {
+               parent::__construct( $options );
+               // In both text and attribute values, write U+00A0 as a numeric
+               // reference and stop escaping "&".
+               // NOTE(review): presumably "&" is left alone because RemexDriver runs
+               // the tokenizer with ignoreCharRefs, so references arrive undecoded
+               // -- confirm.
+               $nbsp = "\xc2\xa0";
+               $this->textEscapes[$nbsp] = '&#160;';
+               $this->attributeEscapes[$nbsp] = '&#160;';
+               unset( $this->textEscapes["&"], $this->attributeEscapes["&"] );
+       }
+
+       /**
+        * Produce no output at the start of the document.
+        *
+        * @param string|null $fragmentNamespace Unused
+        * @param string|null $fragmentName Unused
+        * @return string Always the empty string
+        */
+       public function startDocument( $fragmentNamespace, $fragmentName ) {
+               return '';
+       }
+
+       /**
+        * Serialize one element.
+        *
+        * @param SerializerNode $parent Unused
+        * @param SerializerNode $node The node to serialize; its snData is read
+        *   as a RemexMungerData
+        * @param string $contents The already-serialized children
+        * @return string
+        */
+       public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
+               $mungerData = $node->snData;
+               if ( $mungerData->isPWrapper ) {
+                       // A p-wrapper only becomes a real paragraph when it is still
+                       // enabled and something non-blank was put into it
+                       $emitParagraph = !$mungerData->isDisabledPWrapper && !$mungerData->isBlank;
+                       return $emitParagraph ? "<p>$contents</p>" : $contents;
+               }
+
+               $tagName = $node->name;
+               $attributes = $node->attrs;
+               if ( isset( self::$markedEmptyElements[$tagName] )
+                       && $attributes->count() === 0
+                       && strspn( $contents, "\t\n\f\r " ) === strlen( $contents )
+               ) {
+                       return "<{$tagName} class=\"mw-empty-elt\">$contents</{$tagName}>";
+               }
+
+               $isHtml = $node->namespace === HTMLData::NS_HTML;
+               $out = "<$tagName";
+               foreach ( $attributes->getValues() as $key => $value ) {
+                       $out .= " $key=\"" . strtr( $value, $this->attributeEscapes ) . '"';
+               }
+               if ( $isHtml && isset( $this->voidElements[$tagName] ) ) {
+                       // Void elements are written in self-closing form
+                       return $out . ' />';
+               }
+
+               // For elements listed in prefixLfElements, a leading newline in the
+               // contents is doubled
+               $doubleLeadingLf = $isHtml
+                       && isset( $contents[0] ) && $contents[0] === "\n"
+                       && isset( $this->prefixLfElements[$tagName] );
+               return $out . '>' . ( $doubleLeadingLf ? "\n" : '' ) . "$contents</$tagName>";
+       }
+}
diff --git a/includes/tidy/RemexCompatMunger.php 
b/includes/tidy/RemexCompatMunger.php
new file mode 100644
index 0000000..3783d61
--- /dev/null
+++ b/includes/tidy/RemexCompatMunger.php
@@ -0,0 +1,356 @@
+<?php
+
+namespace MediaWiki\Tidy;
+
+use RemexHtml\HTMLData;
+use RemexHtml\Serializer\Serializer;
+use RemexHtml\Serializer\SerializerNode;
+use RemexHtml\Tokenizer\Attributes;
+use RemexHtml\Tokenizer\PlainAttributes;
+use RemexHtml\TreeBuilder\TreeBuilder;
+use RemexHtml\TreeBuilder\TreeHandler;
+use RemexHtml\TreeBuilder\Element;
+
+class RemexCompatMunger implements TreeHandler {
+       private static $onlyInlineElements = [
+               "a" => true,
+               "abbr" => true,
+               "acronym" => true,
+               "applet" => true,
+               "b" => true,
+               "basefont" => true,
+               "bdo" => true,
+               "big" => true,
+               "br" => true,
+               "button" => true,
+               "cite" => true,
+               "code" => true,
+               "dfn" => true,
+               "em" => true,
+               "font" => true,
+               "i" => true,
+               "iframe" => true,
+               "img" => true,
+               "input" => true,
+               "kbd" => true,
+               "label" => true,
+               "legend" => true,
+               "map" => true,
+               "object" => true,
+               "param" => true,
+               "q" => true,
+               "rb" => true,
+               "rbc" => true,
+               "rp" => true,
+               "rt" => true,
+               "rtc" => true,
+               "ruby" => true,
+               "s" => true,
+               "samp" => true,
+               "select" => true,
+               "small" => true,
+               "span" => true,
+               "strike" => true,
+               "strong" => true,
+               "sub" => true,
+               "sup" => true,
+               "textarea" => true,
+               "tt" => true,
+               "u" => true,
+               "var" => true,
+       ];
+
+       private static $formattingElements = [
+               'a' => true,
+               'b' => true,
+               'big' => true,
+               'code' => true,
+               'em' => true,
+               'font' => true,
+               'i' => true,
+               'nobr' => true,
+               's' => true,
+               'small' => true,
+               'strike' => true,
+               'strong' => true,
+               'tt' => true,
+               'u' => true,
+       ];
+
+       /** @var Serializer The downstream handler which receives the munged events */
+       private $serializer;
+
+       /**
+        * Constructor
+        *
+        * @param Serializer $serializer The serializer to which all tree
+        *   construction events are forwarded after munging
+        */
+       public function __construct( Serializer $serializer ) {
+               $this->serializer = $serializer;
+       }
+
+       /**
+        * Start the document downstream and flag the serializer's root node as
+        * a context in which bare content needs p-wrapping.
+        *
+        * @param string|null $fragmentNamespace Forwarded to the serializer
+        * @param string|null $fragmentName Forwarded to the serializer
+        */
+       public function startDocument( $fragmentNamespace, $fragmentName ) {
+               $serializer = $this->serializer;
+               $serializer->startDocument( $fragmentNamespace, $fragmentName );
+
+               $rootData = new RemexMungerData;
+               $rootData->needsPWrapping = true;
+               $serializer->getRootNode()->snData = $rootData;
+       }
+
+       /**
+        * Forward the end-of-document event to the serializer unchanged.
+        *
+        * @param int $pos Passed through to the serializer
+        */
+       public function endDocument( $pos ) {
+               $this->serializer->endDocument( $pos );
+       }
+
+       /**
+        * Work out where an insertion requested by the tree builder should
+        * actually go in the munged tree.
+        *
+        * For insertions under an element, the target is redirected to the
+        * element's current clone if it has one, otherwise to its open child
+        * p-wrapper (unless both the content and that p-wrapper are blank),
+        * otherwise to the element itself.
+        *
+        * @param int $preposition One of the TreeBuilder preposition constants
+        * @param Element|null $refElement The reference element; not used for
+        *   TreeBuilder::ROOT
+        * @param bool $isBlank Whether the caller considers the content blank
+        * @return array [ parent node, reference node to pass to the serializer ]
+        */
+       private function getParentForInsert( $preposition, $refElement, $isBlank ) {
+               if ( $preposition === TreeBuilder::ROOT ) {
+                       return [ $this->serializer->getRootNode(), null ];
+               }
+               if ( $preposition === TreeBuilder::BEFORE ) {
+                       return [ $this->serializer->getParentNode( $refElement ), $refElement->userData ];
+               }
+
+               $node = $refElement->userData;
+               $data = $node->snData;
+               $target = $node;
+               if ( $data->currentCloneElement ) {
+                       $target = $data->currentCloneElement->userData;
+               } elseif ( $data->childPElement
+                       && !( $isBlank && $data->childPElement->userData->snData->isBlank )
+               ) {
+                       $target = $data->childPElement->userData;
+               }
+               return [ $target, $target ];
+       }
+
+       /**
+        * Create a p-wrapper pseudo-element ("mw:p-wrap") under the given node
+        * and record it as that node's current child p-wrapper.
+        *
+        * @param SerializerNode $parent The node which will contain the wrapper
+        * @param int $sourceStart Source position passed to the serializer
+        * @return SerializerNode The serializer node of the new wrapper
+        */
+       private function insertPWrapper( SerializerNode $parent, $sourceStart ) {
+               $wrapperData = new RemexMungerData;
+               $wrapperData->isPWrapper = true;
+               $wrapperData->wrapBaseNode = $parent;
+
+               $wrapper = new Element( HTMLData::NS_HTML, 'mw:p-wrap', new PlainAttributes );
+               // Inserting downstream initialises $wrapper->userData
+               $this->serializer->insertElement(
+                       TreeBuilder::UNDER, $parent, $wrapper, false, $sourceStart, 0 );
+
+               $wrapperNode = $wrapper->userData;
+               $wrapperNode->snData = $wrapperData;
+               $parent->snData->childPElement = $wrapper;
+               return $wrapperNode;
+       }
+
+       /**
+        * Handle a run of text: split the tag stack and/or open a p-wrapper as
+        * required, update the blank/text flags of the receiving node, then
+        * forward the text to the serializer.
+        *
+        * @param int $preposition One of the TreeBuilder preposition constants
+        * @param Element|null $refElement
+        * @param string $text Buffer containing the text
+        * @param int $start Offset of the text within $text
+        * @param int $length Length of the text within $text
+        * @param int $sourceStart Forwarded to the serializer
+        * @param int $sourceLength Forwarded to the serializer
+        */
+       public function characters( $preposition, $refElement, $text, $start, $length,
+               $sourceStart, $sourceLength
+       ) {
+               // True when the range holds at least one non-whitespace byte.
+               // NOTE(review): this value is handed to getParentForInsert() as its
+               // $isBlank argument, i.e. with the opposite sense to what that
+               // parameter name suggests -- confirm which sense is intended.
+               $hasNonBlank = strspn( $text, "\t\n\f\r ", $start, $length ) !== $length;
+               $under = $preposition === TreeBuilder::UNDER;
+
+               list( $parent, $refNode ) = $this->getParentForInsert(
+                       $preposition, $refElement, $hasNonBlank );
+
+               // If the parent is splittable and in block mode, split the tag stack
+               if ( $under && $parent->snData->isSplittable && !$parent->snData->ancestorPNode ) {
+                       $parent = $refNode = $this->splitTagStack( $refNode, true, $sourceStart );
+               }
+
+               // Bare text in a p-wrapping context goes into a new p-wrapper
+               if ( $under && $parent->snData->needsPWrapping ) {
+                       $parent = $refNode = $this->insertPWrapper( $refNode, $sourceStart );
+               }
+
+               if ( $hasNonBlank ) {
+                       // Non-whitespace characters detected
+                       $receiverData = $parent->snData;
+                       $receiverData->hasText = true;
+                       $receiverData->isBlank = false;
+               }
+
+               $this->serializer->characters( $preposition, $refNode, $text, $start,
+                       $length, $sourceStart, $sourceLength );
+       }
+
+       /**
+        * Handle an element insertion: close, split, create or disable
+        * p-wrappers as required by the element's inline/block status, forward
+        * the insertion to the serializer, then initialise the munger data of
+        * the new node.
+        *
+        * @param int $preposition One of the TreeBuilder preposition constants
+        * @param Element|null $refElement
+        * @param Element $element The element being inserted
+        * @param bool $void Forwarded to the serializer
+        * @param int $sourceStart Forwarded to the serializer
+        * @param int $sourceLength Forwarded to the serializer
+        */
+       public function insertElement( $preposition, $refElement, Element $element, $void,
+               $sourceStart, $sourceLength
+       ) {
+               list( $parent, $newRef ) = $this->getParentForInsert(
+                       $preposition, $refElement, false );
+               $parentData = $parent->snData;
+               $elementName = $element->htmlName;
+
+               $inline = isset( self::$onlyInlineElements[$elementName] );
+               $under = $preposition === TreeBuilder::UNDER;
+
+               // If the element is non-inline and the parent is a p-wrapper,
+               // close the parent and insert into its parent instead
+               if ( $under && $parentData->isPWrapper && !$inline ) {
+                       $parent = $this->serializer->getParentNode( $parent );
+                       $parentData = $parent->snData;
+                       $parentData->childPElement = null;
+                       $newRef = $refElement->userData;
+                       // FIXME cannot call endTag() since we don't have an Element
+               }
+
+               // If the parent is splittable and the current element is inline in block
+               // context, or if the current element is a block under a p-wrapper, split
+               // the tag stack.
+               if ( $under && $parentData->isSplittable
+                       && (bool)$parentData->ancestorPNode !== $inline
+               ) {
+                       $newRef = $this->splitTagStack( $newRef, $inline, $sourceStart );
+                       $parent = $newRef;
+                       $parentData = $parent->snData;
+               }
+
+               // If the element is inline and we are in body/blockquote, we need
+               // to create a p-wrapper
+               if ( $under && $parentData->needsPWrapping && $inline ) {
+                       $newRef = $this->insertPWrapper( $newRef, $sourceStart );
+                       $parent = $newRef;
+                       $parentData = $parent->snData;
+               }
+
+               // If the element is non-inline and (despite attempting to split above)
+               // there is still an ancestor p-wrap, disable that p-wrap
+               if ( $parentData->ancestorPNode && !$inline ) {
+                       $parentData->ancestorPNode->snData->isDisabledPWrapper = true;
+               }
+
+               // An element with element children is a non-blank element
+               $parentData->isBlank = false;
+
+               // Insert the element downstream and so initialise its userData
+               $this->serializer->insertElement( $preposition, $newRef,
+                       $element, $void, $sourceStart, $sourceLength );
+
+               // Initialise snData, reusing any data object that is already attached
+               // so that $elementData is always defined for the assignments below
+               $elementData = $element->userData->snData;
+               if ( !$elementData ) {
+                       $elementData = $element->userData->snData = new RemexMungerData;
+               }
+
+               // Formatting elements inside a p-wrapper or another splittable
+               // element may themselves be split later
+               if ( ( $parentData->isPWrapper || $parentData->isSplittable )
+                       && isset( self::$formattingElements[$elementName] )
+               ) {
+                       $elementData->isSplittable = true;
+               }
+
+               // Propagate the enclosing p-wrapper, if any
+               if ( $parentData->isPWrapper ) {
+                       $elementData->ancestorPNode = $parent;
+               } elseif ( $parentData->ancestorPNode ) {
+                       $elementData->ancestorPNode = $parentData->ancestorPNode;
+               }
+
+               // Propagate the node under which p-wrappers for this subtree are made
+               if ( $parentData->wrapBaseNode ) {
+                       $elementData->wrapBaseNode = $parentData->wrapBaseNode;
+               } elseif ( $parentData->needsPWrapping ) {
+                       $elementData->wrapBaseNode = $parent;
+               }
+
+               // These elements start a new p-wrapping context
+               if ( $elementName === 'body'
+                       || $elementName === 'blockquote'
+                       || $elementName === 'html'
+               ) {
+                       $elementData->needsPWrapping = true;
+               }
+       }
+
+       /**
+        * Clone nodes in a stack range and return the new parent
+        *
+        * The range runs from $parentNode up to, but not including, either the
+        * wrap base node (when $inline is true) or the ancestor p-wrapper (when
+        * $inline is false). If no node in the range has text, nothing is cloned
+        * and $parentNode is returned unchanged. Otherwise each node in the range
+        * is re-created, outermost first, under a new p-wrapper (inline) or
+        * directly under the wrap base node (block).
+        *
+        * @param SerializerNode $parentNode
+        * @param bool $inline
+        * @param int $pos The source position
+        * @return SerializerNode
+        * @throws \Exception If the root node is reached before the end of the
+        *   clone range
+        */
+       private function splitTagStack( SerializerNode $parentNode, $inline, $pos ) {
+               $parentData = $parentNode->snData;
+               $wrapBase = $parentData->wrapBaseNode;
+               if ( $inline ) {
+                       $cloneEnd = $wrapBase;
+               } else {
+                       $cloneEnd = $parentData->ancestorPNode;
+               }
+
+               $serializer = $this->serializer;
+               $node = $parentNode;
+               // Nodes to be cloned, innermost first
+               $nodes = [];
+               $haveContent = false;
+               $root = $serializer->getRootNode();
+               while ( $node !== $cloneEnd ) {
+                       $haveContent = $haveContent || $node->snData->hasText;
+
+                       $nodes[] = $node;
+                       $node = $serializer->getParentNode( $node );
+                       if ( $node === $root ) {
+                               throw new \Exception( 'Did not find end of clone range' );
+                       }
+               }
+
+               if ( !$haveContent ) {
+                       return $parentNode;
+               }
+
+               // Append the node under which the outermost clone will be inserted
+               if ( $inline ) {
+                       $pWrap = $this->insertPWrapper( $wrapBase, $pos );
+                       $node = $pWrap;
+                       $nodes[] = $node;
+               } else {
+                       $pWrap = null;
+                       $node = $wrapBase;
+                       $nodes[] = $node;
+               }
+
+               // Walk from the outermost original node to the innermost, inserting
+               // each clone under the clone (or base) created for the entry after it
+               for ( $i = count( $nodes ) - 2; $i >= 0; $i-- ) {
+                       $node = $nodes[$i];
+                       $nodeParent = $nodes[$i + 1];
+                       $element = new Element( $node->namespace, $node->name, $node->attrs );
+                       $this->serializer->insertElement( TreeBuilder::UNDER, $nodeParent,
+                               $element, false, $pos, 0 );
+                       // Later insertions under the original are redirected to the clone
+                       $node->snData->currentCloneElement = $element;
+
+                       $node = $element->userData;
+                       $elementData = $node->snData = new RemexMungerData;
+                       if ( $pWrap ) {
+                               $elementData->ancestorPNode = $pWrap;
+                       }
+                       $elementData->isSplittable = true;
+                       $elementData->wrapBaseNode = $wrapBase;
+               }
+               return $node;
+       }
+
+       /**
+        * Forward an end tag event to the serializer unchanged.
+        *
+        * @param Element $element
+        * @param int $sourceStart
+        * @param int $sourceLength
+        */
+       public function endTag( Element $element, $sourceStart, $sourceLength ) 
{
+               $this->serializer->endTag( $element, $sourceStart, 
$sourceLength );
+       }
+
+       /**
+        * Forward a doctype event to the serializer unchanged.
+        *
+        * All arguments are passed through in the same order.
+        */
+       public function doctype( $name, $public, $system, $quirks, 
$sourceStart, $sourceLength ) {
+               $this->serializer->doctype( $name, $public,  $system, $quirks,
+                       $sourceStart, $sourceLength );
+       }
+
+       /**
+        * Forward a comment to the serializer. The insertion reference is
+        * resolved with the comment treated as blank content, and no node
+        * flags are changed.
+        *
+        * @param int $preposition One of the TreeBuilder preposition constants
+        * @param Element|null $refElement
+        * @param string $text
+        * @param int $sourceStart
+        * @param int $sourceLength
+        */
+       public function comment( $preposition, $refElement, $text, $sourceStart, $sourceLength ) {
+               $target = $this->getParentForInsert( $preposition, $refElement, true );
+               // Only the reference node (index 1) is needed here
+               $this->serializer->comment( $preposition, $target[1], $text,
+                       $sourceStart, $sourceLength );
+       }
+
+       /**
+        * Forward a parse error to the serializer unchanged.
+        *
+        * @param string $text
+        * @param int $pos
+        */
+       public function error( $text, $pos ) {
+               $this->serializer->error( $text, $pos );
+       }
+
+       /**
+        * Forward an attribute merge to the serializer unchanged.
+        *
+        * @param Element $element
+        * @param Attributes $attrs
+        * @param int $sourceStart
+        */
+       public function mergeAttributes( Element $element, Attributes $attrs, 
$sourceStart ) {
+               $this->serializer->mergeAttributes( $element, $attrs, 
$sourceStart );
+       }
+
+       /**
+        * Forward a node removal to the serializer unchanged.
+        *
+        * @param Element $element
+        * @param int $sourceStart
+        */
+       public function removeNode( Element $element, $sourceStart ) {
+               $this->serializer->removeNode( $element, $sourceStart );
+       }
+
+       /**
+        * Move the children of $element to $newParent.
+        *
+        * $newParent is first inserted under $element through this class's own
+        * insertElement(), so that the p-wrapping logic runs and the new node
+        * gets its munger data; the reparenting itself is then delegated to
+        * the serializer.
+        *
+        * @param Element $element
+        * @param Element $newParent
+        * @param int $sourceStart
+        */
+       public function reparentChildren( Element $element, Element $newParent, 
$sourceStart ) {
+               $this->insertElement( TreeBuilder::UNDER, $element, $newParent, 
false, $sourceStart, 0 );
+               $this->serializer->reparentChildren( $element, $newParent, 
$sourceStart );
+       }
+}
diff --git a/includes/tidy/RemexDriver.php b/includes/tidy/RemexDriver.php
new file mode 100644
index 0000000..b42796e
--- /dev/null
+++ b/includes/tidy/RemexDriver.php
@@ -0,0 +1,37 @@
+<?php
+
+namespace MediaWiki\Tidy;
+
+use RemexHtml\Serializer\Serializer;
+use RemexHtml\Tokenizer\Tokenizer;
+use RemexHtml\TreeBuilder\Dispatcher;
+use RemexHtml\TreeBuilder\TreeBuilder;
+
+
+/**
+ * Tidy driver which passes the text through a RemexHtml tokenizer and tree
+ * builder, with RemexCompatMunger and RemexCompatFormatter on the output side.
+ */
+class RemexDriver extends TidyDriverBase {
+       /**
+        * @param array $config Passed through to TidyDriverBase
+        */
+       public function __construct( array $config ) {
+               parent::__construct( $config );
+       }
+
+       /**
+        * Parse the text as a fragment in an HTML body context and return the
+        * serializer's result.
+        *
+        * @param string $text
+        * @return string The value of Serializer::getResult()
+        */
+       public function tidy( $text ) {
+               // The pipeline is assembled from the output end backwards:
+               // tokenizer -> dispatcher -> tree builder -> munger -> serializer
+               $serializer = new Serializer( new RemexCompatFormatter );
+
+               $treeBuilderOptions = [
+                       'ignoreErrors' => true,
+                       'ignoreNulls' => true,
+               ];
+               $treeBuilder = new TreeBuilder(
+                       new RemexCompatMunger( $serializer ), $treeBuilderOptions );
+
+               $tokenizerOptions = [
+                       'ignoreErrors' => true,
+                       'ignoreCharRefs' => true,
+                       'ignoreNulls' => true,
+                       'skipPreprocess' => true,
+               ];
+               $tokenizer = new Tokenizer(
+                       new Dispatcher( $treeBuilder ), $text, $tokenizerOptions );
+
+               $fragmentContext = [
+                       'fragmentNamespace' => \RemexHtml\HTMLData::NS_HTML,
+                       'fragmentName' => 'body'
+               ];
+               $tokenizer->execute( $fragmentContext );
+
+               return $serializer->getResult();
+       }
+}
diff --git a/includes/tidy/RemexMungerData.php 
b/includes/tidy/RemexMungerData.php
new file mode 100644
index 0000000..9f93f57
--- /dev/null
+++ b/includes/tidy/RemexMungerData.php
@@ -0,0 +1,28 @@
+<?php
+
+namespace MediaWiki\Tidy;
+
+/**
+ * Per-node state which RemexCompatMunger attaches to serializer nodes (as
+ * snData) and which RemexCompatFormatter reads when writing p-wrappers.
+ */
+class RemexMungerData {
+       /**
+        * @var \RemexHtml\TreeBuilder\Element|null
+        * The p-wrapper element currently open under this node. Set when a
+        * p-wrapper is inserted, cleared when a non-inline element closes it.
+        */
+       public $childPElement;
+
+       /**
+        * @var \RemexHtml\Serializer\SerializerNode|null
+        * The p-wrapper node which encloses this node, if any.
+        */
+       public $ancestorPNode;
+
+       /**
+        * @var \RemexHtml\Serializer\SerializerNode|null
+        * The node under which p-wrappers for this subtree are inserted.
+        */
+       public $wrapBaseNode;
+
+       /**
+        * @var \RemexHtml\TreeBuilder\Element|null
+        * The most recent clone of this node made by a tag stack split.
+        * Insertions under the original are redirected to it.
+        */
+       public $currentCloneElement;
+
+       // True for the pseudo-elements created as p-wrappers
+       public $isPWrapper = false;
+       // True for formatting elements (and their clones) which may be split
+       public $isSplittable = false;
+       // True for the root node and for body, blockquote and html elements
+       public $needsPWrapping = false;
+       // Set once non-whitespace text has been inserted directly under the node
+       public $hasText = false;
+       // Cleared once non-whitespace text or any element is inserted under the node
+       public $isBlank = true;
+       // Set on a p-wrapper when a non-inline element ends up inside it; the
+       // formatter then writes the contents without <p> tags
+       public $isDisabledPWrapper = false;
+
+       /**
+        * Refuse writes to properties which are not declared above.
+        *
+        * @param string $name
+        * @param mixed $value
+        * @throws \Exception Always
+        */
+       public function __set( $name, $value ) {
+               throw new \Exception( "Cannot set property \"$name\"" );
+       }
+}
diff --git a/tests/phpunit/includes/tidy/RemexDriverTest.php 
b/tests/phpunit/includes/tidy/RemexDriverTest.php
new file mode 100644
index 0000000..a085db3
--- /dev/null
+++ b/tests/phpunit/includes/tidy/RemexDriverTest.php
@@ -0,0 +1,226 @@
+<?php
+
+class RemexDriverTest extends MediaWikiTestCase {
+       static private $remexTidyTestData = [
+               // Tests from Html5Depurate
+               [
+                       'Empty string',
+                       "",
+                       ""
+               ],
+               [
+                       'Simple p-wrap',
+                       "x",
+                       "<p>x</p>"
+               ],
+               [
+                       'No p-wrap of blank node',
+                       " ",
+                       " "
+               ],
+               [
+                       'p-wrap terminated by div',
+                       "x<div></div>",
+                       "<p>x</p><div></div>"
+               ],
+               [
+                       'p-wrap not terminated by span',
+                       "x<span></span>",
+                       "<p>x<span></span></p>"
+               ],
+               [
+                       'An element is non-blank and so gets p-wrapped',
+                       "<span></span>",
+                       "<p><span></span></p>"
+               ],
+               [
+                       'The blank flag is set after a block-level element',
+                       "<div></div> ",
+                       "<div></div> "
+               ],
+               [
+                       'Blank detection between two block-level elements',
+                       "<div></div> <div></div>",
+                       "<div></div> <div></div>"
+               ],
+               [
+                       'But p-wrapping of non-blank content works after an 
element',
+                       "<div></div>x",
+                       "<div></div><p>x</p>"
+               ],
+               [
+                       'p-wrapping between two block-level elements',
+                       "<div></div>x<div></div>",
+                       "<div></div><p>x</p><div></div>"
+               ],
+               [
+                       'p-wrap inside blockquote',
+                       "<blockquote>x</blockquote>",
+                       "<blockquote><p>x</p></blockquote>"
+               ],
+               [
+                       'A comment is blank for p-wrapping purposes',
+                       "<!-- x -->",
+                       "<!-- x -->"
+               ],
+               [
+                       'A comment is blank even when a p-wrap was opened by a 
text node',
+                       " <!-- x -->",
+                       " <!-- x -->"
+               ],
+               [
+                       'A comment does not open a p-wrap',
+                       "<!-- x -->x",
+                       "<!-- x --><p>x</p>"
+               ],
+               [
+                       'A comment does not close a p-wrap',
+                       "x<!-- x -->",
+                       "<p>x<!-- x --></p>"
+               ],
+               [
+                       'Empty li',
+                       "<ul><li></li></ul>",
+                       "<ul><li class=\"mw-empty-elt\"></li></ul>"
+               ],
+               [
+                       'li with element',
+                       "<ul><li><span></span></li></ul>",
+                       "<ul><li><span></span></li></ul>"
+               ],
+               [
+                       'li with text',
+                       "<ul><li>x</li></ul>",
+                       "<ul><li>x</li></ul>"
+               ],
+               [
+                       'Empty tr',
+                       "<table><tbody><tr></tr></tbody></table>",
+                       "<table><tbody><tr 
class=\"mw-empty-elt\"></tr></tbody></table>"
+               ],
+               [
+                       'Empty p',
+                       "<p>\n</p>",
+                       "<p class=\"mw-empty-elt\">\n</p>"
+               ],
+               [
+                       'No p-wrapping of an inline element which contains a 
block element (T150317)',
+                       "<small><div>x</div></small>",
+                       "<small><div>x</div></small>"
+               ],
+               [
+                       'p-wrapping of an inline element which contains an 
inline element',
+                       "<small><b>x</b></small>",
+                       "<p><small><b>x</b></small></p>"
+               ],
+               [
+                       'p-wrapping is enabled in a blockquote in an inline 
element',
+                       "<small><blockquote>x</blockquote></small>",
+                       "<small><blockquote><p>x</p></blockquote></small>"
+               ],
+               [
+                       'All bare text should be p-wrapped even when surrounded 
by block tags',
+                       
"<small><blockquote>x</blockquote></small>y<div></div>z",
+                       
"<small><blockquote><p>x</p></blockquote></small><p>y</p><div></div><p>z</p>"
+               ],
+
+               [
+                       'Split tag stack 1',
+                       "<small>x<div>y</div>z</small>",
+                       
"<p><small>x</small></p><small><div>y</div></small><p><small>z</small></p>"
+               ],
+               [
+                       'Split tag stack 2',
+                       "<small><div>y</div>z</small>",
+                       "<small><div>y</div></small><p><small>z</small></p>"
+               ],
+               [
+                       'Split tag stack 3',
+                       "<small>x<div>y</div></small>",
+                       "<p><small>x</small></p><small><div>y</div></small>"
+               ],
+               [
+                       'Split tag stack 4',
+                       "a<span>b<i>c<div>d</div></i>e</span>",
+                       
"<p>a<span>b<i>c</i></span></p><span><i><div>d</div></i></span><p><span>e</span></p>"
+               ],
+               [
+                       "Split tag stack regression check 1",
+                       "x<span><div>y</div></span>",
+                       "<p>x</p><span><div>y</div></span>"
+               ],
+               [
+                       "Split tag stack regression check 2",
+                       "a<span><i><div>d</div></i>e</span>",
+                       
"<p>a</p><span><i><div>d</div></i></span><p><span>e</span></p>"
+               ],
+               // Simple tests from pwrap.js
+               [
+                       'Simple pwrap test 1',
+                       'a',
+                       '<p>a</p>'
+               ],
+               [
+                       '<span> is not a splittable tag, but gets p-wrapped in 
simple wrapping scenarios',
+                       '<span>a</span>',
+                       '<p><span>a</span></p>'
+               ],
+               [
+                       'Simple pwrap test 3',
+                       'x <div>a</div> <div>b</div> y',
+                       '<p>x </p><div>a</div> <div>b</div><p> y</p>'
+               ],
+               [
+                       'Simple pwrap test 4',
+                       'x<!--c--> <div>a</div> <div>b</div> <!--c-->y',
+                       '<p>x<!--c--> </p><div>a</div> <div>b</div> 
<!--c--><p>y</p>'
+               ],
+               // Complex tests from pwrap.js
+               [
+                       'Complex pwrap test 1',
+                       '<i>x<div>a</div>y</i>',
+                       '<p><i>x</i></p><i><div>a</div></i><p><i>y</i></p>'
+               ],
+               [
+                       'Complex pwrap test 2',
+                       'a<small>b</small><i>c<div>d</div>e</i>f',
+                       
'<p>a<small>b</small><i>c</i></p><i><div>d</div></i><p><i>e</i>f</p>'
+               ],
+               [
+                       'Complex pwrap test 3',
+                       'a<small>b<i>c<div>d</div></i>e</small>',
+                       
'<p>a<small>b<i>c</i></small></p><small><i><div>d</div></i></small><p><small>e</small></p>'
+               ],
+               [
+                       'Complex pwrap test 4',
+                       'x<small><div>y</div></small>',
+                       '<p>x</p><small><div>y</div></small>'
+               ],
+               [
+                       'Complex pwrap test 5',
+                       'a<small><i><div>d</div></i>e</small>',
+                       
'<p>a</p><small><i><div>d</div></i></small><p><small>e</small></p>'
+               ],
+               [
+                       'Complex pwrap test 6',
+                       '<i>a<div>b</div>c<b>d<div>e</div>f</b>g</i>',
+                       
'<p><i>a</i></p><i><div>b</div></i><p><i>c<b>d</b></i></p><i><b><div>e</div></b></i><p><i><b>f</b>g</i></p>'
+               ],
+               [
+                       'Complex pwrap test 7',
+                       
'<i><b><font><div>x</div></font></b><div>y</div><b><font><div>z</div></font></b></i>',
+                       
'<i><b><font><div>x</div></font></b><div>y</div><b><font><div>z</div></font></b></i>'
+               ],
+       ];
+
+       /**
+        * Data provider for testTidy().
+        *
+        * Declared static so that PHPUnit can call it without a test case
+        * instance; it only reads static data.
+        *
+        * @return array[] List of [ description, input, expected output ]
+        */
+       public static function provider() {
+               return self::$remexTidyTestData;
+       }
+
+       /**
+        * Run one input through the Remex tidy driver and compare the output
+        * with the expected HTML.
+        *
+        * @dataProvider provider
+        */
+       public function testTidy( $desc, $input, $expected ) {
+               $driver = new MediaWiki\Tidy\RemexDriver( [] );
+               $this->assertEquals( $expected, $driver->tidy( $input ), $desc );
+       }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/337771
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I900155b7dd199b0ae2a3b9cdb6db5136fc4f35a8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Tim Starling <tstarl...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to