Jdlrobson has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/401776 )
Change subject: Hygiene: Begin refactoring of MobileFormatter mega class ...................................................................... Hygiene: Begin refactoring of MobileFormatter mega class After a discussion with Piotr we agreed that the MobileFormatter code is becoming unmanageable and hard to test. Going forward we will aim to reduce all transforms to simple classes that take a DomElement and transform it into another DomElement. This will improve testability and sanity. As a first pass we will only update the lead paragraph transform given that we are currently working on improving that feature in T170006 Bug: T170006 Change-Id: I1fb2dc9e7ad1fe0431819ce3ca6295b3df8410e0 --- M extension.json M includes/MobileFormatter.php A includes/transforms/IMobileTransform.php A includes/transforms/MoveLeadParagraphTransform.php A tests/phpunit/transforms/MoveLeadParagraphTransformTest.php 5 files changed, 176 insertions(+), 92 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/MobileFrontend refs/changes/76/401776/1 diff --git a/extension.json b/extension.json index d650d4d..351d06e 100644 --- a/extension.json +++ b/extension.json @@ -45,6 +45,9 @@ "ExtensionMessagesFiles": { "MobileFrontendAlias": "MobileFrontend.alias.php" }, + "AutoloadNamespaces": { + "MobileFrontend\\Transforms\\": "includes/transforms/" + }, "AutoloadClasses": { "ExtMobileFrontend": "includes/MobileFrontend.body.php", "MobileFrontendHooks": "includes/MobileFrontend.hooks.php", diff --git a/includes/MobileFormatter.php b/includes/MobileFormatter.php index cfabb07..7a39d4d 100644 --- a/includes/MobileFormatter.php +++ b/includes/MobileFormatter.php @@ -2,6 +2,7 @@ use HtmlFormatter\HtmlFormatter; use MobileFrontend\ContentProviders\IContentProvider; +use MobileFrontend\Transforms\MoveLeadParagraphTransform; /** * Converts HTML into a mobile-friendly version @@ -218,94 +219,6 @@ } } - /** - * Move the first paragraph in the lead section above the infobox - * - * In order for a paragraph to be moved the following conditions must be met: - * - the lead section contains at least one infobox; - * - the paragraph doesn't already appear before the first infobox - * if any in the DOM; - * - the paragraph contains text content, e.g. no <p></p>; - * - the paragraph doesn't contain coordinates, i.e. span#coordinates. - * - article belongs to the MAIN namespace - * - * Additionally if paragraph immediate sibling is a list (ol or ul element), the list - * is also moved along with paragraph above infobox. - * - * Note that the first paragraph is not moved before hatnotes, or mbox or other - * elements that are not infoboxes. - * - * @param DOMElement $leadSectionBody - * @param DOMDocument $doc Document to which the section belongs - */ - private function moveFirstParagraphBeforeInfobox( $leadSectionBody, $doc ) { - // Move lead parapgraph only on pages in MAIN namespace (see @T163805) - if ( $this->title->getNamespace() !== NS_MAIN ) { - return; - } - $xPath = new DOMXPath( $doc ); - // Find infoboxes and paragraphs that have text content, i.e. paragraphs - // that are not empty nor are wrapper paragraphs that contain span#coordinates. - $infoboxAndParagraphs = $xPath->query( - './table[contains(@class,"infobox")] | ./p[string-length(text()) > 0]', - $leadSectionBody - ); - // We need both an infobox and a paragraph and the first element of our query result - // ought to be an infobox. - if ( $infoboxAndParagraphs->length >= 2 && - $infoboxAndParagraphs->item( 0 )->nodeName == 'table' - ) { - $firstP = null; - for ( $i = 1; $i < $infoboxAndParagraphs->length; $i++ ) { - if ( $infoboxAndParagraphs->item( $i )->nodeName == 'p' ) { - $firstP = $infoboxAndParagraphs->item( $i ); - break; - } - } - if ( $firstP ) { - $listElementAfterParagraph = null; - $where = $infoboxAndParagraphs->item( 0 ); - - $elementAfterParagraphQuery = $xPath->query( 'following-sibling::*[1]', $firstP ); - if ( $elementAfterParagraphQuery->length > 0 ) { - $elem = $elementAfterParagraphQuery->item( 0 ); - if ( $elem->tagName === 'ol' || $elem->tagName === 'ul' ) { - $listElementAfterParagraph = $elem; - } - } - - $leadSectionBody->insertBefore( $firstP, $where ); - if ( $listElementAfterParagraph !== null ) { - $leadSectionBody->insertBefore( $listElementAfterParagraph, $where ); - } - } - } - /** - * @see https://phabricator.wikimedia.org/T149884 - * @todo remove after research is done - */ - if ( MobileContext::singleton()->getMFConfig()->get( 'MFLogWrappedInfoboxes' ) ) { - $this->logInfoboxesWrappedInContainers( $leadSectionBody, $xPath ); - } - } - - /** - * Finds all infoboxes which are one or more levels deep in $xPath content. When at least one - * element is found - log the page title and revision - * - * @see https://phabricator.wikimedia.org/T149884 - * @param $leadSectionBody - * @param DOMXPath $xPath - */ - private function logInfoboxesWrappedInContainers( $leadSectionBody, DOMXPath $xPath ) { - $infoboxes = $xPath->query( './*//table[contains(@class,"infobox")]' . - '[not(ancestor::table[contains(@class,"infobox")])]', $leadSectionBody ); - if ( $infoboxes->length > 0 ) { - \MediaWiki\Logger\LoggerFactory::getInstance( 'mobile' )->info( - "Found infobox wrapped with container on {$this->title} (rev:{$this->revId})" - ); - } - } /** * Replaces any references links with a link to Special:MobileCite * @@ -646,6 +559,10 @@ * @param array $transformOptions Options to pass when transforming content per section */ protected function makeSections( DOMDocument $doc, array $headings, array $transformOptions ) { + $leadTransform = $transformOptions[ self::SHOW_FIRST_PARAGRAPH_BEFORE_INFOBOX ] && + $this->title->getNamespace() === NS_MAIN ? + new MoveLeadParagraphTransform( $this->title, $this->revId ) : false; + // Find the parser output wrapper div $xpath = new DOMXPath( $doc ); $containers = $xpath->query( 'body/div[@class="mw-parser-output"][1]' ); @@ -695,8 +612,8 @@ $toc->appendChild( $tocHeading ); $sectionBody->appendChild( $toc ); } - if ( $transformOptions[ self::SHOW_FIRST_PARAGRAPH_BEFORE_INFOBOX ] ) { - $this->moveFirstParagraphBeforeInfobox( $sectionBody, $doc ); + if ( $leadTransform ) { + $leadTransform->transform( $sectionBody ); } } $sectionNumber += 1; @@ -710,8 +627,8 @@ } // If the document had the lead section only: - if ( $sectionNumber == 0 && $transformOptions[ self::SHOW_FIRST_PARAGRAPH_BEFORE_INFOBOX ] ) { - $this->moveFirstParagraphBeforeInfobox( $sectionBody, $doc ); + if ( $sectionNumber == 0 && $leadTransform ) { + $leadTransform->transform( $sectionBody ); } if ( $sectionBody->hasChildNodes() ) { diff --git a/includes/transforms/IMobileTransform.php b/includes/transforms/IMobileTransform.php new file mode 100644 index 0000000..ee54d72 --- /dev/null +++ b/includes/transforms/IMobileTransform.php @@ -0,0 +1,13 @@ +<?php + +namespace MobileFrontend\Transforms; + +use DomElement; + +interface IMobileTransform { + /** + * Transforms the DomElement in some way + * @param DomElement $node to be transformed + */ + public function transform( DomElement $node ); +} diff --git a/includes/transforms/MoveLeadParagraphTransform.php b/includes/transforms/MoveLeadParagraphTransform.php new file mode 100644 index 0000000..d41269d --- /dev/null +++ b/includes/transforms/MoveLeadParagraphTransform.php @@ -0,0 +1,112 @@ +<?php + +namespace MobileFrontend\Transforms; + +use DOMXPath; +use MobileContext; +use DomElement; + +class MoveLeadParagraphTransform implements IMobileTransform { + /** + * @param string $title for logging purposes + * @param number $revId for logging purposes + */ + public function __construct( $title, $revId ) { + $this->title = $title; + $this->revId = $revId; + } + + /** + * Rearranges content so that text in the lead paragraph is prioritised to appear + * before the infobox + * @param DomElement $node to be transformed + */ + public function transform( DomElement $node ) { + $this->moveFirstParagraphBeforeInfobox( $node, $node->ownerDocument ); + } + + /** + * Move the first paragraph in the lead section above the infobox + * + * In order for a paragraph to be moved the following conditions must be met: + * - the lead section contains at least one infobox; + * - the paragraph doesn't already appear before the first infobox + * if any in the DOM; + * - the paragraph contains text content, e.g. no <p></p>; + * - the paragraph doesn't contain coordinates, i.e. span#coordinates. + * - article belongs to the MAIN namespace + * + * Additionally if paragraph immediate sibling is a list (ol or ul element), the list + * is also moved along with paragraph above infobox. + * + * Note that the first paragraph is not moved before hatnotes, or mbox or other + * elements that are not infoboxes. + * + * @param DOMElement $leadSectionBody + * @param DOMDocument $doc Document to which the section belongs + */ + private function moveFirstParagraphBeforeInfobox( $leadSectionBody, $doc ) { + $xPath = new DOMXPath( $doc ); + // Find infoboxes and paragraphs that have text content, i.e. paragraphs + // that are not empty nor are wrapper paragraphs that contain span#coordinates. + $infoboxAndParagraphs = $xPath->query( + './table[contains(@class,"infobox")] | ./p[string-length(text()) > 0]', + $leadSectionBody + ); + // We need both an infobox and a paragraph and the first element of our query result + // ought to be an infobox. + if ( $infoboxAndParagraphs->length >= 2 && + $infoboxAndParagraphs->item( 0 )->nodeName == 'table' + ) { + $firstP = null; + for ( $i = 1; $i < $infoboxAndParagraphs->length; $i++ ) { + if ( $infoboxAndParagraphs->item( $i )->nodeName == 'p' ) { + $firstP = $infoboxAndParagraphs->item( $i ); + break; + } + } + if ( $firstP ) { + $listElementAfterParagraph = null; + $where = $infoboxAndParagraphs->item( 0 ); + + $elementAfterParagraphQuery = $xPath->query( 'following-sibling::*[1]', $firstP ); + if ( $elementAfterParagraphQuery->length > 0 ) { + $elem = $elementAfterParagraphQuery->item( 0 ); + if ( $elem->tagName === 'ol' || $elem->tagName === 'ul' ) { + $listElementAfterParagraph = $elem; + } + } + + $leadSectionBody->insertBefore( $firstP, $where ); + if ( $listElementAfterParagraph !== null ) { + $leadSectionBody->insertBefore( $listElementAfterParagraph, $where ); + } + } + } + /** + * @see https://phabricator.wikimedia.org/T149884 + * @todo remove after research is done + */ + if ( MobileContext::singleton()->getMFConfig()->get( 'MFLogWrappedInfoboxes' ) ) { + $this->logInfoboxesWrappedInContainers( $leadSectionBody, $xPath ); + } + } + + /** + * Finds all infoboxes which are one or more levels deep in $xPath content. When at least one + * element is found - log the page title and revision + * + * @see https://phabricator.wikimedia.org/T149884 + * @param $leadSectionBody + * @param DOMXPath $xPath + */ + private function logInfoboxesWrappedInContainers( $leadSectionBody, DOMXPath $xPath ) { + $infoboxes = $xPath->query( './*//table[contains(@class,"infobox")]' . + '[not(ancestor::table[contains(@class,"infobox")])]', $leadSectionBody ); + if ( $infoboxes->length > 0 ) { + \MediaWiki\Logger\LoggerFactory::getInstance( 'mobile' )->info( + "Found infobox wrapped with container on {$this->title} (rev:{$this->revId})" + ); + } + } +} diff --git a/tests/phpunit/transforms/MoveLeadParagraphTransformTest.php b/tests/phpunit/transforms/MoveLeadParagraphTransformTest.php new file mode 100644 index 0000000..3ddf67a --- /dev/null +++ b/tests/phpunit/transforms/MoveLeadParagraphTransformTest.php @@ -0,0 +1,39 @@ +<?php + +use MobileFrontend\Transforms\MoveLeadParagraphTransform; + +/** + * @group MobileFrontend + */ +class MoveLeadParagraphTransformTest extends MediaWikiTestCase { + public static function wrap( $html ) { + return "<!DOCTYPE HTML> +<html><body>$html</body></html> +"; + } + /** + * @dataProvider provideTransform + * + * @param string $html + * @param string $expected + */ + public function testTransform( $html, $expected ) { + $transform = new MoveLeadParagraphTransform( 'A', 1 ); + $doc = new DOMDocument(); + $doc->loadHTML( self::wrap( $html ) ); + $transform->transform( $doc->getElementsByTagName( 'body' )->item( 0 ) ); + $this->assertEquals( $doc->saveHTML(), self::wrap( $expected ) ); + } + + public function provideTransform() { + $infobox = '<table class="infobox"></table>'; + $paragraph = '<p>first paragraph</p>'; + + return [ + [ + "$infobox$paragraph", + "$paragraph$infobox", + ] + ]; + } +} -- To view, visit https://gerrit.wikimedia.org/r/401776 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I1fb2dc9e7ad1fe0431819ce3ca6295b3df8410e0 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/MobileFrontend Gerrit-Branch: master Gerrit-Owner: Jdlrobson <jrob...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits