jenkins-bot has submitted this change and it was merged.

Change subject: Provide fallbacks for use of mb_convert_encoding() in HtmlFormatter
......................................................................
Provide fallbacks for use of mb_convert_encoding() in HtmlFormatter Since we don't strictly require mbstring in core, provide fallbacks for the use of mb_convert_encoding() to go to/from 'HTML-ENTITIES' in HtmlFormatter. Bug: T62174 Change-Id: I2dcde96e0e68a7d141f2ba79558b20e1d9c799ec --- M includes/HtmlFormatter.php 1 file changed, 17 insertions(+), 2 deletions(-) Approvals: Seb35: Looks good to me, but someone else must approve Legoktm: Looks good to me, approved Florianschmidtwelzow: Looks good to me, but someone else must approve jenkins-bot: Verified diff --git a/includes/HtmlFormatter.php b/includes/HtmlFormatter.php index b2926d1..221cefb 100644 --- a/includes/HtmlFormatter.php +++ b/includes/HtmlFormatter.php @@ -63,7 +63,15 @@ */ public function getDoc() { if ( !$this->doc ) { - $html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', 'UTF-8' ); + // DOMDocument::loadHTML apparently isn't very good with encodings, so + // convert input to ASCII by encoding everything above 128 as entities. + if ( function_exists( 'mb_convert_encoding' ) ) { + $html = mb_convert_encoding( $this->html, 'HTML-ENTITIES', 'UTF-8' ); + } else { + $html = preg_replace_callback( '/[\x{80}-\x{10ffff}]/u', function ( $m ) { + return '&#' . UtfNormal\Utils::utf8ToCodepoint( $m[0] ) . ';'; + }, $this->html ); + } // Workaround for bug that caused spaces before references // to disappear during processing: @@ -244,7 +252,14 @@ ) ); } $html = $replacements->replace( $html ); - $html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' ); + + if ( function_exists( 'mb_convert_encoding' ) ) { + // Just in case the conversion in getDoc() above used named + // entities that aren't known to html_entity_decode(). 
+ $html = mb_convert_encoding( $html, 'UTF-8', 'HTML-ENTITIES' ); + } else { + $html = html_entity_decode( $html, ENT_COMPAT, 'utf-8' ); + } return $html; } -- To view, visit https://gerrit.wikimedia.org/r/232935 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I2dcde96e0e68a7d141f2ba79558b20e1d9c799ec Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Anomie <bjor...@wikimedia.org> Gerrit-Reviewer: Daniel Friesen <dan...@nadir-seen-fire.com> Gerrit-Reviewer: Florianschmidtwelzow <florian.schmidt.stargatewis...@gmail.com> Gerrit-Reviewer: Legoktm <legoktm.wikipe...@gmail.com> Gerrit-Reviewer: Seb35 <seb35wikipe...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits