Jforrester has uploaded a new change for review. https://gerrit.wikimedia.org/r/257046
Change subject: [WIP] Replace local regexs for browser detection with ua-parser ...................................................................... [WIP] Replace local regexs for browser detection with ua-parser Untested as yet. Change-Id: I076a855a1160d59d29926a04b31e5bf676aa5b92 --- M composer.json M includes/DefaultSettings.php M includes/EditPage.php 3 files changed, 87 insertions(+), 14 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/46/257046/1 diff --git a/composer.json b/composer.json index 0f8da12..a8ebaf6 100644 --- a/composer.json +++ b/composer.json @@ -25,6 +25,7 @@ "oyejorge/less.php": "1.7.0.9", "php": ">=5.3.3", "psr/log": "1.0.0", + "ua-parser/uap-php": "3.4.4", "wikimedia/assert": "0.2.2", "wikimedia/base-convert": "1.0.1", "wikimedia/cdb": "1.3.0", diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index e76b627..3cdc52d 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -2858,11 +2858,10 @@ $wgLegacyEncoding = false; /** - * Browser Blacklist for unicode non compliant browsers. Contains a list of - * regexps : "/regexp/" matching problematic browsers. These browsers will - * be served encoded unicode in the edit box instead of real unicode. + * User agent blacklist for Unicode non-compliant browsers, as detected where possible by + * the UAP library. */ -$wgBrowserBlackList = array( +$wgUserAgentEditingBlackList = array( /** * Netscape 2-4 detection * The minor version may contain strings such as "Gold" or "SGoldC-SGI" @@ -2873,9 +2872,8 @@ * * Reference: http://www.psychedelix.com/agents/index.shtml */ - '/^Mozilla\/2\.[^ ]+ [^(]*?\((?!compatible).*; [UIN]/', - '/^Mozilla\/3\.[^ ]+ [^(]*?\((?!compatible).*; [UIN]/', - '/^Mozilla\/4\.[^ ]+ [^(]*?\((?!compatible).*; [UIN]/', + // FIXME: This won't catch anything right now. Need to upstream Netscape detection. + array( "b_family" => "Netscape", "b_major" => array( "2", "4") ), /** * MSIE on Mac OS 9 is teh sux0r, converts þ to <thorn>, ð to <eth>, @@ -2890,13 +2888,14 @@ * @link https://en.wikipedia.org/w/index.php?diff=12356041&oldid=12355864 * @link https://en.wikipedia.org/wiki/Template%3AOS9 */ - '/^Mozilla\/4\.0 \(compatible; MSIE \d+\.\d+; Mac_PowerPC\)/', + array( "b_family" => "IE", "o_family" => "Mac OS" ), /** * Google wireless transcoder, seems to eat a lot of chars alive * https://it.wikipedia.org/w/index.php?title=Luciano_Ligabue&diff=prev&oldid=8857361 */ - '/^Mozilla\/4\.0 \(compatible; MSIE 6.0; Windows NT 5.0; Google Wireless Transcoder;\)/' + // FIXME: Might want to upstream. + array( "b_family" => "IE", "match" => "Google Wireless Transcoder" ), ); /** diff --git a/includes/EditPage.php b/includes/EditPage.php index c709d00..e1460c5 100644 --- a/includes/EditPage.php +++ b/includes/EditPage.php @@ -4055,7 +4055,7 @@ * @return bool */ private function checkUnicodeCompliantBrowser() { - global $wgBrowserBlackList, $wgRequest; + global $wgUserAgentEditingBlackList, $wgRequest; $currentbrowser = $wgRequest->getHeader( 'User-Agent' ); if ( $currentbrowser === false ) { @@ -4063,11 +4063,84 @@ return true; } - foreach ( $wgBrowserBlackList as $browser ) { - if ( preg_match( $browser, $currentbrowser ) ) { - return false; + use UAParser\Parser; + $uaparser = Parser::create(); + $result = $uaparser->parse( $currentbrowser ); + + foreach ( $wgUserAgentEditingBlackList as $entry ) { + // If there's a required browser … + if ( isset( $entry[ "b_family" ] ) ) { + // … and we don't match it, we pass to the next entry + if ( $entry[ "b_family" ] !== $result->ua->family ) { + continue; + } + + // … and there's a version requirement + if ( isset( $entry[ "b_major" ] ) { + // … the version is a number (so maximum), if we don't match pass to the next entry + if ( + is_int( $entry[ "b_major" ] ) && + $entry[ "b_major" ] < $result->ua->major + ) { + continue; + } + + // … the version is an array (so range), if we match return false + if ( + is_array( $entry[ "b_major" ] ) && + $entry[ "b_major" ][1] < $result->ua->major || + $entry[ "b_major" ][0] > $result->ua->major + ) { + continue; + } + } } + + // If there's a required operating system … + if ( isset( $entry[ "o_family" ] ) ) { + // … and we don't match it, we pass to the next entry + if ( $entry[ "o_family" ] !== $result->os->family ) { + continue; + } + + // … and there's a version requirement + if ( isset( $entry[ "o_major" ] ) { + // … the version is a string (so specific), if we don't match pass to the next entry + if ( + is_string( $entry[ "o_major" ] ) && + $entry[ "o_major" ] !== $result->os->major + ) { + continue; + } + + // … the version is a number (so maximum), if we don't match pass to the next entry + if ( + is_int( $entry[ "o_major" ] ) && + $entry[ "o_major" ] < $result->os->major + ) { + continue; + } + + // … the version is an array (so range), if we match return false + if ( + is_array( $entry[ "b_major" ] ) && + $entry[ "o_major" ][1] < $result->os->major || + $entry[ "o_major" ][0] > $result->os->major + ) { + continue; + } + } + } + + // If there's a required regex and we don't match it, we pass to the next entry + if !( isset( $entry[ "match" ] ) && !preg_match( $entry[ "match" ], $currentbrowser ) ) { + continue; + } + + // If we've got to this point we've matched every single thing, so fail. + return false; } + // If we've got to this point we've not matched any blacklist entry, so success. return true; } @@ -4150,7 +4223,7 @@ /** * Reverse the previously applied transliteration of non-ASCII characters * back to UTF-8. Used to protect data from corruption by broken web browsers - * as listed in $wgBrowserBlackList. + * as listed in $wgUserAgentEditingBlackList. * * @param string $invalue * @return string -- To view, visit https://gerrit.wikimedia.org/r/257046 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I076a855a1160d59d29926a04b31e5bf676aa5b92 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Jforrester <jforres...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits