Jforrester has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/257046

Change subject: [WIP] Replace local regexes for browser detection with ua-parser
......................................................................

[WIP] Replace local regexes for browser detection with ua-parser

Untested as yet.

Change-Id: I076a855a1160d59d29926a04b31e5bf676aa5b92
---
M composer.json
M includes/DefaultSettings.php
M includes/EditPage.php
3 files changed, 87 insertions(+), 14 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/46/257046/1

diff --git a/composer.json b/composer.json
index 0f8da12..a8ebaf6 100644
--- a/composer.json
+++ b/composer.json
@@ -25,6 +25,7 @@
                "oyejorge/less.php": "1.7.0.9",
                "php": ">=5.3.3",
                "psr/log": "1.0.0",
+               "ua-parser/uap-php": "3.4.4",
                "wikimedia/assert": "0.2.2",
                "wikimedia/base-convert": "1.0.1",
                "wikimedia/cdb": "1.3.0",
diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php
index e76b627..3cdc52d 100644
--- a/includes/DefaultSettings.php
+++ b/includes/DefaultSettings.php
@@ -2858,11 +2858,10 @@
 $wgLegacyEncoding = false;
 
 /**
- * Browser Blacklist for unicode non compliant browsers. Contains a list of
- * regexps : "/regexp/"  matching problematic browsers. These browsers will
- * be served encoded unicode in the edit box instead of real unicode.
+ * User agent blacklist for Unicode non-compliant browsers, which are served encoded Unicode
+ * in the edit box. Entries are matched where possible on details detected by the UAP library.
  */
-$wgBrowserBlackList = array(
+$wgUserAgentEditingBlackList = array(
        /**
         * Netscape 2-4 detection
         * The minor version may contain strings such as "Gold" or "SGoldC-SGI"
@@ -2873,9 +2872,8 @@
         *
         * Reference: http://www.psychedelix.com/agents/index.shtml
         */
-       '/^Mozilla\/2\.[^ ]+ [^(]*?\((?!compatible).*; [UIN]/',
-       '/^Mozilla\/3\.[^ ]+ [^(]*?\((?!compatible).*; [UIN]/',
-       '/^Mozilla\/4\.[^ ]+ [^(]*?\((?!compatible).*; [UIN]/',
+       // FIXME: This won't catch anything right now. Need to upstream 
Netscape detection.
+       array( "b_family" => "Netscape", "b_major" => array( "2", "4") ),
 
        /**
         * MSIE on Mac OS 9 is teh sux0r, converts þ to <thorn>, ð to <eth>,
@@ -2890,13 +2888,14 @@
         * @link https://en.wikipedia.org/w/index.php?diff=12356041&oldid=12355864
         * @link https://en.wikipedia.org/wiki/Template%3AOS9
         */
-       '/^Mozilla\/4\.0 \(compatible; MSIE \d+\.\d+; Mac_PowerPC\)/',
+       array( "b_family" => "IE", "o_family" => "Mac OS" ),
 
        /**
         * Google wireless transcoder, seems to eat a lot of chars alive
         * https://it.wikipedia.org/w/index.php?title=Luciano_Ligabue&diff=prev&oldid=8857361
         */
-       '/^Mozilla\/4\.0 \(compatible; MSIE 6.0; Windows NT 5.0; Google Wireless Transcoder;\)/'
+       // FIXME: Might want to upstream. "match" goes to preg_match(), so it needs delimiters.
+       array( "b_family" => "IE", "match" => "/Google Wireless Transcoder/" ),
 );
 
 /**
diff --git a/includes/EditPage.php b/includes/EditPage.php
index c709d00..e1460c5 100644
--- a/includes/EditPage.php
+++ b/includes/EditPage.php
@@ -4055,7 +4055,7 @@
         * @return bool
         */
        private function checkUnicodeCompliantBrowser() {
-               global $wgBrowserBlackList, $wgRequest;
+               global $wgUserAgentEditingBlackList, $wgRequest;
 
                $currentbrowser = $wgRequest->getHeader( 'User-Agent' );
                if ( $currentbrowser === false ) {
@@ -4063,11 +4063,84 @@
                        return true;
                }
 
-               foreach ( $wgBrowserBlackList as $browser ) {
-                       if ( preg_match( $browser, $currentbrowser ) ) {
-                               return false;
+               // Let ua-parser split the header into browser and operating system details.
+               $client = \UAParser\Parser::create()->parse( $currentbrowser );
+               foreach ( $wgUserAgentEditingBlackList as $entry ) {
+                       if ( self::userAgentMatchesBlacklistEntry( $entry, $client, $currentbrowser ) ) {
+                               // Every criterion of this entry matched, so the browser is blacklisted.
+                               return false;
                        }
                }
+               // No blacklist entry matched, so the browser is treated as compliant.
                return true;
        }
+
+       /**
+        * Check whether a parsed user agent satisfies every criterion of one
+        * $wgUserAgentEditingBlackList entry.
+        *
+        * @param array $entry Criteria keyed by b_family, b_major, o_family, o_major, match
+        * @param object $client Parse result returned by UAParser\Parser::parse()
+        * @param string $userAgent Raw User-Agent header value
+        * @return bool True if every criterion present in the entry matches
+        */
+       private static function userAgentMatchesBlacklistEntry( $entry, $client, $userAgent ) {
+               // An entry with no usable criterion would blacklist every browser; skip it.
+               if ( !is_array( $entry ) || (
+                       !isset( $entry['b_family'] ) && !isset( $entry['o_family'] ) && !isset( $entry['match'] )
+               ) ) {
+                       return false;
+               }
+               // Browser family, optionally narrowed down by major version.
+               if ( isset( $entry['b_family'] ) ) {
+                       if ( $entry['b_family'] !== $client->ua->family ) {
+                               return false;
+                       }
+                       if ( isset( $entry['b_major'] )
+                               && !self::userAgentVersionMatches( $entry['b_major'], $client->ua->major )
+                       ) {
+                               return false;
+                       }
+               }
+               // Operating system family, optionally narrowed down by major version.
+               if ( isset( $entry['o_family'] ) ) {
+                       if ( $entry['o_family'] !== $client->os->family ) {
+                               return false;
+                       }
+                       if ( isset( $entry['o_major'] )
+                               && !self::userAgentVersionMatches( $entry['o_major'], $client->os->major )
+                       ) {
+                               return false;
+                       }
+               }
+               // The raw header has to match the regular expression, if one is given.
+               if ( isset( $entry['match'] ) && !preg_match( $entry['match'], $userAgent ) ) {
+                       return false;
+               }
+               return true;
+       }
+
+       /**
+        * Check a major version reported by ua-parser against a blacklist constraint.
+        *
+        * @param string|int|array $constraint String: exact version; integer: highest
+        *  affected version; array: inclusive ( lowest, highest ) range
+        * @param string|null $major Detected major version, null if unknown
+        * @return bool
+        */
+       private static function userAgentVersionMatches( $constraint, $major ) {
+               // An undetected version cannot be shown to satisfy the constraint.
+               if ( $major === null || $major === '' ) {
+                       return false;
+               }
+               if ( is_array( $constraint ) ) {
+                       return isset( $constraint[0], $constraint[1] )
+                               && (int)$constraint[0] <= (int)$major
+                               && (int)$major <= (int)$constraint[1];
+               }
+               if ( is_int( $constraint ) ) {
+                       return (int)$major <= $constraint;
+               }
+               return (string)$constraint === (string)$major;
+       }
 
@@ -4150,7 +4223,7 @@
        /**
         * Reverse the previously applied transliteration of non-ASCII 
characters
         * back to UTF-8. Used to protect data from corruption by broken web 
browsers
-        * as listed in $wgBrowserBlackList.
+        * as listed in $wgUserAgentEditingBlackList.
         *
         * @param string $invalue
         * @return string

-- 
To view, visit https://gerrit.wikimedia.org/r/257046
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I076a855a1160d59d29926a04b31e5bf676aa5b92
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Jforrester <jforres...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to