Dr0ptp4kt has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/169210

Change subject: Redirect zero-rated users more nicely
......................................................................

Redirect zero-rated users more nicely

In the event the user hits zero.wikipedia.org/ (webroot), but
zerodot isn't in the zero-rating list, send the user to mdot.
Furthermore, if there's a good language homepage match, ideally
zero-rated, send the user to that language homepage match.

We're trialing this on the zero-rated experience. In the future
if this goes into mdot Wikipedia at large, we'll need to ensure
that traffic that isn't eligible for zero-rating doesn't take
zero-rating rules into account, although the language detection
logic would otherwise work the same.

Change-Id: I31d93509afc1f5620c8f60d8d1052f63735ae0e8
---
M includes/PageRendering.php
1 file changed, 131 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ZeroBanner refs/changes/10/169210/1

diff --git a/includes/PageRendering.php b/includes/PageRendering.php
index 1dd2645..109d49c 100644
--- a/includes/PageRendering.php
+++ b/includes/PageRendering.php
@@ -679,6 +679,9 @@
        private function makeRedirectInfo() {
                $request = $this->getRequest();
                if ( $request->getCheck( 'isroot' ) ) {
+                       // !!!BEWARE!!! We're only varying on Accept-Language 
for the webroot
+                       // We don't want the cached object pool to get huge.
+                       $this->getOutput()->addVaryHeader( 'Accept-Language' );
                        $url = $this->getLandingRedirect();
                        return array( 'redirect' => $url, 'code' => '302' );
                }
@@ -958,8 +961,8 @@
                                $flags |= self::GET_LANDING;
                        }
                } else {
-                       $showLangs = $config->showLangs();
-                       $langCode = $showLangs[0];
+                       $langCode = $this->getOptimalLanguageCode( $config, 
$request );
+
                        if ( $config->showZeroPage() ) {
                                $flags |= self::GET_LANDING;
                        }
@@ -985,6 +988,11 @@
                                        $resp->setcookie( 'forceHTTPS', '', 
$time,
                                                array( 'prefix' => '', 'domain' 
=> '.wikipedia.' . $wgZeroBannerClusterDomain ) );
                                }
+                       }
+                       // if the operator zero-rates mdot only,
+                       // but not zerodot, then send the user to mdot
+                       if ( $this->isZeroSubdomain() && !in_array( 'zero', 
$config->sites() ) ) {
+                               $flags |= self::FORCE_MDOT;
                        }
                }
                $url = $this->getStartPageUrl( $langCode, $flags );
@@ -1092,4 +1100,125 @@
        public function isHttps() {
                return $this->getRequest()->getProtocol() === 'https';
        }
+
+       /**
+        * Helper to determine a good language code for redirects from the Accept-Language header.
+        * @param ZeroConfig $config Supplies showLangs() and whitelistedLangs().
+        * @param WebRequest $request Supplies the parsed Accept-Language header via getAcceptLang().
+        * @return string A language code string for redirects; the first showLangs entry if nothing matches.
+        */
+       private function getOptimalLanguageCode( $config, $request ) {
+               global $wgLocalDatabases;
+
+               $showLangs = $config->showLangs();
+               $userLangs = array_keys( $request->getAcceptLang() );
+
+               // in case the Accept-Language header wasn't helpful, send user to preconfigured language
+               if ( count( $userLangs ) === 0 || ( count( $userLangs ) === 1 && $userLangs[0] === '*' ) ) {
+                       return $showLangs[0];
+               }
+
+               $userLangs = array_map( function( $elem ) {
+                       $prefix = strstr( $elem, '-', true ); // keep only the part before the first hyphen
+                       if ( $prefix !== false ) {
+                               $elem = $prefix;
+                       }
+                       return $elem;
+               }, $userLangs );
+
+               // This is a small optimization for zero-rated sourced traffic,
+               // which is the first place where we're trialing this code.
+               // We're confident that our showLangs variable will contain
+               // a qualified language code, so we may be able to save ourselves
+               // a check against the full list of languages.
+               $okLangs = array_intersect( $userLangs, $showLangs );
+               if ( count( $okLangs ) ) {
+                       return array_shift( $okLangs ); // first match; assumes getAcceptLang() is most-preferred-first
+               }
+               // showLangs and whitelistedLangs are likely to be equivalent,
+               // or showLangs may be a subset of whitelistedLangs. It's not
+               // clear that we should try to micro-optimize to check whitelistedLangs.
+               // That would actually probably be a waste of time, and furthermore
+               // in the case that whitelistedLangs is an empty array (all languages)
+               // we still need to vet whether the language is part of the system-
+               // defined languages. Which we end up doing next as part of a full
+               // routine.
+
+               // Well, that didn't work. Let's do this the long way.
+               // Adapted from SiteMatrix_body.php in the SiteMatrix extension
+               $sysLangs = array();
+               foreach ( $wgLocalDatabases as $db ) {
+                       if ( preg_match( "/(.+)wiki\$/", $db, $m ) ) {
+                               $lang = $m[1];
+                               $langhost = str_replace( '_', '-', $lang );
+                               $sysLangs[] = $langhost;
+                       }
+               }
+
+               /*
+                * Note on mapping: In practice, our hyphenated language subdomains
+                * don't map cleanly to ISO codes that show up in the prefixes in
+                * Accept-Language header. The following yielded no results:
+                               mediawiki-config $ grep _ wikipedia.dblist
+
+                               bat_smgwiki
+                               be_x_oldwiki
+                               cbk_zamwiki
+                               fiu_vrowiki
+                               map_bmswiki
+                               nds_nlwiki
+                               roa_rupwiki
+                               roa_tarawiki
+                               zh_classicalwiki
+                               zh_min_nanwiki
+                               zh_yuewiki
+
+                               $ hive
+                               use wmf_raw;
+                               select accept_language, count(accept_language)
+                               from webrequest where
+                               year = 2014 and month = 10 and day = 26 and hour = 17
+                               and uri_host = "m.wikipedia.org"
+                               and uri_path = "/"
+                               and webrequest_source = "mobile"
+                               and (lower(accept_language) like "%bat-smg%" or
+                               lower(accept_language) like "%be-x-old%" or
+                               lower(accept_language) like "%cbk-za%" or
+                               lower(accept_language) like "%fiu-vro%" or
+                               lower(accept_language) like "%map-bms%" or
+                               lower(accept_language) like "%nds-nl%" or
+                               lower(accept_language) like "%roa-rup%" or
+                               lower(accept_language) like "%zh-classical%" or
+                               lower(accept_language) like "%zh-min%" or
+                               lower(accept_language) like "%zh-yue%")
+                               group by accept_language;
+
+                * This said, there are definitely cases of ISO prefixes in Accept-Language
+                * headers not mapping to our subdomains (for example, "nb" => "no").
+                * Nonetheless, the language subdomain on our servers is "good enough".
+                * At least for now. In a future state we may want to examine a fuller
+                * set of checks. awight had even started on some pretty interesting BCP
+                * 47 compliance stuff (https://github.com/adamwight/LanguageTag).
+                * Some other useful pages for future reference:
+                * https://meta.wikimedia.org/wiki/List_of_Wikipedias/Table
+                * https://meta.wikimedia.org/wiki/Www.wikipedia.org_template
+               */
+
+               // reuse $okLangs
+               $okLangs = array_intersect( $userLangs, $sysLangs );
+
+               $whitelistedLangs = $config->whitelistedLangs();
+               $safeLangs =
+                       count( $whitelistedLangs ) === 0 ? $okLangs : array_intersect( $okLangs, $whitelistedLangs );
+
+               // If we couldn't find an overlapping language, to avoid a charge for
+               // the user, we send the user to the primary showLangs value.
+               // @TODO: if this goes to mdot Wikipedia at large, the concept of
+               // showLangs and whitelistedLangs shouldn't be in force. So refactor
+               // accordingly.
+
+               $langCode = count( $safeLangs ) === 0 ? $showLangs[0] : array_shift( $safeLangs );
+
+               return $langCode === '*' ? $showLangs[0] : $langCode;
+       }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/169210
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I31d93509afc1f5620c8f60d8d1052f63735ae0e8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ZeroBanner
Gerrit-Branch: master
Gerrit-Owner: Dr0ptp4kt <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to