Dr0ptp4kt has uploaded a new change for review.
https://gerrit.wikimedia.org/r/169210
Change subject: Redirect zero-rated users more nicely
......................................................................
Redirect zero-rated users more nicely
In the event the user hits zero.wikipedia.org/ (webroot), but
zerodot isn't in the zero-rating list, send the user to mdot.
Furthermore, if there's a good language homepage match, ideally
zero-rated, send the user to that language homepage match.
We're trialing this on the zero-rated experience. In the future
if this goes into mdot Wikipedia at large, we'll need to ensure
that traffic that isn't eligible for zero-rating doesn't take
zero-rating rules into account, although the language detection
logic would otherwise work the same.
Change-Id: I31d93509afc1f5620c8f60d8d1052f63735ae0e8
---
M includes/PageRendering.php
1 file changed, 131 insertions(+), 2 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/ZeroBanner
refs/changes/10/169210/1
diff --git a/includes/PageRendering.php b/includes/PageRendering.php
index 1dd2645..109d49c 100644
--- a/includes/PageRendering.php
+++ b/includes/PageRendering.php
@@ -679,6 +679,9 @@
private function makeRedirectInfo() {
$request = $this->getRequest();
if ( $request->getCheck( 'isroot' ) ) {
+ // !!!BEWARE!!! We're only varying on Accept-Language
for the webroot
+ // We don't want the cached object pool to get huge.
+ $this->getOutput()->addVaryHeader( 'Accept-Language' );
$url = $this->getLandingRedirect();
return array( 'redirect' => $url, 'code' => '302' );
}
@@ -958,8 +961,8 @@
$flags |= self::GET_LANDING;
}
} else {
- $showLangs = $config->showLangs();
- $langCode = $showLangs[0];
+ $langCode = $this->getOptimalLanguageCode( $config,
$request );
+
if ( $config->showZeroPage() ) {
$flags |= self::GET_LANDING;
}
@@ -985,6 +988,11 @@
$resp->setcookie( 'forceHTTPS', '',
$time,
array( 'prefix' => '', 'domain'
=> '.wikipedia.' . $wgZeroBannerClusterDomain ) );
}
+ }
+ // if the operator zero-rates mdot only,
+ // but not zerodot, then send the user to mdot
+ if ( $this->isZeroSubdomain() && !in_array( 'zero',
$config->sites() ) ) {
+ $flags |= self::FORCE_MDOT;
}
}
$url = $this->getStartPageUrl( $langCode, $flags );
@@ -1092,4 +1100,125 @@
public function isHttps() {
return $this->getRequest()->getProtocol() === 'https';
}
+
+ /**
+ * Helper to pick a language code for redirects from the request's Accept-Language header, preferring the user's highest-ranked language that is in showLangs, else the highest-ranked one that has a local wiki (and is whitelisted, if a whitelist is configured), else showLangs[0].
+ * @param ZeroConfig $config Supplies showLangs() and whitelistedLangs().
+ * @param WebRequest $request Supplies getAcceptLang(), documented to be sorted by descending q-value.
+ * @return string A language code string for redirects.
+ */
+ private function getOptimalLanguageCode( $config, $request ) {
+ global $wgLocalDatabases;
+
+ $showLangs = $config->showLangs();
+ $userLangs = array_keys( $request->getAcceptLang() );
+
+ // in case the Accept-Language header wasn't helpful, send user to preconfigured language
+ if ( count( $userLangs ) === 0 || ( count( $userLangs ) === 1 && $userLangs[0] === '*' ) ) {
+ return $showLangs[0];
+ }
+
+ $userLangs = array_map( function( $elem ) {
+ $prefix = strstr( $elem, '-', true );
+ if ( $prefix !== false ) {
+ $elem = $prefix;
+ }
+ return $elem;
+ }, $userLangs );
+
+ // This is a small optimization for zero-rated sourced traffic,
+ // which is the first place where we're trialing this code.
+ // We're confident that our showLangs variable will contain
+ // a qualified language code, so we may be able to save ourselves
+ // a check against the full list of languages.
+ $okLangs = array_intersect( $userLangs, $showLangs );
+ if ( count( $okLangs ) ) {
+ return array_shift( $okLangs ); // array_intersect keeps $userLangs order, so the first entry is the most-preferred match
+ }
+ // showLangs and whitelistedLangs are likely to be equivalent,
+ // or showLangs may be a subset of whitelistedLangs. It's not
+ // clear that we should try to micro-optimize to check whitelistedLangs.
+ // That would actually probably be a waste of time, and furthermore
+ // in the case that whitelistedLangs is an empty array (all languages)
+ // we still need to vet whether the language is part of the system-
+ // defined languages. Which we end up doing next as part of a full
+ // routine.
+
+ // Well, that didn't work. Let's do this the long way.
+ // Adapted from SiteMatrix_body.php in the SiteMatrix extension
+ $sysLangs = array();
+ foreach ( $wgLocalDatabases as $db ) {
+ if ( preg_match( "/(.+)wiki\$/", $db, $m ) ) {
+ $lang = $m[1];
+ $langhost = str_replace( '_', '-', $lang );
+ $sysLangs[] = $langhost;
+ }
+ }
+
+ /*
+ * Note on mapping: In practice, our hyphenated language subdomains
+ * don't map cleanly to ISO codes that show up in the prefixes in
+ * Accept-Language header. The grep below lists the hyphenated wikis; the Hive query after it yielded no results:
+ mediawiki-config $ grep _ wikipedia.dblist
+
+ bat_smgwiki
+ be_x_oldwiki
+ cbk_zamwiki
+ fiu_vrowiki
+ map_bmswiki
+ nds_nlwiki
+ roa_rupwiki
+ roa_tarawiki
+ zh_classicalwiki
+ zh_min_nanwiki
+ zh_yuewiki
+
+ $ hive
+ use wmf_raw;
+ select accept_language, count(accept_language)
+ from webrequest where
+ year = 2014 and month = 10 and day = 26 and hour = 17
+ and uri_host = "m.wikipedia.org"
+ and uri_path = "/"
+ and webrequest_source = "mobile"
+ and (lower(accept_language) like "%bat-smg%" or
+ lower(accept_language) like "%be-x-old%" or
+ lower(accept_language) like "%cbk-za%" or
+ lower(accept_language) like "%fiu-vro%" or
+ lower(accept_language) like "%map-bms%" or
+ lower(accept_language) like "%nds-nl%" or
+ lower(accept_language) like "%roa-rup%" or
+ lower(accept_language) like "%zh-classical%" or
+ lower(accept_language) like "%zh-min%" or
+ lower(accept_language) like "%zh-yue%")
+ group by accept_language;
+
+ * This said, there are definitely cases of ISO prefixes in Accept-Language
+ * headers not mapping to our subdomains (for example, "nb" => "no").
+ * Nonetheless, the language subdomain on our servers is "good enough".
+ * At least for now. In a future state we may want to examine a fuller
+ * set of checks. awight had even started on some pretty interesting BCP
+ * 47 compliance stuff (https://github.com/adamwight/LanguageTag).
+ * Some other useful pages for future reference:
+ * https://meta.wikimedia.org/wiki/List_of_Wikipedias/Table
+ * https://meta.wikimedia.org/wiki/Www.wikipedia.org_template
+ */
+
+ // reuse $okLangs
+ $okLangs = array_intersect( $userLangs, $sysLangs );
+
+ $whitelistedLangs = $config->whitelistedLangs();
+ $safeLangs =
+ count( $whitelistedLangs ) === 0 ? $okLangs : array_intersect( $okLangs, $whitelistedLangs );
+
+ // If we couldn't find an overlapping language, to avoid a charge for
+ // the user, we send the user to the primary showLangs value.
+ // @TODO: if this goes to mdot Wikipedia at large, the concept of
+ // showLangs and whitelistedLangs shouldn't be in force. So refactor
+ // accordingly.
+
+ $langCode = count( $safeLangs ) === 0 ? $showLangs[0] : array_shift( $safeLangs );
+
+ return $langCode === '*' ? $showLangs[0] : $langCode;
+ }
}
--
To view, visit https://gerrit.wikimedia.org/r/169210
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I31d93509afc1f5620c8f60d8d1052f63735ae0e8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/ZeroBanner
Gerrit-Branch: master
Gerrit-Owner: Dr0ptp4kt <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits