EBernhardson has uploaded a new change for review. https://gerrit.wikimedia.org/r/319498
Change subject: [WIP] Script to generate wmgCirrusSearchInterwikiSources ...................................................................... [WIP] Script to generate wmgCirrusSearchInterwikiSources CirrusSearch has a variable, $wgCirrusSearchInterwikiSources, that lists the sister wikis that should be queried as part of Special:Search and displayed in a sidebar. This generates the full map for $wmgCirrusSearchInterwikiSources that will be included into InitialiseSettings.php. We are working up something so this can be more directly done, but before that is complete we need to start running some load tests to get an idea of other changes that need to be made. This should be a reasonable stop-gap solution. Change-Id: I7145b63a97ed6caf142df076d19486752af46426 --- A dumpCirrusInterwikiSources.php 1 file changed, 112 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikimediaMaintenance refs/changes/98/319498/1 diff --git a/dumpCirrusInterwikiSources.php b/dumpCirrusInterwikiSources.php new file mode 100644 index 0000000..01f9340 --- /dev/null +++ b/dumpCirrusInterwikiSources.php @@ -0,0 +1,112 @@ +<?php + +/** + * Build map of wikidb to the list of its sister sites db names + * and the interwiki prefixes that are used. + * + * The output of this is intended to be used in the mediawiki-config + * repository as the value for 'wmgCirrusSearchInterwikiSources' + * in InitialiseSettings.php + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ * @ingroup Wikimedia
+ */
+require_once __DIR__ . '/WikimediaMaintenance.php';
+
+/**
+ * Maintenance script that prints, as a PHP array literal, a map of
+ * wiki db name => [ sister wiki db name => interwiki prefix ].
+ */
+class DumpCirrusInterwikiSources extends Maintenance {
+
+	public function __construct() {
+		parent::__construct();
+		$this->mDescription = "Build CirrusSearch interwiki source map";
+	}
+
+	/**
+	 * Evaluate a dblist expression and drop every private wiki from the result.
+	 *
+	 * @param string $project Dblist expression. The site code 'wiki' is
+	 *  translated to the 'wikipedia' dblist before evaluation.
+	 * @return string[] Database names
+	 */
+	private function expandDbList( $project ) {
+		// The private list never changes during a run; evaluate it once.
+		static $private = null;
+		if ( $private === null ) {
+			$private = MWWikiversions::evalDbListExpression( 'private' );
+		}
+		if ( $project === 'wiki' ) {
+			$project = 'wikipedia';
+		}
+		$list = MWWikiversions::evalDbListExpression( $project );
+		return array_diff( $list, $private );
+	}
+
+	/**
+	 * Find the interwiki prefix that a wiki uses for one of its sister projects.
+	 *
+	 * Scans $wgInterwikiCache for entries keyed "<dbName>:<prefix>" and returns
+	 * the prefix of the first entry whose URL host has $targetDomain as its
+	 * second dot-separated label.
+	 *
+	 * @param string $dbName Database name of the wiki the links originate from
+	 * @param string $targetDomain Site code of the sister project; 'wiki' is
+	 *  treated as 'wikipedia'
+	 * @return string|null The prefix, or null when no entry matches
+	 */
+	private function findInterwikiPrefix( $dbName, $targetDomain ) {
+		global $wgInterwikiCache;
+
+		if ( $targetDomain === 'wiki' ) {
+			$targetDomain = 'wikipedia';
+		}
+
+		// Match on "<dbName>:" rather than the bare db name, otherwise
+		// 'enwiki' would also pick up the entries of 'enwikibooks'.
+		$keyPrefix = $dbName . ':';
+		$keyPrefixLen = strlen( $keyPrefix );
+
+		foreach ( $wgInterwikiCache as $key => $value ) {
+			if ( substr( $key, 0, $keyPrefixLen ) !== $keyPrefix ) {
+				continue;
+			}
+			$prefix = substr( $key, $keyPrefixLen );
+
+			// NOTE(review): assumes values are "<flag> <url>"; entries that
+			// do not fit that shape are skipped instead of raising notices.
+			$valueParts = explode( ' ', $value, 2 );
+			if ( count( $valueParts ) < 2 ) {
+				continue;
+			}
+			$host = parse_url( $valueParts[1], PHP_URL_HOST );
+			if ( !is_string( $host ) ) {
+				continue;
+			}
+			// Expect "<lang>.<domain>.<tld>"; shorter hosts cannot match.
+			$hostParts = explode( '.', $host, 3 );
+			if ( count( $hostParts ) < 3 ) {
+				continue;
+			}
+			if ( $hostParts[1] === $targetDomain ) {
+				return $prefix;
+			}
+		}
+		return null;
+	}
+
+	/**
+	 * Build the map for every non-private wiki and dump it with var_export().
+	 *
+	 * Problems (a sister site without an interwiki prefix, or a prefix whose
+	 * computed db name is not in the 'all' dblist) are reported on STDERR and
+	 * the affected pair is left out of the map.
+	 */
+	public function execute() {
+		global $wgConf;
+
+		// Flipped so that db name membership is a cheap isset() check.
+		$all = array_flip( $this->expandDbList( 'all' ) );
+
+		$matrix = new SiteMatrix();
+		$sites = $matrix->getSites();
+		$map = [];
+		foreach ( $sites as $baseSite ) {
+			// Every non-private wiki belonging to this project
+			foreach ( $this->expandDbList( $baseSite ) as $dbName ) {
+				list( , $lang ) = $wgConf->siteFromDB( $dbName );
+				if ( $lang === null ) {
+					// No language could be derived, so no sisters can be looked up
+					continue;
+				}
+				foreach ( $sites as $sisterSite ) {
+					if ( $baseSite === $sisterSite ) {
+						continue;
+					}
+					if ( !$matrix->exist( $lang, $sisterSite ) ) {
+						continue;
+					}
+
+					$iwPrefix = $this->findInterwikiPrefix( $dbName, $sisterSite );
+					if ( $iwPrefix === null ) {
+						fwrite( STDERR, "Matrix reports site at $lang / $sisterSite, but no interwiki prefix found\n" );
+						continue;
+					}
+					// Db names use '_' where language codes use '-'
+					$sisterDbName = strtr( $lang, '-', '_' ) . $sisterSite;
+					if ( !isset( $all[$sisterDbName] ) ) {
+						fwrite( STDERR, "Found prefix ($iwPrefix) from $dbName but no db ($sisterDbName)\n" );
+						continue;
+					}
+					$map[$dbName][$sisterDbName] = $iwPrefix;
+				}
+			}
+		}
+
+		// Sort both levels so the output is stable between runs.
+		foreach ( array_keys( $map ) as $dbName ) {
+			ksort( $map[$dbName] );
+		}
+		ksort( $map );
+		var_export( $map );
+	}
+}
+
+$maintClass = 'DumpCirrusInterwikiSources';
+require_once RUN_MAINTENANCE_IF_MAIN;
-- 
To view, visit https://gerrit.wikimedia.org/r/319498
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7145b63a97ed6caf142df076d19486752af46426
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/WikimediaMaintenance
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits