Smalyshev has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/339575 )
Change subject: Add script to search entities from command line
......................................................................

Add script to search entities from command line

This is mainly to be used to test search results, etc. on relforge.
Based on runSearch.php from CirrusSearch, as such requires CirrusSearch
to be installed.

Change-Id: I65f272dcea0b1f63ddb5cf5c6ba23b5cf104c7ba
---
A repo/maintenance/searchEntities.php
1 file changed, 200 insertions(+), 0 deletions(-)

  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase refs/changes/75/339575/1

diff --git a/repo/maintenance/searchEntities.php b/repo/maintenance/searchEntities.php
new file mode 100644
index 0000000..5316414
--- /dev/null
+++ b/repo/maintenance/searchEntities.php
@@ -0,0 +1,200 @@
+<?php
+
+namespace Wikibase;
+
+use CirrusSearch\Maintenance\OrderedStreamingForkController;
+use Maintenance;
+use RequestContext;
+use Wikibase\Lib\Store\LanguageFallbackLabelDescriptionLookup;
+use Wikibase\Repo\Api\EntitySearcher;
+use Wikibase\Repo\Api\EntitySearchHelper;
+use Wikibase\Repo\Search\Elastic\EntitySearchElastic;
+use Wikibase\Repo\WikibaseRepo;
+
+$basePath = getenv( 'MW_INSTALL_PATH' ) !== false ? getenv( 'MW_INSTALL_PATH' ) : __DIR__ . '/../../../..';
+
+require_once $basePath . '/maintenance/Maintenance.php';
+
+/**
+ * Command-line counterpart of the wbsearchentities API.
+ *
+ * Builds a searcher for the requested engine and hands STDIN, STDOUT and
+ * doSearch() to CirrusSearch's OrderedStreamingForkController.
+ */
+class SearchEntities extends Maintenance {
+
+	/**
+	 * Searcher for the engine chosen on the command line; set by execute().
+	 * @var EntitySearcher
+	 */
+	private $searchHelper;
+
+	public function __construct() {
+		parent::__construct();
+
+		$this->addDescription( 'Search entity a-la wbsearchentities API.' );
+
+		$this->addOption( 'entity-type', "Only search this kind of entity, e.g. `item` or `property`.", true, true );
+		$this->addOption( 'limit', "Limit how many results are returned.", false, true );
+		$this->addOption( 'language', "Language for the search.", true, true );
+		$this->addOption( 'display-language', "Language for the display.", false, true );
+		$this->addOption( 'strict', "Should we use strict language match?", false, true );
+		$this->addOption( 'engine', "Which engine to use - e.g. sql, elastic.", false, true );
+		$this->addOption( 'fork', 'Fork multiple processes to run queries from. ' .
+			'defaults to false.', false, true );
+		$this->addOption( 'options', 'A JSON object mapping from global variable to ' .
+			'its test value', false, true );
+	}
+
+	/**
+	 * Entry point: applies the global overrides, builds the searcher and hands
+	 * STDIN, STDOUT and doSearch() to the fork controller.
+	 */
+	public function execute() {
+		if ( !class_exists( OrderedStreamingForkController::class ) ) {
+			$this->error( "\nERROR: CirrusSearch must be installed to run this script\n" );
+			exit( 1 );
+		}
+
+		// Override globals first so they are in place before the searcher is built.
+		$this->applyGlobals();
+		$this->searchHelper = $this->getSearchHelper( $this->getOption( 'engine', 'sql' ) );
+
+		// Anything that is not a plain non-negative integer means "do not fork".
+		$forks = (string)$this->getOption( 'fork', '0' );
+		$forks = ctype_digit( $forks ) ? (int)$forks : 0;
+
+		$controller = new OrderedStreamingForkController( $forks, [ $this, 'doSearch' ], STDIN, STDOUT );
+		$controller->start();
+	}
+
+	/**
+	 * Applies global variables provided as the options CLI argument
+	 * to override current settings.
+	 *
+	 * The argument is a JSON object, optionally base64-encoded and prefixed with
+	 * "B64://". Invalid JSON or an unknown global aborts the script with exit
+	 * status 1.
+	 */
+	protected function applyGlobals() {
+		$optionsData = $this->getOption( 'options' );
+		if ( $optionsData === null ) {
+			return;
+		}
+
+		$prefix = 'B64://';
+		if ( strncmp( $optionsData, $prefix, strlen( $prefix ) ) === 0 ) {
+			$optionsData = base64_decode( substr( $optionsData, strlen( $prefix ) ) );
+		}
+
+		$options = json_decode( $optionsData, true );
+		if ( json_last_error() !== JSON_ERROR_NONE ) {
+			$this->error( "\nERROR: options is not valid JSON: " . json_last_error_msg() . "\n" );
+			exit( 1 );
+		}
+		if ( !$options ) {
+			// Nothing to override, e.g. "{}" or "false".
+			return;
+		}
+		if ( !is_array( $options ) ) {
+			$this->error( "\nERROR: options must be a JSON object\n" );
+			exit( 1 );
+		}
+
+		foreach ( $options as $key => $value ) {
+			if ( !array_key_exists( $key, $GLOBALS ) ) {
+				$this->error( "\nERROR: $key is not a valid global variable\n" );
+				exit( 1 );
+			}
+			$GLOBALS[$key] = $value;
+		}
+	}
+
+	/**
+	 * Run search for one query.
+	 *
+	 * @param string $query
+	 * @return string Pretty-printed JSON for the matches, ending in a newline
+	 */
+	public function doSearch( $query ) {
+		$limit = (int)$this->getOption( 'limit', 5 );
+		// The option arrives as a string, and a bare cast would treat "false" as true.
+		$strict = filter_var( $this->getOption( 'strict', false ), FILTER_VALIDATE_BOOLEAN );
+
+		$results = $this->searchHelper->getRankedSearchResults(
+			$query,
+			$this->getOption( 'language' ),
+			$this->getOption( 'entity-type' ),
+			$limit,
+			$strict
+		);
+
+		$out = [
+			'query' => $query,
+			'totalHits' => count( $results ),
+			'rows' => []
+		];
+
+		foreach ( $results as $match ) {
+			$id = $match->getEntityId()->getSerialization();
+			$label = $match->getDisplayLabel();
+			$description = $match->getDisplayDescription();
+
+			$out['rows'][] = [
+				'pageId' => $id,
+				'title' => $id,
+				'snippets' => [
+					'term' => $match->getMatchedTerm()->getText(),
+					'type' => $match->getMatchedTermType(),
+					// Guard against a missing display label or description (TODO confirm nullability).
+					'title' => $label === null ? null : $label->getText(),
+					'text' => $description === null ? null : $description->getText(),
+				]
+			];
+		}
+
+		return json_encode( $out, JSON_PRETTY_PRINT ) . "\n";
+	}
+
+	/**
+	 * Get appropriate searcher.
+	 *
+	 * @param string $engine Either 'sql' or 'elastic'
+	 * @return EntitySearcher
+	 * @throws \MWException If the engine is not one of the above
+	 */
+	private function getSearchHelper( $engine ) {
+		$repo = WikibaseRepo::getDefaultInstance();
+
+		// Honour --display-language; without it, fall back to the user language.
+		$displayCode = $this->getOption( 'display-language' );
+		$lang = $displayCode === null ? $repo->getUserLanguage() : \Language::factory( $displayCode );
+
+		switch ( $engine ) {
+			case 'sql':
+				return new EntitySearchHelper(
+					$repo->getEntityLookup(),
+					$repo->getEntityIdParser(),
+					$repo->newTermSearchInteractor( $lang->getCode() ),
+					new LanguageFallbackLabelDescriptionLookup(
+						$repo->getTermLookup(),
+						$repo->getLanguageFallbackChainFactory()->newFromLanguage( $lang )
+					)
+				);
+			case 'elastic':
+				return new EntitySearchElastic(
+					$repo->getLanguageFallbackChainFactory(),
+					$repo->getEntityIdParser(),
+					$lang,
+					$repo->getContentModelMappings(),
+					RequestContext::getMain()->getRequest(),
+					$repo->getSettings()->getSetting( 'entitySearch' )
+				);
+			default:
+				throw new \MWException( "Unknown engine: $engine" );
+		}
+	}
+}
+
+$maintClass = SearchEntities::class;
+require_once RUN_MAINTENANCE_IF_MAIN;

--
To view, visit
https://gerrit.wikimedia.org/r/339575 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I65f272dcea0b1f63ddb5cf5c6ba23b5cf104c7ba Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Wikibase Gerrit-Branch: master Gerrit-Owner: Smalyshev <smalys...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits