Phoenix303 has uploaded a new change for review.
https://gerrit.wikimedia.org/r/218859
Change subject: Allow cross language search for a string in other languages
......................................................................
Allow cross language search for a string in other languages
Use a filtered query to search for a string in a source language, and use
the selected language filter to show translated messages in a target language.
Modify the query to use a filtered query as the top-level search query, which
searches for a string in a source language, e.g. a search for "wiki" in "en":
{
"query" :
{
"filtered":
{
"query":
{
"bool":
{
"should":
[
{
"match" : {
"content" : {
"query" : "wiki"
}
}
},
{
"term" : {
"localid" : {
"value": "wiki",
"boost" : 1
}
}
}
]
}
},
"filter" :
{
"bool" :
{
"must" :
[
{
"term": {
"language" :"en"
}
}
]
}
}
}
}
}
Bug: T101220
Change-Id: I246aeea623c1d81f5ce148185be8c9c9ef8f0923
---
M i18n/search/en.json
M specials/SpecialSearchTranslations.php
M ttmserver/ElasticSearchTTMServer.php
3 files changed, 186 insertions(+), 67 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Translate
refs/changes/59/218859/1
diff --git a/i18n/search/en.json b/i18n/search/en.json
index 5b7ea8c..a029126 100644
--- a/i18n/search/en.json
+++ b/i18n/search/en.json
@@ -17,5 +17,6 @@
"tux-sst-solr-offline-title": "Search unavailable",
"tux-sst-solr-offline-body": "The search service is temporarily
unavailable.",
"tux-sst-next": "Next ›",
- "tux-sst-prev": "‹ Previous"
-}
\ No newline at end of file
+ "tux-sst-prev": "‹ Previous",
+ "tux-sst-nolang-selected": "Please select a target language to see
messages"
+}
diff --git a/specials/SpecialSearchTranslations.php
b/specials/SpecialSearchTranslations.php
index d8a9b12..4959b36 100644
--- a/specials/SpecialSearchTranslations.php
+++ b/specials/SpecialSearchTranslations.php
@@ -87,13 +87,63 @@
throw new ErrorPageError( 'tux-sst-solr-offline-title',
'tux-sst-solr-offline-body' );
}
- // Part 1: facets
- $facets = $server->getFacets( $resultset );
+ $result = $server->applyFilter( $resultset, $opts );
+ // Part 1: facets
+ $facets = $server->getFacets( $result );
+ $facetHtml = $this->viewFacets( $facets );
+
+ // Part 2: results
+ $documents = $server->getDocuments( $result );
+ $resultsHtml = $this->getResultsHtml( $documents );
+
+ $resultsHtml .= Html::rawElement( 'hr', array( 'class' =>
'tux-pagination-line' ) );
+
+ $prev = $next = '';
+ $total = $server->getTotalHits( $result );
+ $offset = $this->opts->getValue( 'offset' );
+ $params = $this->opts->getChangedValues();
+
+ if ( $total - $offset > $this->limit ) {
+ $newParams = array( 'offset' => $offset + $this->limit
) + $params;
+ $attribs = array(
+ 'class' => 'mw-ui-button pager-next',
+ 'href' => $this->getPageTitle()->getLocalUrl(
$newParams ),
+ );
+ $next = Html::element( 'a', $attribs, $this->msg(
'tux-sst-next' )->text() );
+ }
+ if ( $offset ) {
+ $newParams = array( 'offset' => max( 0, $offset -
$this->limit ) ) + $params;
+ $attribs = array(
+ 'class' => 'mw-ui-button pager-prev',
+ 'href' => $this->getPageTitle()->getLocalUrl(
$newParams ),
+ );
+ $prev = Html::element( 'a', $attribs, $this->msg(
'tux-sst-prev' )->text() );
+ }
+
+ $resultsHtml .= Html::rawElement( 'div', array( 'class' =>
'tux-pagination-links' ),
+ "$prev $next"
+ );
+
+ $search = $this->getSearchInput( $queryString );
+ $count = $this->msg( 'tux-sst-count' )->numParams( $total );
+
+ $language = $opts->getValue( 'language' );
+ if ( $language === '') {
+ $resultsHtml = Html::element( 'span',
+ array(),
+ $this->msg( 'tux-sst-nolang-selected' )->text()
+ );
+ }
+
+ $this->showSearch( $search, $count, $facetHtml, $resultsHtml );
+ }
+
+ protected function viewFacets( $facets ) {
$facetHtml = Html::element( 'div',
array( 'class' => 'row facet languages',
'data-facets' => FormatJson::encode(
$this->getLanguages( $facets['language'] ) ),
- 'data-language' => $opts->getValue( 'language'
),
+ 'data-language' => $this->opts->getValue(
'language' ),
),
$this->msg( 'tux-sst-facet-language' )
);
@@ -101,14 +151,14 @@
$facetHtml .= Html::element( 'div',
array( 'class' => 'row facet groups',
'data-facets' => FormatJson::encode(
$this->getGroups( $facets['group'] ) ),
- 'data-group' => $opts->getValue( 'group' ) ),
+ 'data-group' => $this->opts->getValue( 'group'
) ),
$this->msg( 'tux-sst-facet-group' )
);
+ return $facetHtml;
+ }
- // Part 2: results
- $resultsHtml = '';
- $documents = $server->getDocuments( $resultset );
-
+ protected function getResultsHtml( $documents ) {
+ $resultsHtml='';
foreach ( $documents as $document ) {
$text = $document['content'];
$text = TranslateUtils::convertWhiteSpaceToHTML( $text
);
@@ -169,39 +219,7 @@
. $edit
. Html::closeElement( 'div' );
}
-
- $resultsHtml .= Html::rawElement( 'hr', array( 'class' =>
'tux-pagination-line' ) );
-
- $prev = $next = '';
- $total = $server->getTotalHits( $resultset );
- $offset = $this->opts->getValue( 'offset' );
- $params = $this->opts->getChangedValues();
-
- if ( $total - $offset > $this->limit ) {
- $newParams = array( 'offset' => $offset + $this->limit
) + $params;
- $attribs = array(
- 'class' => 'mw-ui-button pager-next',
- 'href' => $this->getPageTitle()->getLocalUrl(
$newParams ),
- );
- $next = Html::element( 'a', $attribs, $this->msg(
'tux-sst-next' )->text() );
- }
- if ( $offset ) {
- $newParams = array( 'offset' => max( 0, $offset -
$this->limit ) ) + $params;
- $attribs = array(
- 'class' => 'mw-ui-button pager-prev',
- 'href' => $this->getPageTitle()->getLocalUrl(
$newParams ),
- );
- $prev = Html::element( 'a', $attribs, $this->msg(
'tux-sst-prev' )->text() );
- }
-
- $resultsHtml .= Html::rawElement( 'div', array( 'class' =>
'tux-pagination-links' ),
- "$prev $next"
- );
-
- $search = $this->getSearchInput( $queryString );
- $count = $this->msg( 'tux-sst-count' )->numParams( $total );
-
- $this->showSearch( $search, $count, $facetHtml, $resultsHtml );
+ return $resultsHtml;
}
protected function getLanguages( array $facet ) {
@@ -327,7 +345,9 @@
$input = Xml::input( 'query', false, $query, $attribs );
$submit = Xml::submitButton( $this->msg( 'tux-sst-search' ),
array( 'class' => 'button' ) );
$lang = $this->getRequest()->getVal( 'language' );
- $language = is_null( $lang ) ? '' : Html::hidden( 'language',
$lang );
+ $code = $this->getLanguage()->getCode();
+ $language = is_null( $lang ) ?
+ Html::hidden( 'language', $code ) : Html::hidden(
'language', $lang );
$form = Html::rawElement( 'form', array( 'action' => wfScript()
),
$title . $input . $submit . $language
diff --git a/ttmserver/ElasticSearchTTMServer.php
b/ttmserver/ElasticSearchTTMServer.php
index 40d9199..bd9fa0d 100644
--- a/ttmserver/ElasticSearchTTMServer.php
+++ b/ttmserver/ElasticSearchTTMServer.php
@@ -460,19 +460,30 @@
// Allow searching either by message content or message id
(page name
// without language subpage) with exact match only.
- $serchQuery = new \Elastica\Query\Bool();
+ $searchQuery = new \Elastica\Query\Bool();
$contentQuery = new \Elastica\Query\Match();
$contentQuery->setFieldQuery( 'content', $queryString );
- $serchQuery->addShould( $contentQuery );
+ $searchQuery->addShould( $contentQuery );
$messageQuery = new \Elastica\Query\Term();
$messageQuery->setTerm( 'localid', $queryString );
- $serchQuery->addShould( $messageQuery );
- $query->setQuery( $serchQuery );
+ $searchQuery->addShould( $messageQuery );
- $language = new \Elastica\Facet\Terms( 'language' );
- $language->setField( 'language' );
- $language->setSize( 500 );
- $query->addFacet( $language );
+ $filteredQuery = new \Elastica\Query\Filtered();
+ $filterbool = new \Elastica\Filter\Bool();
+
+ $context = RequestContext::getMain();
+ $languageCode = $context->getLanguage()->getCode();
+
+ $languageFilter = new \Elastica\Filter\Term();
+ $languageFilter->setTerm( 'language', $languageCode );
+ $filterbool->addMust( $languageFilter );
+
+ $filteredQuery->setFilter($filterbool);
+ $filteredQuery->setQuery($searchQuery);
+
+ $query->setQuery( $filteredQuery );
+ $query->setParam( '_source', array( 'localid', 'content',
'uri', 'wiki', 'group' ) );
+ $query->setSize( 1000 );
$group = new \Elastica\Facet\Terms( 'group' );
$group->setField( 'group' );
@@ -481,13 +492,101 @@
$group->setSize( 500 );
$query->addFacet( $group );
- $query->setSize( $opts->getValue( 'limit' ) );
- $query->setFrom( $opts->getValue( 'offset' ) );
-
// BoolAnd filters are executed in sequence per document. Bool
filters with
// multiple must clauses are executed by converting each filter
into a bit
// field then anding them together. The latter is normally
faster if either
// of the subfilters are reused. May not make a difference in
this context.
+ $filters = new \Elastica\Filter\Bool();
+
+ $group = $opts->getValue( 'group' );
+ if ( $group !== '' ) {
+ $groupFilter = new \Elastica\Filter\Term();
+ $groupFilter->setTerm( 'group', $group );
+ $filters->addMust( $groupFilter );
+ }
+
+ // Check that we have at least one filter to avoid invalid
query errors.
+ if ( $group !== '' ) {
+ $query->setFilter( $filters );
+ }
+
+ list( $pre, $post ) = $highlight;
+ $query->setHighlight( array(
+ // The value must be an object
+ 'fields' => array(
+ 'content' => array(
+ 'number_of_fragments' => 0,
+ ),
+ ),
+ 'pre_tags' => array( $pre ),
+ 'post_tags' => array( $post ),
+ ) );
+
+ try {
+ // Return results for a search string in a source
language
+ return $this->getType()->getIndex()->search( $query );
+ } catch ( \Elastica\Exception\ExceptionInterface $e ) {
+ throw new TTMServerException( $e->getMessage() );
+ }
+ }
+
+ protected function getLocalId( $resultsource ) {
+ $terms = array();
+ foreach ( $resultsource->getResults() as $result ) {
+
+ $data = $result->getData();
+ $score = $result->getScore();
+
+ $scores[$data['localid']] = $score;
+ $terms[] = $data['localid'];
+
+ }
+ return array(
+ 'terms' => $terms,
+ 'scores' => $scores
+ );
+ }
+
+ protected function filterTranslation( $data, $opts ) {
+
+ $idQuery = new \Elastica\Query\Terms();
+ $idQuery->setTerms( 'localid', $data['terms'] );
+
+ $query = new \Elastica\Query();
+ $groovyScript =
+<<<GROOVY
+return prescore.get(doc['localid'].value);
+GROOVY;
+ $script = new \Elastica\Script(
+ $groovyScript,
+ array( 'prescore' => $data['scores'] ),
+ \Elastica\Script::LANG_GROOVY
+ );
+
+ // Use Function Score to retain scores from the previous query
+ $boostQuery = new \Elastica\Query\FunctionScore();
+ $boostQuery->addScriptScoreFunction( $script );
+ $boostQuery->setBoostMode(
\Elastica\Query\FunctionScore::BOOST_MODE_REPLACE );
+
+ $filteredQuery = new \Elastica\Query\Filtered();
+ $filterbool = new \Elastica\Filter\Bool();
+
+ $boostQuery->setQuery( $idQuery );
+
+ // Wrap inside another query
+ $query->setQuery( $boostQuery );
+
+ $language = new \Elastica\Facet\Terms( 'language' );
+ $language->setField( 'language' );
+ $language->setSize( 500 );
+ $query->addFacet( $language );
+
+ $group = new \Elastica\Facet\Terms( 'group' );
+ $group->setField( 'group' );
+ $group->setSize( 500 );
+ $query->addFacet( $group );
+
+
$filters = new \Elastica\Filter\Bool();
$language = $opts->getValue( 'language' );
@@ -509,18 +608,7 @@
$query->setFilter( $filters );
}
- list( $pre, $post ) = $highlight;
- $query->setHighlight( array(
- // The value must be an object
- 'fields' => array(
- 'content' => array(
- 'number_of_fragments' => 0,
- ),
- ),
- 'pre_tags' => array( $pre ),
- 'post_tags' => array( $post ),
- ) );
-
+ $query->setParam( '_source', array( 'content', 'localid',
'language', 'group', 'wiki', 'uri' ) );
try {
return $this->getType()->getIndex()->search( $query );
} catch ( \Elastica\Exception\ExceptionInterface $e ) {
@@ -528,6 +616,16 @@
}
}
+ public function applyFilter( $resultquery, $opts ) {
+
+ // Get list of localids and scores to find second query
+ $output = $this->getLocalId( $resultquery );
+ // Get the list of messages for which translations exist
+ $resultset = $this->filterTranslation( $output, $opts );
+
+ return $resultset;
+ }
+
public function getFacets( $resultset ) {
$facets = $resultset->getFacets();
--
To view, visit https://gerrit.wikimedia.org/r/218859
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I246aeea623c1d81f5ce148185be8c9c9ef8f0923
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Translate
Gerrit-Branch: master
Gerrit-Owner: Phoenix303 <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits