Phoenix303 has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/218859

Change subject: Allow cross language search for a string in other languages
......................................................................

Allow cross language search for a string in other languages

Use a filtered query to search for a string in the source language, and
use the language filter to show the translated messages in the selected
target language.

The top-level search query is now a filtered query, which searches for
the string in the source language, e.g. a search for "wiki" in "en":
{
    "query" :
    {
        "filtered":
        {
            "query":
            {
                "bool":
                {
                    "should":
                    [
                        {
                            "match" : {
                                "content" : {
                                    "query" : "wiki"
                                }
                            }
                        },
                        {
                            "term" : {
                                "localid" : {
                                    "value": "wiki",
                                    "boost" : 1
                                }
                            }
                        }
                    ]
                }
            },
            "filter" :
            {
                "bool" :
                {
                    "must" :
                    [
                        {
                            "term": {
                                "language" :"en"
                            }
                        }
                    ]
                }
            }
        }
    }
}

Bug: T101220
Change-Id: I246aeea623c1d81f5ce148185be8c9c9ef8f0923
---
M i18n/search/en.json
M specials/SpecialSearchTranslations.php
M ttmserver/ElasticSearchTTMServer.php
3 files changed, 186 insertions(+), 67 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Translate 
refs/changes/59/218859/1

diff --git a/i18n/search/en.json b/i18n/search/en.json
index 5b7ea8c..a029126 100644
--- a/i18n/search/en.json
+++ b/i18n/search/en.json
@@ -17,5 +17,6 @@
        "tux-sst-solr-offline-title": "Search unavailable",
        "tux-sst-solr-offline-body": "The search service is temporarily 
unavailable.",
        "tux-sst-next": "Next ›",
-       "tux-sst-prev": "‹ Previous"
-}
\ No newline at end of file
+       "tux-sst-prev": "‹ Previous",
+       "tux-sst-nolang-selected": "Please select a target language to see 
messages"
+}
diff --git a/specials/SpecialSearchTranslations.php 
b/specials/SpecialSearchTranslations.php
index d8a9b12..4959b36 100644
--- a/specials/SpecialSearchTranslations.php
+++ b/specials/SpecialSearchTranslations.php
@@ -87,13 +87,63 @@
                        throw new ErrorPageError( 'tux-sst-solr-offline-title', 
'tux-sst-solr-offline-body' );
                }
 
-               // Part 1: facets
-               $facets = $server->getFacets( $resultset );
+               $result = $server->applyFilter( $resultset, $opts );
 
+               // Part 1: facets
+               $facets = $server->getFacets( $result );
+               $facetHtml = $this->viewFacets( $facets );
+
+               // Part 2: results
+               $documents = $server->getDocuments( $result );
+               $resultsHtml = $this->getResultsHtml( $documents );
+
+               $resultsHtml .= Html::rawElement( 'hr', array( 'class' => 
'tux-pagination-line' ) );
+
+               $prev = $next = '';
+               $total = $server->getTotalHits( $result );
+               $offset = $this->opts->getValue( 'offset' );
+               $params = $this->opts->getChangedValues();
+
+               if ( $total - $offset > $this->limit ) {
+                       $newParams = array( 'offset' => $offset + $this->limit 
) + $params;
+                       $attribs = array(
+                               'class' => 'mw-ui-button pager-next',
+                               'href' => $this->getPageTitle()->getLocalUrl( 
$newParams ),
+                       );
+                       $next = Html::element( 'a', $attribs, $this->msg( 
'tux-sst-next' )->text() );
+               }
+               if ( $offset ) {
+                       $newParams = array( 'offset' => max( 0, $offset - 
$this->limit ) ) + $params;
+                       $attribs = array(
+                               'class' => 'mw-ui-button pager-prev',
+                               'href' => $this->getPageTitle()->getLocalUrl( 
$newParams ),
+                       );
+                       $prev = Html::element( 'a', $attribs, $this->msg( 
'tux-sst-prev' )->text() );
+               }
+
+               $resultsHtml .= Html::rawElement( 'div', array( 'class' => 
'tux-pagination-links' ),
+                       "$prev $next"
+               );
+
+               $search = $this->getSearchInput( $queryString );
+               $count = $this->msg( 'tux-sst-count' )->numParams( $total );
+
+               $language = $opts->getValue( 'language' );
+               if ( $language === '') {
+                       $resultsHtml = Html::element( 'span',
+                               array(),
+                               $this->msg( 'tux-sst-nolang-selected' )->text()
+                       );
+               }
+
+               $this->showSearch( $search, $count, $facetHtml, $resultsHtml );
+       }
+
+       protected function viewFacets( $facets ) {
                $facetHtml = Html::element( 'div',
                        array( 'class' => 'row facet languages',
                                'data-facets' => FormatJson::encode( 
$this->getLanguages( $facets['language'] ) ),
-                               'data-language' => $opts->getValue( 'language' 
),
+                               'data-language' => $this->opts->getValue( 
'language' ),
                        ),
                        $this->msg( 'tux-sst-facet-language' )
                );
@@ -101,14 +151,14 @@
                $facetHtml .= Html::element( 'div',
                        array( 'class' => 'row facet groups',
                                'data-facets' => FormatJson::encode( 
$this->getGroups( $facets['group'] ) ),
-                               'data-group' => $opts->getValue( 'group' ) ),
+                               'data-group' => $this->opts->getValue( 'group' 
) ),
                        $this->msg( 'tux-sst-facet-group' )
                );
+               return $facetHtml;
+       }
 
-               // Part 2: results
-               $resultsHtml = '';
-               $documents = $server->getDocuments( $resultset );
-
+       protected function getResultsHtml( $documents ) {
+               $resultsHtml='';
                foreach ( $documents as $document ) {
                        $text = $document['content'];
                        $text = TranslateUtils::convertWhiteSpaceToHTML( $text 
);
@@ -169,39 +219,7 @@
                                . $edit
                                . Html::closeElement( 'div' );
                }
-
-               $resultsHtml .= Html::rawElement( 'hr', array( 'class' => 
'tux-pagination-line' ) );
-
-               $prev = $next = '';
-               $total = $server->getTotalHits( $resultset );
-               $offset = $this->opts->getValue( 'offset' );
-               $params = $this->opts->getChangedValues();
-
-               if ( $total - $offset > $this->limit ) {
-                       $newParams = array( 'offset' => $offset + $this->limit 
) + $params;
-                       $attribs = array(
-                               'class' => 'mw-ui-button pager-next',
-                               'href' => $this->getPageTitle()->getLocalUrl( 
$newParams ),
-                       );
-                       $next = Html::element( 'a', $attribs, $this->msg( 
'tux-sst-next' )->text() );
-               }
-               if ( $offset ) {
-                       $newParams = array( 'offset' => max( 0, $offset - 
$this->limit ) ) + $params;
-                       $attribs = array(
-                               'class' => 'mw-ui-button pager-prev',
-                               'href' => $this->getPageTitle()->getLocalUrl( 
$newParams ),
-                       );
-                       $prev = Html::element( 'a', $attribs, $this->msg( 
'tux-sst-prev' )->text() );
-               }
-
-               $resultsHtml .= Html::rawElement( 'div', array( 'class' => 
'tux-pagination-links' ),
-                       "$prev $next"
-               );
-
-               $search = $this->getSearchInput( $queryString );
-               $count = $this->msg( 'tux-sst-count' )->numParams( $total );
-
-               $this->showSearch( $search, $count, $facetHtml, $resultsHtml );
+               return $resultsHtml;
        }
 
        protected function getLanguages( array $facet ) {
@@ -327,7 +345,9 @@
                $input = Xml::input( 'query', false, $query, $attribs );
                $submit = Xml::submitButton( $this->msg( 'tux-sst-search' ), 
array( 'class' => 'button' ) );
                $lang = $this->getRequest()->getVal( 'language' );
-               $language = is_null( $lang ) ? '' : Html::hidden( 'language', 
$lang );
+               $code = $this->getLanguage()->getCode();
+               $language = is_null( $lang ) ?
+                       Html::hidden( 'language', $code ) : Html::hidden( 
'language', $lang );
 
                $form = Html::rawElement( 'form', array( 'action' => wfScript() 
),
                        $title . $input . $submit . $language
diff --git a/ttmserver/ElasticSearchTTMServer.php 
b/ttmserver/ElasticSearchTTMServer.php
index 40d9199..bd9fa0d 100644
--- a/ttmserver/ElasticSearchTTMServer.php
+++ b/ttmserver/ElasticSearchTTMServer.php
@@ -460,19 +460,30 @@
 
                // Allow searching either by message content or message id 
(page name
                // without language subpage) with exact match only.
-               $serchQuery = new \Elastica\Query\Bool();
+               $searchQuery = new \Elastica\Query\Bool();
                $contentQuery = new \Elastica\Query\Match();
                $contentQuery->setFieldQuery( 'content', $queryString );
-               $serchQuery->addShould( $contentQuery );
+               $searchQuery->addShould( $contentQuery );
                $messageQuery = new \Elastica\Query\Term();
                $messageQuery->setTerm( 'localid', $queryString );
-               $serchQuery->addShould( $messageQuery );
-               $query->setQuery( $serchQuery );
+               $searchQuery->addShould( $messageQuery );
 
-               $language = new \Elastica\Facet\Terms( 'language' );
-               $language->setField( 'language' );
-               $language->setSize( 500 );
-               $query->addFacet( $language );
+               $filteredQuery = new \Elastica\Query\Filtered();
+               $filterbool = new \Elastica\Filter\Bool();
+
+               $context = RequestContext::getMain();
+               $languageCode = $context->getLanguage()->getCode();
+
+               $languageFilter = new \Elastica\Filter\Term();
+               $languageFilter->setTerm( 'language', $languageCode );
+               $filterbool->addMust( $languageFilter );
+
+               $filteredQuery->setFilter($filterbool);
+               $filteredQuery->setQuery($searchQuery);
+
+               $query->setQuery( $filteredQuery );
+               $query->setParam( '_source', array( 'localid', 'content', 
'uri', 'wiki', 'group' ) );
+               $query->setSize( 1000 );
 
                $group = new \Elastica\Facet\Terms( 'group' );
                $group->setField( 'group' );
@@ -481,13 +492,101 @@
                $group->setSize( 500 );
                $query->addFacet( $group );
 
-               $query->setSize( $opts->getValue( 'limit' ) );
-               $query->setFrom( $opts->getValue( 'offset' ) );
-
                // BoolAnd filters are executed in sequence per document. Bool 
filters with
                // multiple must clauses are executed by converting each filter 
into a bit
                // field then anding them together. The latter is normally 
faster if either
                // of the subfilters are reused. May not make a difference in 
this context.
+               $filters = new \Elastica\Filter\Bool();
+
+               $group = $opts->getValue( 'group' );
+               if ( $group !== '' ) {
+                       $groupFilter = new \Elastica\Filter\Term();
+                       $groupFilter->setTerm( 'group', $group );
+                       $filters->addMust( $groupFilter );
+               }
+
+               // Check that we have at least one filter to avoid invalid 
query errors.
+               if ( $group !== '' ) {
+                       $query->setFilter( $filters );
+               }
+
+               list( $pre, $post ) = $highlight;
+               $query->setHighlight( array(
+                       // The value must be an object
+                       'fields' => array(
+                               'content' => array(
+                                       'number_of_fragments' => 0,
+                               ),
+                       ),
+                       'pre_tags' => array( $pre ),
+                       'post_tags' => array( $post ),
+               ) );
+
+               try {
+                       // Return results for a search string in a source 
language
+                       return $this->getType()->getIndex()->search( $query );
+               } catch ( \Elastica\Exception\ExceptionInterface $e ) {
+                       throw new TTMServerException( $e->getMessage() );
+               }
+       }
+
+       protected function getLocalId( $resultsource ) {
+               $terms = array();
+               foreach ( $resultsource->getResults() as $result ) {
+
+                       $data = $result->getData();
+                       $score = $result->getScore();
+
+                       $scores[$data['localid']] = $score;
+                       $terms[] = $data['localid'];
+
+               }
+               return array(
+                               'terms' => $terms,
+                               'scores' => $scores
+                       );
+       }
+
+       protected function filterTranslation( $data, $opts ) {
+
+               $idQuery = new \Elastica\Query\Terms();
+               $idQuery->setTerms( 'localid', $data['terms'] );
+
+               $query = new \Elastica\Query();
+               $groovyScript =
+<<<GROOVY
+return prescore.get(doc['localid'].value);
+GROOVY;
+               $script = new \Elastica\Script(
+                       $groovyScript,
+                       array( 'prescore' => $data['scores'] ),
+                       \Elastica\Script::LANG_GROOVY
+               );
+
+               // Use Function Score to retain scores from the previous query
+               $boostQuery = new \Elastica\Query\FunctionScore();
+               $boostQuery->addScriptScoreFunction( $script );
+               $boostQuery->setBoostMode( 
\Elastica\Query\FunctionScore::BOOST_MODE_REPLACE );
+
+               $filteredQuery = new \Elastica\Query\Filtered();
+               $filterbool = new \Elastica\Filter\Bool();
+
+               $boostQuery->setQuery( $idQuery );
+
+               // Wrap inside another query
+               $query->setQuery( $boostQuery );
+
+               $language = new \Elastica\Facet\Terms( 'language' );
+               $language->setField( 'language' );
+               $language->setSize( 500 );
+               $query->addFacet( $language );
+
+               $group = new \Elastica\Facet\Terms( 'group' );
+               $group->setField( 'group' );
+               $group->setSize( 500 );
+               $query->addFacet( $group );
+
+
                $filters = new \Elastica\Filter\Bool();
 
                $language = $opts->getValue( 'language' );
@@ -509,18 +608,7 @@
                        $query->setFilter( $filters );
                }
 
-               list( $pre, $post ) = $highlight;
-               $query->setHighlight( array(
-                       // The value must be an object
-                       'fields' => array(
-                               'content' => array(
-                                       'number_of_fragments' => 0,
-                               ),
-                       ),
-                       'pre_tags' => array( $pre ),
-                       'post_tags' => array( $post ),
-               ) );
-
+               $query->setParam( '_source', array( 'content', 'localid', 
'language', 'group', 'wiki', 'uri' ) );
                try {
                        return $this->getType()->getIndex()->search( $query );
                } catch ( \Elastica\Exception\ExceptionInterface $e ) {
@@ -528,6 +616,16 @@
                }
        }
 
+       public function applyFilter( $resultquery, $opts ) {
+
+               // Get list of localids and scores to find second query
+               $output = $this->getLocalId( $resultquery );
+               // Get the list of messages for which translations exist
+               $resultset = $this->filterTranslation( $output, $opts );
+
+               return $resultset;
+       }
+
        public function getFacets( $resultset ) {
                $facets = $resultset->getFacets();
 

-- 
To view, visit https://gerrit.wikimedia.org/r/218859
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I246aeea623c1d81f5ce148185be8c9c9ef8f0923
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Translate
Gerrit-Branch: master
Gerrit-Owner: Phoenix303 <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to