jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/337604 )
Change subject: Add support for elastic5
......................................................................
Add support for elastic5
Add support for Elastica 5 by checking for the absence
of the \Elastica\Script class.
The compatibility issues were small enough to keep
support for Elastica 3.x (elastic 2.x).
Bug: T158070
Change-Id: I68a8a838bb1e53fd9ab26e2754f03a5104770441
---
M ttmserver/ElasticSearchTTMServer.php
M ttmserver/FuzzyLikeThis.php
2 files changed, 58 insertions(+), 55 deletions(-)
Approvals:
EBernhardson: Looks good to me, approved
jenkins-bot: Verified
diff --git a/ttmserver/ElasticSearchTTMServer.php
b/ttmserver/ElasticSearchTTMServer.php
index dc46eb4..ae12364 100644
--- a/ttmserver/ElasticSearchTTMServer.php
+++ b/ttmserver/ElasticSearchTTMServer.php
@@ -84,7 +84,7 @@
protected function doQuery( $sourceLanguage, $targetLanguage, $text ) {
if ( !$this->useWikimediaExtraPlugin() ) {
- // ElasticTTM is currently not compatible with
elasticsearch 2.x
+ // ElasticTTM is currently not compatible with
elasticsearch 2.x/5.x
// It needs FuzzyLikeThis ported via the wmf extra
plugin
throw new \RuntimeException( 'The wikimedia extra
plugin is mandatory.' );
}
@@ -111,15 +111,24 @@
)
);
} else {
+ // TODO: should we remove this code block? The extra
+ // plugin is now mandatory, so we will never use the
+ // groovy script.
+ if ( $this->isElastica5() ) {
+ $scriptClass = \Elastica\Script\Script::class;
+ } else {
+ $scriptClass = \Elastica\Script::class;
+ }
+
$groovyScript =
<<<GROOVY
import org.apache.lucene.search.spell.*
new LevensteinDistance().getDistance(srctxt, _source['content'])
GROOVY;
- $script = new \Elastica\Script(
+ $script = new $scriptClass(
$groovyScript,
array( 'srctxt' => $text ),
- \Elastica\Script::LANG_GROOVY
+ $scriptClass::LANG_GROOVY
);
$boostQuery->addScriptScoreFunction( $script );
}
@@ -381,28 +390,35 @@
$mapping = new \Elastica\Type\Mapping();
$mapping->setType( $type );
+
+ $keywordType = array( 'type' => 'string', 'index' =>
'not_analyzed' );
+ $textType = 'string';
+ if ( $this->isElastica5() ) {
+ $keywordType = array( 'type' => 'keyword' );
+ $textType = 'text';
+ }
$mapping->setProperties( array(
- 'wiki' => array( 'type' => 'string', 'index' =>
'not_analyzed' ),
- 'localid' => array( 'type' => 'string', 'index' =>
'not_analyzed' ),
- 'uri' => array( 'type' => 'string', 'index' =>
'not_analyzed' ),
- 'language' => array( 'type' => 'string', 'index' =>
'not_analyzed' ),
- 'group' => array( 'type' => 'string', 'index' =>
'not_analyzed' ),
+ 'wiki' => $keywordType,
+ 'localid' => $keywordType,
+ 'uri' => $keywordType,
+ 'language' => $keywordType,
+ 'group' => $keywordType,
'content' => array(
- 'type' => 'string',
+ 'type' => $textType,
'fields' => array(
'content' => array(
- 'type' => 'string',
+ 'type' => $textType,
'index' => 'analyzed',
'term_vector' => 'yes'
),
'prefix_complete' => array(
- 'type' => 'string',
+ 'type' => $textType,
'analyzer' => 'prefix',
'search_analyzer' => 'standard',
'term_vector' => 'yes'
),
'case_sensitive' => array(
- 'type' => 'string',
+ 'type' => $textType,
'index' => 'analyzed',
'analyzer' => 'casesensitive',
'term_vector' => 'yes'
@@ -457,7 +473,11 @@
public function endBootstrap() {
$index = $this->getType()->getIndex();
$index->refresh();
- $index->optimize();
+ if ( $this->isElastica5() ) {
+ $index->forcemerge();
+ } else {
+ $index->optimize();
+ }
$index->getSettings()->setRefreshInterval( '5s' );
}
@@ -752,26 +772,23 @@
*/
private function deleteByQuery( \Elastica\Type $type, \Elastica\Query
$query ) {
$retryAttempts = self::BULK_INDEX_RETRY_ATTEMPTS;
- $scrollOptions = array(
- 'search_type' => 'scan',
- 'scroll' => '15m',
- 'size' => self::BULK_DELETE_CHUNK_SIZE,
- );
- $result = $type->search( $query, $scrollOptions );
- MWElasticUtils::iterateOverScroll( $type->getIndex(),
- $result->getResponse()->getScrollId(), '15m',
- function( $results ) use( $retryAttempts, $type ) {
- $ids = array();
- foreach ( $results as $result ) {
- $ids[] = $result->getId();
+ $search = new \Elastica\Search( $this->getClient() );
+ $search->setQuery( $query );
+ $search->addType( $type );
+ $scroll = new \Elastica\Scroll( $search, '15m' );
+
+ foreach ( $scroll as $results ) {
+ $ids = array();
+ foreach ( $results as $result ) {
+ $ids[] = $result->getId();
+ }
+ MWElasticUtils::withRetry( $retryAttempts,
+ function() use ( $ids, $type ) {
+ $type->deleteIds( $ids );
}
- MWElasticUtils::withRetry( $retryAttempts,
- function() use ( $ids, $type ) {
- $type->deleteIds( $ids );
- }
- );
- }, 0, $retryAttempts );
+ );
+ }
}
/**
@@ -804,4 +821,14 @@
return false;
}
}
+
+ /**
+ * @return bool true if running with Elastica 5+
+ */
+ private function isElastica5() {
+ // Sadly Elastica does not seem to expose its version, so we
+ // check for the absence of a class that was removed in
+ // version 5
+ return !class_exists( \Elastica\Script::class );
+ }
}
diff --git a/ttmserver/FuzzyLikeThis.php b/ttmserver/FuzzyLikeThis.php
index 592ac34..96fe331 100644
--- a/ttmserver/FuzzyLikeThis.php
+++ b/ttmserver/FuzzyLikeThis.php
@@ -90,13 +90,6 @@
protected $_prefixLength = 0;
/**
- * Boost.
- *
- * @var float Boost
- */
- protected $_boost = 1.0;
-
- /**
* Analyzer.
*
* @var sting Analyzer
@@ -159,19 +152,6 @@
}
/**
- * Set boost.
- *
- * @param float $value Boost value
- *
- * @return $this
- */
- public function setBoost( $value ) {
- $this->_boost = (float) $value;
-
- return $this;
- }
-
- /**
* Set Prefix Length.
*
* @param int $value Prefix length
@@ -221,10 +201,6 @@
public function toArray() {
if ( !empty( $this->_fields ) ) {
$args['fields'] = $this->_fields;
- }
-
- if ( !empty( $this->_boost ) ) {
- $args['boost'] = $this->_boost;
}
if ( !empty( $this->_analyzer ) ) {
--
To view, visit https://gerrit.wikimedia.org/r/337604
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I68a8a838bb1e53fd9ab26e2754f03a5104770441
Gerrit-PatchSet: 4
Gerrit-Project: mediawiki/extensions/Translate
Gerrit-Branch: master
Gerrit-Owner: DCausse <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Nikerabbit <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits