WMDE-leszek has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/358531 )

Change subject: Only load entity data in TermSqlIndexBuilder when necessary
......................................................................

Only load entity data in TermSqlIndexBuilder when necessary

Makes TermSqlIndexBuilder load entity data and check whether
any terms are missing or outdated only when requested.

This should make the "default" rebuild faster, as it
will only check the contents of wb_terms to detect possible
unpopulated ID fields and/or duplicate term entries,
without needing to load all the data for each
processed entity.

The RebuildTermSqlIndex maintenance script also gets a corresponding
option (--rebuild-all-terms), which is turned off by default.

Bug: T162533
Change-Id: Ia0409ebfd7c905b178c8ebe17945226a845e010c
---
M repo/includes/Store/Sql/TermSqlIndexBuilder.php
M repo/maintenance/rebuildTermSqlIndex.php
2 files changed, 44 insertions(+), 7 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Wikibase 
refs/changes/31/358531/1

diff --git a/repo/includes/Store/Sql/TermSqlIndexBuilder.php 
b/repo/includes/Store/Sql/TermSqlIndexBuilder.php
index a734705..e468e00 100644
--- a/repo/includes/Store/Sql/TermSqlIndexBuilder.php
+++ b/repo/includes/Store/Sql/TermSqlIndexBuilder.php
@@ -13,6 +13,12 @@
 
 /**
  * (Re)builds term index in the SQL table.
+ * This can add missing information to the SQL table like missing full entity 
ID. It also removes
+ * possible duplicate terms.
+ * It can also ensure that all expected entity terms are stored in the term 
index, i.e. add
+ * all possible missing terms of the given entity, and remove all possible no 
longer valid
+ * terms of the entity, even if there is no other need for rebuilding the index
+ * (i.e. all ID fields are populated, there are no duplicate entries).
  *
  * @license GPL-2.0+
  * @author Katie Filbert < aude.w...@gmail.com >
@@ -72,6 +78,11 @@
        private $batchSize;
 
        /**
+        * @var bool
+        */
+       private $rebuildAllEntityTerms = false;
+
+       /**
         * @var int|null
         */
        private $fromId = null;
@@ -128,6 +139,15 @@
        }
 
        /**
+        * Makes the builder rebuild all entity terms, i.e. it will check if 
any of entity terms
+        * is missing, and/or any of existing entity terms is no longer 
"correct".
+        * Missing terms will be added, and no longer expected terms will be 
removed.
+        */
+       public function setRebuildAllEntityTerms() {
+               $this->rebuildAllEntityTerms = true;
+       }
+
+       /**
         * @param string $entityType
         */
        private function rebuildForEntityType( $entityType ) {
@@ -163,16 +183,22 @@
        private function rebuildEntityTerms( EntityId $entityId ) {
                $serializedId = $entityId->getSerialization();
 
-               $entityRevision = 
$this->entityRevisionLookup->getEntityRevision( $entityId );
-               $entity = $entityRevision->getEntity();
-
-               $rebuiltTerms = $this->termSqlIndex->getEntityTerms( $entity );
                $existingTerms = $this->termSqlIndex->getTermsOfEntity( 
$entityId );
 
-               $termsToInsert = array_udiff( $rebuiltTerms, $existingTerms, [ 
TermIndexEntry::class, 'compare' ] );
-               $termsToDelete = array_udiff( $existingTerms, $rebuiltTerms, [ 
TermIndexEntry::class, 'compare' ] );
+               $entityRevision = null;
+               $termsChanged = false;
 
-               $termsChanged = $termsToInsert || $termsToDelete;
+               if ( $this->rebuildAllEntityTerms ) {
+                       $entityRevision = 
$this->entityRevisionLookup->getEntityRevision( $entityId );
+                       $entity = $entityRevision->getEntity();
+
+                       $rebuiltTerms = $this->termSqlIndex->getEntityTerms( 
$entity );
+
+                       $termsToInsert = array_udiff( $rebuiltTerms, 
$existingTerms, [ TermIndexEntry::class, 'compare' ] );
+                       $termsToDelete = array_udiff( $existingTerms, 
$rebuiltTerms, [ TermIndexEntry::class, 'compare' ] );
+
+                       $termsChanged = $termsToInsert || $termsToDelete;
+               }
 
                $needToPopulateEntityIdColumn = !$this->readFullEntityIdColumn 
&&
                        $this->writeFullEntityIdColumn &&
@@ -194,6 +220,10 @@
                        return;
                }
 
+               if ( $entityRevision === null ) {
+                       $entityRevision = 
$this->entityRevisionLookup->getEntityRevision( $entityId );
+               }
+
                $success = $this->termSqlIndex->saveTermsOfEntity( 
$entityRevision->getEntity() );
 
                if ( !$success ) {
diff --git a/repo/maintenance/rebuildTermSqlIndex.php 
b/repo/maintenance/rebuildTermSqlIndex.php
index 665ee32..7e3a448 100644
--- a/repo/maintenance/rebuildTermSqlIndex.php
+++ b/repo/maintenance/rebuildTermSqlIndex.php
@@ -39,6 +39,9 @@
                        false,
                        true
                );
+               $this->addOption(
+                       'rebuild-all-terms', 'Rebuilds all terms of the entity 
(requires loading data of each processed entity)'
+               );
                $this->addOption( 'from-id', "First row (page id) to start 
updating from", false, true );
        }
 
@@ -57,6 +60,7 @@
        private function getTermIndexBuilder() {
                $batchSize = (int)$this->getOption( 'batch-size', 1000 );
                $fromId = $this->getOption( 'from-id', null );
+               $rebuildAllEntityTerms = $this->getOption( 'rebuild-all-terms', 
false );
 
                $wikibaseRepo = WikibaseRepo::getDefaultInstance();
                $idParser = $wikibaseRepo->getEntityIdParser();
@@ -91,6 +95,9 @@
                if ( $fromId !== null ) {
                        $builder->setFromId( (int)$fromId );
                }
+               if ( $rebuildAllEntityTerms ) {
+                       $builder->setRebuildAllEntityTerms();
+               }
 
                return $builder;
        }

-- 
To view, visit https://gerrit.wikimedia.org/r/358531
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia0409ebfd7c905b178c8ebe17945226a845e010c
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Wikibase
Gerrit-Branch: master
Gerrit-Owner: WMDE-leszek <leszek.mani...@wikimedia.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to