Matthias Mullie has uploaded a new change for review. https://gerrit.wikimedia.org/r/233432
Change subject: [WIP] Fix references/storage ...................................................................... [WIP] Fix references/storage Patch should be done, but I want to do some more testing Bug: T94569 Change-Id: I23147fd711425e770369f092ad25d0de2d354c2f --- M container.php M includes/Data/Index/FeatureIndex.php M includes/Data/Index/TopKIndex.php M includes/Data/Listener/ReferenceRecorder.php M includes/Data/Mapper/BasicObjectMapper.php M includes/Data/Mapper/CachingObjectMapper.php M includes/Data/ObjectManager.php M includes/Data/ObjectMapper.php M includes/Data/ObjectStorage.php M includes/Data/Storage/BasicDbStorage.php M includes/Data/Storage/DbStorage.php M includes/Data/Storage/PostRevisionStorage.php M includes/Data/Storage/RevisionStorage.php M includes/Data/Storage/TopicHistoryStorage.php M includes/LinksTableUpdater.php 15 files changed, 135 insertions(+), 53 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Flow refs/changes/32/233432/1 diff --git a/container.php b/container.php index 6f5e023..1aa0341 100644 --- a/container.php +++ b/container.php @@ -172,16 +172,17 @@ $c['storage.workflow.class'] = 'Flow\Model\Workflow'; $c['storage.workflow.table'] = 'flow_workflow'; $c['storage.workflow.primary_key'] = array( 'workflow_id' ); -$c['storage.workflow.backend'] = function( $c ) { - return new BasicDbStorage( - $c['db.factory'], - $c['storage.workflow.table'], - $c['storage.workflow.primary_key'] - ); -}; $c['storage.workflow.mapper'] = function( $c ) { return CachingObjectMapper::model( $c['storage.workflow.class'], + $c['storage.workflow.primary_key'] + ); +}; +$c['storage.workflow.backend'] = function( $c ) { + return new BasicDbStorage( + $c['db.factory'], + $c['storage.workflow.mapper'], + $c['storage.workflow.table'], $c['storage.workflow.primary_key'] ); }; @@ -262,7 +263,7 @@ }; $c['storage.board_history.backend'] = function( $c ) { - return new BoardHistoryStorage( $c['db.factory'] ); + return new BoardHistoryStorage( $c['db.factory'], $c['storage.board_history.mapper'] ); }; $c['storage.board_history.indexes.primary'] = function( $c ) { return new BoardHistoryIndex( @@ -346,6 +347,7 @@ global $wgFlowExternalStore; return new HeaderRevisionStorage( $c['db.factory'], + $c['storage.header.mapper'], $wgFlowExternalStore ); @@ -425,6 +427,7 @@ global $wgFlowExternalStore; return new PostSummaryRevisionStorage( $c['db.factory'], + $c['storage.post_summary.mapper'], $wgFlowExternalStore ); }; @@ -475,6 +478,7 @@ $c['storage.topic_list.indexes.last_updated.backend'] = function( $c ) { return new TopicListLastUpdatedStorage( $c['db.factory'], + $c['storage.topic_list.mapper'], $c['storage.topic_list.table'], $c['storage.topic_list.primary_key'] ); @@ -489,6 +493,7 @@ return new TopicListStorage( // factory and table $c['db.factory'], + $c['storage.topic_list.mapper'], $c['storage.topic_list.table'], $c['storage.topic_list.primary_key'] ); @@ -553,6 +558,7 @@ global $wgFlowExternalStore; return new PostRevisionStorage( $c['db.factory'], + $c['storage.post.mapper'], $wgFlowExternalStore, $c['repository.tree'] ); @@ -651,10 +657,10 @@ }; $c['storage.topic_history.primary_key'] = array( 'rev_id' ); $c['storage.topic_history.backend'] = function( $c ) { - global $wgFlowExternalStore; return new TopicHistoryStorage( - new PostRevisionStorage( $c['db.factory'], $wgFlowExternalStore, $c['repository.tree'] ), - new PostSummaryRevisionStorage( $c['db.factory'], $wgFlowExternalStore ), + $c['storage.post.backend'], + $c['storage.post_summary.backend'], + $c['storage.topic_history.mapper'], $c['repository.tree'] ); }; @@ -1054,6 +1060,7 @@ $c['storage.wiki_reference.backend'] = function( $c ) { return new BasicDbStorage( $c['db.factory'], + $c['storage.wiki_reference.mapper'], $c['storage.wiki_reference.table'], $c['storage.wiki_reference.primary_key'] ); @@ -1120,6 +1127,7 @@ return new BasicDbStorage( // factory and table $c['db.factory'], + $c['storage.url_reference.mapper'], $c['storage.url_reference.table'], $c['storage.url_reference.primary_key'] ); @@ -1203,7 +1211,8 @@ $c['reference.extractor'], $c['reference.updater.links-tables'], $c['storage'], - $c['repository.tree'] + $c['repository.tree'], + $c['deferred_queue'] ); }; diff --git a/includes/Data/Index/FeatureIndex.php b/includes/Data/Index/FeatureIndex.php index a7a7f87..b7e3f7a 100644 --- a/includes/Data/Index/FeatureIndex.php +++ b/includes/Data/Index/FeatureIndex.php @@ -347,7 +347,8 @@ if ( !$indexed ) { throw new DataModelException( 'Unindexable row: ' . FormatJson::encode( $old ), 'process-data' ); } - $this->removeFromIndex( $indexed, $old ); + $compacted = $this->rowCompactor->compactRow( UUID::convertUUIDs( $old, 'alphadecimal' ) ); + $this->removeFromIndex( $indexed, $compacted ); } /** @@ -412,6 +413,11 @@ continue; } + // normalize rows read from DB: schema may be different than the + // real data (e.g. nullable rows no longer used or in preparation + // for changes) + $rows = array_map( array( $this->storage, 'normalize' ), $rows ); + $compacted = $this->rowCompactor->compactRows( $rows ); $callback = function( \BagOStuff $cache, $key, $value ) use ( $compacted ) { if ( $value !== false ) { diff --git a/includes/Data/Index/TopKIndex.php b/includes/Data/Index/TopKIndex.php index b79e93f..beb53a1 100644 --- a/includes/Data/Index/TopKIndex.php +++ b/includes/Data/Index/TopKIndex.php @@ -9,6 +9,7 @@ use Flow\Data\Compactor\ShallowCompactor; use Flow\Data\Utils\SortArrayByKeys; use Flow\Exception\InvalidInputException; +use Flow\Model\UUID; /** * Holds the top k items with matching $indexed columns. List is sorted and truncated to specified size. @@ -68,14 +69,17 @@ protected function addToIndex( array $indexed, array $row ) { $self = $this; + // If this used redis instead of memcached, could it add to index in position // without retry possibility? need a single number that will properly sort rows. $this->cache->merge( $this->cacheKey( $indexed ), - function( BagOStuff $cache, $key, $value ) use( $self, $row ) { + function( BagOStuff $cache, $key, $value ) use( $self, $row, $indexed ) { if ( $value === false ) { return false; } + $value = $self->normalize( $value, $indexed ); + $idx = array_search( $row, $value ); if ( $idx !== false ) { return false; // This row already exists somehow @@ -95,12 +99,16 @@ } protected function removeFromIndex( array $indexed, array $row ) { + $self = $this; + $this->cache->merge( $this->cacheKey( $indexed ), - function( BagOStuff $cache, $key, $value ) use( $row ) { + function( BagOStuff $cache, $key, $value ) use( $self, $row, $indexed ) { if ( $value === false ) { return false; } + $value = $self->normalize( $value, $indexed ); + $idx = array_search( $row, $value ); if ( $idx === false ) { return false; @@ -115,11 +123,13 @@ $self = $this; $this->cache->merge( $this->cacheKey( $indexed ), - function( BagOStuff $cache, $key, $value ) use( $self, $oldRow, $newRow ) { + function( BagOStuff $cache, $key, $value ) use( $self, $oldRow, $newRow, $indexed ) { if ( $value === false ) { return false; } + $value = $self->normalize( $value, $indexed ); $retval = $value; + $idx = array_search( $oldRow, $retval ); if ( $idx !== false ) { unset( $retval[$idx] ); @@ -137,6 +147,23 @@ ); } + /** + * In order to be able to reliably find a row in an array of + * cached rows, we need to normalize those rows: they may be + * outdated (e.g. missing/additional rows in cache) + * + * @param array $rows + * @param array $indexed + * @return array + */ + protected function normalize( $rows, $indexed ) { + $fromCache = array( 'from-cache' => $rows ); + $keyToQuery = array( 'from-cache' => UUID::convertUUIDs( $indexed, 'alphadecimal' ) ); + $rows = $this->rowCompactor->expandCacheResult( $fromCache, $keyToQuery ); + $rows = array_map( array( $this->storage, 'normalize' ), $rows['from-cache'] ); + return $this->rowCompactor->compactRows( $rows ); + } + // INTERNAL: in 5.4 it can be protected public function sortIndex( array $values ) { // I dont think this is a valid way to sort a 128bit integer string diff --git a/includes/Data/Listener/ReferenceRecorder.php b/includes/Data/Listener/ReferenceRecorder.php index f4e47ef..6a1f3f9 100644 --- a/includes/Data/Listener/ReferenceRecorder.php +++ b/includes/Data/Listener/ReferenceRecorder.php @@ -14,6 +14,7 @@ use Flow\Model\Reference; use Flow\Parsoid\ReferenceExtractor; use Flow\Repository\TreeRepository; +use SplQueue; /** * Listens for new revisions to be inserted. Calculates the difference in @@ -42,16 +43,23 @@ */ protected $treeRepository; + /** + * @var SplQueue + */ + protected $deferredQueue; + public function __construct( ReferenceExtractor $referenceExtractor, LinksTableUpdater $linksTableUpdater, ManagerGroup $storage, - TreeRepository $treeRepository + TreeRepository $treeRepository, + SplQueue $deferredQueue ) { $this->referenceExtractor = $referenceExtractor; $this->linksTableUpdater = $linksTableUpdater; $this->storage = $storage; $this->treeRepository = $treeRepository; + $this->deferredQueue = $deferredQueue; } public function onAfterLoad( $object, array $old ) { @@ -65,6 +73,7 @@ if ( !$revision instanceof AbstractRevision ) { throw new InvalidDataException( 'ReferenceRecorder can only attach to AbstractRevision storage'); } + /** @var Workflow $workflow */ $workflow = $metadata['workflow']; if ( $revision instanceof PostRevision && $revision->isTopicTitle() ) { @@ -76,8 +85,12 @@ $this->storage->multiPut( $added ); $this->storage->multiRemove( $removed ); - // Data updates - $this->linksTableUpdater->doUpdate( $workflow ); + // Data has not yet been committed at this points, so let's delay + // updating `categorylinks` + $linksTableUpdater = $this->linksTableUpdater; + $this->deferredQueue->push( function() use ( $linksTableUpdater, $workflow ) { + $linksTableUpdater->doUpdate( $workflow ); + } ); } /** diff --git a/includes/Data/Mapper/BasicObjectMapper.php b/includes/Data/Mapper/BasicObjectMapper.php index 4e74abf..2d33815 100644 --- a/includes/Data/Mapper/BasicObjectMapper.php +++ b/includes/Data/Mapper/BasicObjectMapper.php @@ -42,11 +42,6 @@ return null; } - public function normalizeRow( array $row ) { - $object = $this->fromStorageRow( $row ); - return $this->toStorageRow( $object ); - } - public function clear() { // noop } diff --git a/includes/Data/Mapper/CachingObjectMapper.php b/includes/Data/Mapper/CachingObjectMapper.php index 87a42a9..7abac0c 100644 --- a/includes/Data/Mapper/CachingObjectMapper.php +++ b/includes/Data/Mapper/CachingObjectMapper.php @@ -120,11 +120,6 @@ } } - public function normalizeRow( array $row ) { - $object = call_user_func( $this->fromStorageRow, $row ); - return call_user_func( $this->toStorageRow, $object ); - } - public function clear() { $this->loaded = new MultiDimArray; } diff --git a/includes/Data/ObjectManager.php b/includes/Data/ObjectManager.php index 8ab5461..3b0d603 100644 --- a/includes/Data/ObjectManager.php +++ b/includes/Data/ObjectManager.php @@ -183,7 +183,7 @@ throw new FlowException( 'Object was not loaded through this object manager, use ObjectManager::merge if necessary' ); } $old = $this->loaded[$object]; - $old = $this->mapper->normalizeRow( $old ); + $old = $this->storage->normalize( $old ); $this->storage->remove( $old ); foreach ( $this->lifecycleHandlers as $handler ) { $handler->onAfterRemove( $object, $old, $metadata ); @@ -285,7 +285,7 @@ */ protected function updateSingle( $object, array $metadata ) { $old = $this->loaded[$object]; - $old = $this->mapper->normalizeRow( $old ); + $old = $this->storage->normalize( $old ); $new = $this->mapper->toStorageRow( $object ); if ( self::arrayEquals( $old, $new ) ) { return; diff --git a/includes/Data/ObjectMapper.php b/includes/Data/ObjectMapper.php index 365feac..b8bbba4 100644 --- a/includes/Data/ObjectMapper.php +++ b/includes/Data/ObjectMapper.php @@ -35,17 +35,6 @@ function get( array $primaryKey ); /** - * Accepts a row representing domain model & returns that same row, - * normalized. It'll roundtrip the row from- & toStorageRow to cleanup data. - * We want to make sure that data type differences cause no false positives, - * like $row containing strings, & new row has integers with the same value. - * - * @param array $row Assoc array representing the domain model - * @return array Normalized row - */ - function normalizeRow( array $row ); - - /** * Clear any internally cached information */ function clear(); diff --git a/includes/Data/ObjectStorage.php b/includes/Data/ObjectStorage.php index fdf6266..24f2aa3 100644 --- a/includes/Data/ObjectStorage.php +++ b/includes/Data/ObjectStorage.php @@ -60,6 +60,17 @@ function remove( array $row ); /** + * Accepts a row representing domain model & returns that same row, + * normalized. It'll roundtrip the row from- & toStorageRow to cleanup data. + * We want to make sure that data type differences cause no false positives, + * like $row containing strings, & new row has integers with the same value. + * + * @param array $row Assoc array representing the domain model + * @return array Normalized row + */ + function normalize( array $row ); + + /** * Returns a boolean true/false to indicate if the result of a particular * query is valid & can be cached. * In some cases, the retrieved data should not be cached. E.g. revisions diff --git a/includes/Data/Storage/BasicDbStorage.php b/includes/Data/Storage/BasicDbStorage.php index d1bb3ef..4be1baf 100644 --- a/includes/Data/Storage/BasicDbStorage.php +++ b/includes/Data/Storage/BasicDbStorage.php @@ -2,6 +2,7 @@ namespace Flow\Data\Storage; +use Flow\Data\ObjectMapper; use Flow\Model\UUID; use Flow\DbFactory; use Flow\Data\ObjectManager; @@ -30,15 +31,16 @@ /** * @param DbFactory $dbFactory + * @param ObjectMapper $mapper * @param string $table * @param string[] $primaryKey * @throws DataModelException */ - public function __construct( DbFactory $dbFactory, $table, array $primaryKey ) { + public function __construct( DbFactory $dbFactory, ObjectMapper $mapper, $table, array $primaryKey ) { if ( !$primaryKey ) { throw new DataModelException( 'PK required', 'process-data' ); } - parent::__construct( $dbFactory ); + parent::__construct( $dbFactory, $mapper ); $this->table = $table; $this->primaryKey = $primaryKey; } @@ -164,7 +166,7 @@ public function findMulti( array $queries, array $options = array() ) { $keys = array_keys( reset( $queries ) ); - $pks = $this->getPrimaryKeyColumns(); + $pks = $this->getPrimaryKeyColumns(); if ( count( $keys ) !== count( $pks ) || array_diff( $keys, $pks ) ) { return $this->fallbackFindMulti( $queries, $options ); } diff --git a/includes/Data/Storage/DbStorage.php b/includes/Data/Storage/DbStorage.php index 7f6bb39..2cd4560 100644 --- a/includes/Data/Storage/DbStorage.php +++ b/includes/Data/Storage/DbStorage.php @@ -3,6 +3,7 @@ namespace Flow\Data\Storage; use Flow\Data\ObjectManager; +use Flow\Data\ObjectMapper; use Flow\Data\ObjectStorage; use Flow\Data\Utils\RawSql; use Flow\Model\UUID; @@ -23,6 +24,11 @@ protected $dbFactory; /** + * @var ObjectMapper + */ + protected $mapper; + + /** * The revision columns allowed to be updated * * @var string[]|true Allow of selective columns to allow, or true to allow @@ -41,9 +47,11 @@ /** * @param DbFactory $dbFactory + * @param ObjectMapper $mapper */ - public function __construct( DbFactory $dbFactory ) { + public function __construct( DbFactory $dbFactory, ObjectMapper $mapper ) { $this->dbFactory = $dbFactory; + $this->mapper = $mapper; } /** @@ -194,6 +202,14 @@ /** * {@inheritDoc} */ + public function normalize( array $row ) { + $object = $this->mapper->fromStorageRow( $row ); + return $this->mapper->toStorageRow( $object ); + } + + /** + * {@inheritDoc} + */ public function validate( array $row ) { return true; } diff --git a/includes/Data/Storage/PostRevisionStorage.php b/includes/Data/Storage/PostRevisionStorage.php index c49dfa5..95cc380 100644 --- a/includes/Data/Storage/PostRevisionStorage.php +++ b/includes/Data/Storage/PostRevisionStorage.php @@ -2,6 +2,7 @@ namespace Flow\Data\Storage; +use Flow\Data\ObjectMapper; use Flow\DbFactory; use Flow\Model\UUID; use Flow\Repository\TreeRepository; @@ -18,12 +19,13 @@ /** * @param DbFactory $dbFactory + * @param ObjectMapper $mapper * @param array|false List of external store servers available for insert * or false to disable. See $wgFlowExternalStore. * @param TreeRepository $treeRepo */ - public function __construct( DbFactory $dbFactory, $externalStore, TreeRepository $treeRepo ) { - parent::__construct( $dbFactory, $externalStore ); + public function __construct( DbFactory $dbFactory, ObjectMapper $mapper, $externalStore, TreeRepository $treeRepo ) { + parent::__construct( $dbFactory, $mapper, $externalStore ); $this->treeRepo = $treeRepo; } diff --git a/includes/Data/Storage/RevisionStorage.php b/includes/Data/Storage/RevisionStorage.php index a79b10a..c4df038 100644 --- a/includes/Data/Storage/RevisionStorage.php +++ b/includes/Data/Storage/RevisionStorage.php @@ -4,6 +4,7 @@ use DatabaseBase; use ExternalStore; +use Flow\Data\ObjectMapper; use Flow\Data\Utils\Merger; use Flow\Data\Utils\ResultDuplicator; use Flow\Data\ObjectManager; @@ -96,11 +97,12 @@ /** * @param DbFactory $dbFactory + * @param ObjectMapper $mapper * @param array|false List of external store servers available for insert * or false to disable. See $wgFlowExternalStore. */ - public function __construct( DbFactory $dbFactory, $externalStore ) { - parent::__construct( $dbFactory ); + public function __construct( DbFactory $dbFactory, ObjectMapper $mapper, $externalStore ) { + parent::__construct( $dbFactory, $mapper ); $this->externalStore = $externalStore; } diff --git a/includes/Data/Storage/TopicHistoryStorage.php b/includes/Data/Storage/TopicHistoryStorage.php index 7db0751..16f435e 100644 --- a/includes/Data/Storage/TopicHistoryStorage.php +++ b/includes/Data/Storage/TopicHistoryStorage.php @@ -2,6 +2,7 @@ namespace Flow\Data\Storage; +use Flow\Data\ObjectMapper; use Flow\Data\ObjectStorage; use Flow\Exception\DataModelException; use Flow\Model\UUID; @@ -25,6 +26,11 @@ protected $postSummaryStorage; /** + * @var ObjectMapper + */ + protected $mapper; + + /** * @var TreeRepository */ protected $treeRepository; @@ -32,11 +38,13 @@ /** * @param ObjectStorage $postRevisionStorage * @param ObjectStorage $postSummaryStorage + * @param ObjectMapper $mapper * @param TreeRepository $treeRepo */ - public function __construct( ObjectStorage $postRevisionStorage, ObjectStorage $postSummaryStorage, TreeRepository $treeRepo ) { + public function __construct( ObjectStorage $postRevisionStorage, ObjectStorage $postSummaryStorage, ObjectMapper $mapper, TreeRepository $treeRepo ) { $this->postRevisionStorage = $postRevisionStorage; $this->postSummaryStorage = $postSummaryStorage; + $this->mapper = $mapper; $this->treeRepository = $treeRepo; } @@ -140,6 +148,11 @@ throw new DataModelException( __CLASS__ . ' does not support remove action', 'process-data' ); } + public function normalize( array $row ) { + $object = $this->mapper->fromStorageRow( $row ); + return $this->mapper->toStorageRow( $object ); + } + public function validate( array $row ) { return true; } diff --git a/includes/LinksTableUpdater.php b/includes/LinksTableUpdater.php index 9346dff..773786b 100644 --- a/includes/LinksTableUpdater.php +++ b/includes/LinksTableUpdater.php @@ -26,14 +26,16 @@ $this->storage = $storage; } - public function doUpdate( Workflow $workflow ) { + public function doUpdate( Workflow $workflow) { $title = $workflow->getArticleTitle(); $page = WikiPage::factory( $title ); $content = $page->getContent(); if ( $content === null ) { $updates = array(); } else { - $updates = $content->getSecondaryDataUpdates( $title ); + // Make sure linksTableUpdater receives a $parserOutput object with the old + // category links, so it can compare and update against current state + $updates = $content->getSecondaryDataUpdates( $title ); //, null, true, $parserOutput ); } DataUpdate::runUpdates( $updates ); -- To view, visit https://gerrit.wikimedia.org/r/233432 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I23147fd711425e770369f092ad25d0de2d354c2f Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/Flow Gerrit-Branch: master Gerrit-Owner: Matthias Mullie <mmul...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits