Mattflaschen has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/230723

Change subject: WIP: Eliminate some memory leaks affecting LQT->Flow:
......................................................................

WIP: Eliminate some memory leaks affecting LQT->Flow:

* LocalBufferedBagOStuff does not clear its in-process cache
  after commit, so have the importer use BufferedBagOStuff instead.

  The only downside is that it won't have reads cached in-process
  at all, but for the importer's usage pattern this downside seems
  unlikely to come up much.

  Also rename in container.php for clarity.

* Call ManagerGroup->clear(), which clears all the ObjectManager-s,
  their mappers, and triggers onAfterClear.  I suspect that
  ObjectManager and CachingObjectMapper together may be a major part of
  the memory leak.

  However, this requires being very careful to avoid inserting an object
  that is already in the database.  See clearManagerGroup.

* Change UserNameBatch to use MapCacheLRU and limit to 250 per wiki.
  This may not be strictly required for the importer's use case, since
  UserNameListener has a onAfterClear listener (which is triggered by
  ObjectManager->clear). However, there are other uses of UserNameBatch,
  and this adds an extra layer of protection.

* Add some notes about other possible improvements that apparently
  wouldn't impact the importer, but might impact other code.

* Some comment updates I noticed along the way.

Bug: T106614
Bug: T108601
Change-Id: I988dff81de9e32da1470a89cc7cc5dfc07ef82e8
---
M container.php
M includes/Data/BagOStuff/LocalBufferedBagOStuff.php
M includes/Formatter/AbstractQuery.php
M includes/Import/Importer.php
M includes/Model/WikiReference.php
M includes/Repository/UserNameBatch.php
M includes/Templating.php
7 files changed, 102 insertions(+), 33 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Flow 
refs/changes/23/230723/1

diff --git a/container.php b/container.php
index 548b562..9bdfdfd 100644
--- a/container.php
+++ b/container.php
@@ -37,7 +37,7 @@
 $c['repository.tree'] = function( $c ) {
        return new Flow\Repository\TreeRepository(
                $c['db.factory'],
-               $c['memcache.buffered']
+               $c['memcache.local_buffered']
        );
 };
 
@@ -132,14 +132,28 @@
 use Flow\Model\PostRevision;
 use Flow\Model\PostSummary;
 
-$c['memcache.buffered'] = function( $c ) {
+// This currently never clears $this->bag, which makes it unusuable for 
long-running batch.
+// Use 'memcache.buffered' for those instead.
+$c['memcache.local_buffered'] = function( $c ) {
        global $wgFlowCacheTime;
 
-       // This is the real buffered cached that will allow transactional-like 
cache
+       // This is the real buffered cached that will allow transactional-like 
cache.
+       // It also caches all reads in-memory.
        $bufferedCache = new Flow\Data\BagOStuff\LocalBufferedBagOStuff( 
$c['memcache'] );
        // This is Flow's wrapper around it, to have a fixed cache expiry time
        return new BufferedCache( $bufferedCache, $wgFlowCacheTime );
 };
+
+$c['memcache.buffered'] = function( $c ) {
+       global $wgFlowCacheTime;
+
+       // This is the real buffered cached that will allow transactional-like 
cache
+       $bufferedCache = new Flow\Data\BagOStuff\BufferedBagOStuff( 
$c['memcache'] );
+
+       // This is Flow's wrapper around it, to have a fixed cache expiry time
+       return new BufferedCache( $bufferedCache, $wgFlowCacheTime );
+};
+
 // Batched username loader
 $c['repository.username.query'] = function( $c ) {
        return new Flow\Repository\UserName\TwoStepUserNameQuery(
@@ -173,7 +187,7 @@
 };
 $c['storage.workflow.indexes.primary'] = function( $c ) {
        return new UniqueFeatureIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.workflow.backend'],
                'flow_workflow:v2:pk',
                $c['storage.workflow.primary_key']
@@ -181,7 +195,7 @@
 };
 $c['storage.workflow.indexes.title_lookup'] = function( $c ) {
        return new TopKIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.workflow.backend'],
                'flow_workflow:title:v2:',
                array( 'workflow_wiki', 'workflow_namespace', 
'workflow_title_text', 'workflow_type' ),
@@ -252,7 +266,7 @@
 };
 $c['storage.board_history.indexes.primary'] = function( $c ) {
        return new BoardHistoryIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                // backend storage
                $c['storage.board_history.backend'],
                // key prefix
@@ -338,7 +352,7 @@
 };
 $c['storage.header.indexes.primary'] = function( $c ) {
        return new UniqueFeatureIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.header.backend'],
                'flow_header:v2:pk',
                $c['storage.header.primary_key']
@@ -346,7 +360,7 @@
 };
 $c['storage.header.indexes.topic_lookup'] = function( $c ) {
        return new TopKIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.header.backend'],
                'flow_header:workflow',
                array( 'rev_type_id' ),
@@ -416,7 +430,7 @@
 };
 $c['storage.post_summary.indexes.primary'] = function( $c ) {
        return new UniqueFeatureIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.post_summary.backend'],
                'flow_post_summary:v2:pk',
                $c['storage.post_summary.primary_key']
@@ -424,7 +438,7 @@
 };
 $c['storage.post_summary.indexes.topic_lookup'] = function( $c ) {
        return new TopKIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.post_summary.backend'],
                'flow_post_summary:workflow',
                array( 'rev_type_id' ),
@@ -482,7 +496,7 @@
 // Lookup from topic_id to its owning board id
 $c['storage.topic_list.indexes.primary'] = function( $c ) {
        return new UniqueFeatureIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.topic_list.backend'],
                'flow_topic_list:topic',
                array( 'topic_id' )
@@ -493,7 +507,7 @@
 /// In reverse order by topic_id
 $c['storage.topic_list.indexes.reverse_lookup'] = function( $c ) {
        return new TopicListTopKIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.topic_list.backend'],
                'flow_topic_list:list',
                array( 'topic_list_id' ),
@@ -503,7 +517,7 @@
 /// In reverse order by topic last_updated
 $c['storage.topic_list.indexes.last_updated'] = function( $c ) {
        return new TopicListTopKIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.topic_list.indexes.last_updated.backend'],
                'flow_topic_list_last_updated:list',
                array( 'topic_list_id' ),
@@ -595,7 +609,7 @@
 };
 $c['storage.post.indexes.primary'] = function( $c ) {
        return new UniqueFeatureIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.post.backend'],
                'flow_revision:v4:pk',
                $c['storage.post.primary_key']
@@ -604,7 +618,7 @@
 // Each bucket holds a list of revisions in a single post
 $c['storage.post.indexes.post_lookup'] = function( $c ) {
        return new TopKIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.post.backend'],
                'flow_revision:descendant',
                array( 'rev_type_id' ),
@@ -646,7 +660,7 @@
 };
 $c['storage.topic_history.indexes.primary'] = function( $c ) {
        return new UniqueFeatureIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.topic_history.backend'],
                'flow_revision:v4:pk',
                $c['storage.topic_history.primary_key']
@@ -654,7 +668,7 @@
 };
 $c['storage.topic_history.indexes.topic_lookup'] = function( $c ) {
        return new TopicHistoryIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.topic_history.backend'],
                $c['repository.tree'],
                'flow_revision:topic:v2',
@@ -764,7 +778,7 @@
        return new Flow\SubmissionHandler(
                $c['storage'],
                $c['db.factory'],
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['deferred_queue']
        );
 };
@@ -1046,7 +1060,7 @@
 };
 $c['storage.wiki_reference.indexes.source_lookup'] = function( $c ) {
        return new TopKIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.wiki_reference.backend'],
                'flow_ref:wiki:by-source',
                array(
@@ -1061,7 +1075,7 @@
 };
 $c['storage.wiki_reference.indexes.revision_lookup'] = function( $c ) {
        return new TopKIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.wiki_reference.backend'],
                'flow_ref:wiki:by-revision:v2',
                array(
@@ -1113,7 +1127,7 @@
 
 $c['storage.url_reference.indexes.revision_lookup'] = function( $c ) {
        return new TopKIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.url_reference.backend'],
                'flow_ref:url:by-source',
                array(
@@ -1128,7 +1142,7 @@
 };
 $c['storage.url_reference.indexes.source_lookup'] = function( $c ) {
        return new TopKIndex(
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage.url_reference.backend'],
                'flow_ref:url:by-revision:v2',
                array(
@@ -1230,7 +1244,7 @@
 $c['board_mover'] = function( $c ) {
        return new Flow\BoardMover(
                $c['db.factory'],
-               $c['memcache.buffered'],
+               $c['memcache.local_buffered'],
                $c['storage'],
                $c['occupation_controller']->getTalkpageManager()
        );
diff --git a/includes/Data/BagOStuff/LocalBufferedBagOStuff.php 
b/includes/Data/BagOStuff/LocalBufferedBagOStuff.php
index 948b67a..22ae4ac 100755
--- a/includes/Data/BagOStuff/LocalBufferedBagOStuff.php
+++ b/includes/Data/BagOStuff/LocalBufferedBagOStuff.php
@@ -36,6 +36,10 @@
 
        protected function clearLocal() {
                // contrary to BufferedBagOStuff, don't clear $this->bag
+               //
+               // TODO: Reconsider this, or at least bound $this->bag somehow? 
 This
+               // *might* be acceptable for web requests, but $this->bag 
growing
+               // unbounded can easily cause problems for batch.
                $this->buffer = array();
                $this->committed = array();
        }
diff --git a/includes/Formatter/AbstractQuery.php 
b/includes/Formatter/AbstractQuery.php
index c506ccc..5c0cd36 100644
--- a/includes/Formatter/AbstractQuery.php
+++ b/includes/Formatter/AbstractQuery.php
@@ -30,6 +30,8 @@
         */
        protected $treeRepository;
 
+       // Consider converting these in-process caches to MapCacheLRU to avoid
+       // memory leaks.  Should only be an issue if a batch is repeatedly 
doing queries.
        /**
         * @var UUID[] Associative array of post ID to root post's UUID object.
         */
diff --git a/includes/Import/Importer.php b/includes/Import/Importer.php
index 9b13ef9..0f09951 100644
--- a/includes/Import/Importer.php
+++ b/includes/Import/Importer.php
@@ -282,12 +282,29 @@
        /**
         * Gets the given object from storage
         *
+        * WARNING: Before calling this method, ensure that you follow the rule
+        * given in clearManagerGroup.
+        *
         * @param  string $type Class name to retrieve
         * @param  UUID   $id   ID of the object to retrieve
         * @return Object|false
         */
        public function get( $type, UUID $id ) {
                return $this->storage->get( $type, $id );
+       }
+
+       /**
+        * Clears information about which objects are loaded, to avoid memory 
leaks.
+        * This will also:
+        * * Clear the mapper associated with each ObjectManager that has been 
used.
+        * * Trigger onAfterClear on any listeners.
+        *
+        * WARNING: You can *NOT* call ->get before calling clearManagerGroup, 
then ->put
+        * after calling clearManagerGroup, on the same object.  This will 
cause a
+        * duplicate object to be inserted.
+        */
+       public function clearManagerGroup() {
+               $this->storage->clear();
        }
 
        /**
@@ -589,6 +606,8 @@
                                $this->importTopic( $topicState, $topic );
                                $state->commit();
                                $state->postprocessor->afterTopicImported( 
$topicState, $topic );
+                               $state->clearManagerGroup();
+
                                $imported++;
                        } catch ( ImportSourceStoreException $e ) {
                                // errors from the source store are more 
serious and shuld
diff --git a/includes/Model/WikiReference.php b/includes/Model/WikiReference.php
index 43c4f08..0fdae05 100644
--- a/includes/Model/WikiReference.php
+++ b/includes/Model/WikiReference.php
@@ -72,8 +72,14 @@
        }
 
        /**
-        * Many loaded references typically point to the same Title, cache 
those instead
-        * of generating a bunch of duplicate title classes.
+        * Gets a title give a namespace number and title text
+        *
+        * Many loaded references typically point to the same Title, so we 
cache those
+        * instead of generating a bunch of duplicate title classes.
+        *
+        * @param int $namespace Namespace number
+        * @param string $title Title text
+        * @return Title
         */
        public static function makeTitle( $namespace, $title ) {
                try {
diff --git a/includes/Repository/UserNameBatch.php 
b/includes/Repository/UserNameBatch.php
index e3a86ee..bcd1e4d 100644
--- a/includes/Repository/UserNameBatch.php
+++ b/includes/Repository/UserNameBatch.php
@@ -12,6 +12,9 @@
  * Batch together queries for a bunch of wiki+userid -> username
  */
 class UserNameBatch {
+       // Maximum number of usernames to cache for each wiki
+       const USERNAMES_PER_WIKI = 250;
+
        /**
         * @var UserName\UserNameQuery
         */
@@ -23,7 +26,8 @@
        protected $queued = array();
 
        /**
-        * @var array[] 2-d map from wiki id and user id to display username or 
false
+        * @var array Map from wiki id to MapCacheLRU.  MapCacheLRU is a map of 
user ID (as
+        *  string, though to PHP it's the same anyway...) to username.
         */
        protected $usernames = array();
 
@@ -35,6 +39,17 @@
                $this->query = $query;
                foreach ( $queued as $wiki => $userIds ) {
                        $this->queued[$wiki] = array_map( 'intval', $userIds );
+               }
+       }
+
+       /*
+        * Make sure the LRU for the given wiki is in place.
+        *
+        * @param string $wiki Wiki identifier
+        */
+       protected function ensureLRU( $wiki ) {
+               if ( !isset( $this->usernames[$wiki] ) ) {
+                       $this->usernames[$wiki] = new MapCacheLRU( 
self::USERNAMES_PER_WIKI );
                }
        }
 
@@ -50,9 +65,11 @@
         */
        public function add( $wiki, $userId, $userName = null ) {
                $userId = (int)$userId;
+
+               $this->ensureLRU( $wiki );
                if ( $userName !== null ) {
-                       $this->usernames[$wiki][$userId] = $userName;
-               } elseif ( !isset( $this->usernames[$wiki][$userId] ) ) {
+                       $this->usernames[$wiki]->set( (string) $userId, 
$userName );
+               } elseif ( !$this->usernames[$wiki]->has( (string) $userId ) ) {
                        $this->queued[$wiki][] = $userId;
                }
        }
@@ -79,11 +96,13 @@
                if ( $userId === 0 ) {
                        return $userIp;
                }
-               if ( !isset( $this->usernames[$wiki][$userId] ) ) {
+
+               $this->ensureLRU( $wiki );
+               if ( !$this->usernames[$wiki]->has( (string) $userId ) ) {
                        $this->queued[$wiki][] = $userId;
                        $this->resolve( $wiki );
                }
-               return $this->usernames[$wiki][$userId];
+               return $this->usernames[$wiki]->get( (string) $userId );
        }
 
        /**
@@ -105,15 +124,19 @@
                }
                $queued = array_unique( $this->queued[$wiki] );
                if ( isset( $this->usernames[$wiki] ) ) {
-                       $queued = array_diff( $queued, array_keys( 
$this->usernames[$wiki] ) );
+                       $queued = array_diff( $queued, 
$this->usernames[$wiki]->getAllKeys() );
+               } else {
+                       $this->ensureLRU( $wiki );
                }
+
                $res = $this->query->execute( $wiki, $queued );
                unset( $this->queued[$wiki] );
                if ( $res ) {
                        $usernames = array();
                        foreach ( $res as $row ) {
                                $id = (int)$row->user_id;
-                               $this->usernames[$wiki][$id] = $usernames[$id] 
= $row->user_name;
+                               $usernames[$id] = $row->user_name;
+                               $this->usernames[$wiki]->set( (string) $id, 
$row->user_name );
                        }
                        $this->resolveUserPages( $wiki, $usernames );
                        $missing = array_diff( $queued, array_keys( $usernames 
) );
@@ -121,7 +144,7 @@
                        $missing = $queued;
                }
                foreach ( $missing as $id ) {
-                       $this->usernames[$wiki][$id] = false;
+                       $this->usernames[$wiki]->set( (string) $id, false );
                }
        }
 
diff --git a/includes/Templating.php b/includes/Templating.php
index e3351da..2b71ae6 100644
--- a/includes/Templating.php
+++ b/includes/Templating.php
@@ -92,6 +92,7 @@
                        throw new PermissionException( 'Insufficient 
permissions to see userlinks for rev_id = ' . 
$revision->getRevisionId()->getAlphadecimal() );
                }
 
+               // Convert to use MapCacheLRU?
                // if this specific revision is moderated, its usertext can 
always be
                // displayed, since it will be the moderator user
                static $cache;

-- 
To view, visit https://gerrit.wikimedia.org/r/230723
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I988dff81de9e32da1470a89cc7cc5dfc07ef82e8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Flow
Gerrit-Branch: master
Gerrit-Owner: Mattflaschen <mflasc...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to