jenkins-bot has submitted this change and it was merged.
Change subject: Eliminate some memory leaks affecting LQT->Flow:
......................................................................
Eliminate some memory leaks affecting LQT->Flow:
* LocalBufferedBagOStuff does not clear its in-process cache
after commit, so have the importer use BufferedBagOStuff instead.
The only downside is that it won't have reads cached in-process
at all, but for the importer's usage pattern this downside seems
unlikely to come up much.
Also rename in container.php for clarity.
* Call ManagerGroup->clear(), which clears all the ObjectManager-s,
their mappers, and triggers onAfterClear.
This requires being very careful to avoid inserting an object
that is already in the database. See clearManagerGroup.
* Change UserNameBatch to use MapCacheLRU and limit to 250 per wiki.
This may not be strictly required for the importer's use case, since
UserNameListener has a onAfterClear listener (which is triggered by
ObjectManager->clear). However, there are other uses of UserNameBatch,
and this adds an extra layer of protection.
* Add some notes about other possible improvements that apparently
wouldn't impact the importer, but might impact other code.
* Some comment updates I noticed along the way.
Bug: T106614
Bug: T108601
Change-Id: I988dff81de9e32da1470a89cc7cc5dfc07ef82e8
(cherry picked from commit d9e250d1339afb064d9b4c92031559067ac65eaf)
---
M container.php
M includes/Data/BagOStuff/LocalBufferedBagOStuff.php
M includes/Formatter/AbstractQuery.php
M includes/Import/Importer.php
M includes/Model/WikiReference.php
M includes/Repository/UserNameBatch.php
M includes/Templating.php
7 files changed, 103 insertions(+), 33 deletions(-)
Approvals:
Mattflaschen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/container.php b/container.php
index 548b562..9bdfdfd 100644
--- a/container.php
+++ b/container.php
@@ -37,7 +37,7 @@
$c['repository.tree'] = function( $c ) {
return new Flow\Repository\TreeRepository(
$c['db.factory'],
- $c['memcache.buffered']
+ $c['memcache.local_buffered']
);
};
@@ -132,14 +132,28 @@
use Flow\Model\PostRevision;
use Flow\Model\PostSummary;
-$c['memcache.buffered'] = function( $c ) {
+// This currently never clears $this->bag, which makes it unusuable for
long-running batch.
+// Use 'memcache.buffered' for those instead.
+$c['memcache.local_buffered'] = function( $c ) {
global $wgFlowCacheTime;
- // This is the real buffered cached that will allow transactional-like
cache
+ // This is the real buffered cached that will allow transactional-like
cache.
+ // It also caches all reads in-memory.
$bufferedCache = new Flow\Data\BagOStuff\LocalBufferedBagOStuff(
$c['memcache'] );
// This is Flow's wrapper around it, to have a fixed cache expiry time
return new BufferedCache( $bufferedCache, $wgFlowCacheTime );
};
+
+$c['memcache.buffered'] = function( $c ) {
+ global $wgFlowCacheTime;
+
+ // This is the real buffered cached that will allow transactional-like
cache
+ $bufferedCache = new Flow\Data\BagOStuff\BufferedBagOStuff(
$c['memcache'] );
+
+ // This is Flow's wrapper around it, to have a fixed cache expiry time
+ return new BufferedCache( $bufferedCache, $wgFlowCacheTime );
+};
+
// Batched username loader
$c['repository.username.query'] = function( $c ) {
return new Flow\Repository\UserName\TwoStepUserNameQuery(
@@ -173,7 +187,7 @@
};
$c['storage.workflow.indexes.primary'] = function( $c ) {
return new UniqueFeatureIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.workflow.backend'],
'flow_workflow:v2:pk',
$c['storage.workflow.primary_key']
@@ -181,7 +195,7 @@
};
$c['storage.workflow.indexes.title_lookup'] = function( $c ) {
return new TopKIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.workflow.backend'],
'flow_workflow:title:v2:',
array( 'workflow_wiki', 'workflow_namespace',
'workflow_title_text', 'workflow_type' ),
@@ -252,7 +266,7 @@
};
$c['storage.board_history.indexes.primary'] = function( $c ) {
return new BoardHistoryIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
// backend storage
$c['storage.board_history.backend'],
// key prefix
@@ -338,7 +352,7 @@
};
$c['storage.header.indexes.primary'] = function( $c ) {
return new UniqueFeatureIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.header.backend'],
'flow_header:v2:pk',
$c['storage.header.primary_key']
@@ -346,7 +360,7 @@
};
$c['storage.header.indexes.topic_lookup'] = function( $c ) {
return new TopKIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.header.backend'],
'flow_header:workflow',
array( 'rev_type_id' ),
@@ -416,7 +430,7 @@
};
$c['storage.post_summary.indexes.primary'] = function( $c ) {
return new UniqueFeatureIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.post_summary.backend'],
'flow_post_summary:v2:pk',
$c['storage.post_summary.primary_key']
@@ -424,7 +438,7 @@
};
$c['storage.post_summary.indexes.topic_lookup'] = function( $c ) {
return new TopKIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.post_summary.backend'],
'flow_post_summary:workflow',
array( 'rev_type_id' ),
@@ -482,7 +496,7 @@
// Lookup from topic_id to its owning board id
$c['storage.topic_list.indexes.primary'] = function( $c ) {
return new UniqueFeatureIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.topic_list.backend'],
'flow_topic_list:topic',
array( 'topic_id' )
@@ -493,7 +507,7 @@
/// In reverse order by topic_id
$c['storage.topic_list.indexes.reverse_lookup'] = function( $c ) {
return new TopicListTopKIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.topic_list.backend'],
'flow_topic_list:list',
array( 'topic_list_id' ),
@@ -503,7 +517,7 @@
/// In reverse order by topic last_updated
$c['storage.topic_list.indexes.last_updated'] = function( $c ) {
return new TopicListTopKIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.topic_list.indexes.last_updated.backend'],
'flow_topic_list_last_updated:list',
array( 'topic_list_id' ),
@@ -595,7 +609,7 @@
};
$c['storage.post.indexes.primary'] = function( $c ) {
return new UniqueFeatureIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.post.backend'],
'flow_revision:v4:pk',
$c['storage.post.primary_key']
@@ -604,7 +618,7 @@
// Each bucket holds a list of revisions in a single post
$c['storage.post.indexes.post_lookup'] = function( $c ) {
return new TopKIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.post.backend'],
'flow_revision:descendant',
array( 'rev_type_id' ),
@@ -646,7 +660,7 @@
};
$c['storage.topic_history.indexes.primary'] = function( $c ) {
return new UniqueFeatureIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.topic_history.backend'],
'flow_revision:v4:pk',
$c['storage.topic_history.primary_key']
@@ -654,7 +668,7 @@
};
$c['storage.topic_history.indexes.topic_lookup'] = function( $c ) {
return new TopicHistoryIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.topic_history.backend'],
$c['repository.tree'],
'flow_revision:topic:v2',
@@ -764,7 +778,7 @@
return new Flow\SubmissionHandler(
$c['storage'],
$c['db.factory'],
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['deferred_queue']
);
};
@@ -1046,7 +1060,7 @@
};
$c['storage.wiki_reference.indexes.source_lookup'] = function( $c ) {
return new TopKIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.wiki_reference.backend'],
'flow_ref:wiki:by-source',
array(
@@ -1061,7 +1075,7 @@
};
$c['storage.wiki_reference.indexes.revision_lookup'] = function( $c ) {
return new TopKIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.wiki_reference.backend'],
'flow_ref:wiki:by-revision:v2',
array(
@@ -1113,7 +1127,7 @@
$c['storage.url_reference.indexes.revision_lookup'] = function( $c ) {
return new TopKIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.url_reference.backend'],
'flow_ref:url:by-source',
array(
@@ -1128,7 +1142,7 @@
};
$c['storage.url_reference.indexes.source_lookup'] = function( $c ) {
return new TopKIndex(
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage.url_reference.backend'],
'flow_ref:url:by-revision:v2',
array(
@@ -1230,7 +1244,7 @@
$c['board_mover'] = function( $c ) {
return new Flow\BoardMover(
$c['db.factory'],
- $c['memcache.buffered'],
+ $c['memcache.local_buffered'],
$c['storage'],
$c['occupation_controller']->getTalkpageManager()
);
diff --git a/includes/Data/BagOStuff/LocalBufferedBagOStuff.php
b/includes/Data/BagOStuff/LocalBufferedBagOStuff.php
index 948b67a..22ae4ac 100755
--- a/includes/Data/BagOStuff/LocalBufferedBagOStuff.php
+++ b/includes/Data/BagOStuff/LocalBufferedBagOStuff.php
@@ -36,6 +36,10 @@
protected function clearLocal() {
// contrary to BufferedBagOStuff, don't clear $this->bag
+ //
+ // TODO: Reconsider this, or at least bound $this->bag somehow?
This
+ // *might* be acceptable for web requests, but $this->bag
growing
+ // unbounded can easily cause problems for batch.
$this->buffer = array();
$this->committed = array();
}
diff --git a/includes/Formatter/AbstractQuery.php
b/includes/Formatter/AbstractQuery.php
index c506ccc..5c0cd36 100644
--- a/includes/Formatter/AbstractQuery.php
+++ b/includes/Formatter/AbstractQuery.php
@@ -30,6 +30,8 @@
*/
protected $treeRepository;
+ // Consider converting these in-process caches to MapCacheLRU to avoid
+ // memory leaks. Should only be an issue if a batch is repeatedly
doing queries.
/**
* @var UUID[] Associative array of post ID to root post's UUID object.
*/
diff --git a/includes/Import/Importer.php b/includes/Import/Importer.php
index 9b13ef9..0f09951 100644
--- a/includes/Import/Importer.php
+++ b/includes/Import/Importer.php
@@ -282,12 +282,29 @@
/**
* Gets the given object from storage
*
+ * WARNING: Before calling this method, ensure that you follow the rule
+ * given in clearManagerGroup.
+ *
* @param string $type Class name to retrieve
* @param UUID $id ID of the object to retrieve
* @return Object|false
*/
public function get( $type, UUID $id ) {
return $this->storage->get( $type, $id );
+ }
+
+ /**
+ * Clears information about which objects are loaded, to avoid memory
leaks.
+ * This will also:
+ * * Clear the mapper associated with each ObjectManager that has been
used.
+ * * Trigger onAfterClear on any listeners.
+ *
+ * WARNING: You can *NOT* call ->get before calling clearManagerGroup,
then ->put
+ * after calling clearManagerGroup, on the same object. This will
cause a
+ * duplicate object to be inserted.
+ */
+ public function clearManagerGroup() {
+ $this->storage->clear();
}
/**
@@ -589,6 +606,8 @@
$this->importTopic( $topicState, $topic );
$state->commit();
$state->postprocessor->afterTopicImported(
$topicState, $topic );
+ $state->clearManagerGroup();
+
$imported++;
} catch ( ImportSourceStoreException $e ) {
// errors from the source store are more
serious and shuld
diff --git a/includes/Model/WikiReference.php b/includes/Model/WikiReference.php
index 43c4f08..59d4c25 100644
--- a/includes/Model/WikiReference.php
+++ b/includes/Model/WikiReference.php
@@ -72,8 +72,14 @@
}
/**
- * Many loaded references typically point to the same Title, cache
those instead
- * of generating a bunch of duplicate title classes.
+ * Gets a Title given a namespace number and title text
+ *
+ * Many loaded references typically point to the same Title, so we
cache those
+ * instead of generating a bunch of duplicate title classes.
+ *
+ * @param int $namespace Namespace number
+ * @param string $title Title text
+ * @return Title
*/
public static function makeTitle( $namespace, $title ) {
try {
diff --git a/includes/Repository/UserNameBatch.php
b/includes/Repository/UserNameBatch.php
index e3a86ee..7df7eeb 100644
--- a/includes/Repository/UserNameBatch.php
+++ b/includes/Repository/UserNameBatch.php
@@ -6,12 +6,16 @@
namespace Flow\Repository;
use Flow\Model\UserTuple;
+use MapCacheLRU;
use User;
/**
* Batch together queries for a bunch of wiki+userid -> username
*/
class UserNameBatch {
+ // Maximum number of usernames to cache for each wiki
+ const USERNAMES_PER_WIKI = 250;
+
/**
* @var UserName\UserNameQuery
*/
@@ -23,7 +27,8 @@
protected $queued = array();
/**
- * @var array[] 2-d map from wiki id and user id to display username or
false
+ * @var array Map from wiki id to MapCacheLRU. MapCacheLRU is a map of
user ID (as
+ * string, though to PHP it's the same anyway...) to username.
*/
protected $usernames = array();
@@ -35,6 +40,17 @@
$this->query = $query;
foreach ( $queued as $wiki => $userIds ) {
$this->queued[$wiki] = array_map( 'intval', $userIds );
+ }
+ }
+
+ /*
+ * Make sure the LRU for the given wiki is in place.
+ *
+ * @param string $wiki Wiki identifier
+ */
+ protected function ensureLRU( $wiki ) {
+ if ( !isset( $this->usernames[$wiki] ) ) {
+ $this->usernames[$wiki] = new MapCacheLRU(
self::USERNAMES_PER_WIKI );
}
}
@@ -50,9 +66,11 @@
*/
public function add( $wiki, $userId, $userName = null ) {
$userId = (int)$userId;
+
+ $this->ensureLRU( $wiki );
if ( $userName !== null ) {
- $this->usernames[$wiki][$userId] = $userName;
- } elseif ( !isset( $this->usernames[$wiki][$userId] ) ) {
+ $this->usernames[$wiki]->set( (string) $userId,
$userName );
+ } elseif ( !$this->usernames[$wiki]->has( (string) $userId ) ) {
$this->queued[$wiki][] = $userId;
}
}
@@ -79,11 +97,13 @@
if ( $userId === 0 ) {
return $userIp;
}
- if ( !isset( $this->usernames[$wiki][$userId] ) ) {
+
+ $this->ensureLRU( $wiki );
+ if ( !$this->usernames[$wiki]->has( (string) $userId ) ) {
$this->queued[$wiki][] = $userId;
$this->resolve( $wiki );
}
- return $this->usernames[$wiki][$userId];
+ return $this->usernames[$wiki]->get( (string) $userId );
}
/**
@@ -105,15 +125,19 @@
}
$queued = array_unique( $this->queued[$wiki] );
if ( isset( $this->usernames[$wiki] ) ) {
- $queued = array_diff( $queued, array_keys(
$this->usernames[$wiki] ) );
+ $queued = array_diff( $queued,
$this->usernames[$wiki]->getAllKeys() );
+ } else {
+ $this->ensureLRU( $wiki );
}
+
$res = $this->query->execute( $wiki, $queued );
unset( $this->queued[$wiki] );
if ( $res ) {
$usernames = array();
foreach ( $res as $row ) {
$id = (int)$row->user_id;
- $this->usernames[$wiki][$id] = $usernames[$id]
= $row->user_name;
+ $usernames[$id] = $row->user_name;
+ $this->usernames[$wiki]->set( (string) $id,
$row->user_name );
}
$this->resolveUserPages( $wiki, $usernames );
$missing = array_diff( $queued, array_keys( $usernames
) );
@@ -121,7 +145,7 @@
$missing = $queued;
}
foreach ( $missing as $id ) {
- $this->usernames[$wiki][$id] = false;
+ $this->usernames[$wiki]->set( (string) $id, false );
}
}
diff --git a/includes/Templating.php b/includes/Templating.php
index e3351da..2b71ae6 100644
--- a/includes/Templating.php
+++ b/includes/Templating.php
@@ -92,6 +92,7 @@
throw new PermissionException( 'Insufficient
permissions to see userlinks for rev_id = ' .
$revision->getRevisionId()->getAlphadecimal() );
}
+ // Convert to use MapCacheLRU?
// if this specific revision is moderated, its usertext can
always be
// displayed, since it will be the moderator user
static $cache;
--
To view, visit https://gerrit.wikimedia.org/r/230929
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I988dff81de9e32da1470a89cc7cc5dfc07ef82e8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Flow
Gerrit-Branch: wmf/1.26wmf18
Gerrit-Owner: Mattflaschen <[email protected]>
Gerrit-Reviewer: Mattflaschen <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits