Lucas Werkmeister (WMDE) has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/379222 )
Change subject: Use per-regex cache map to cache regex check results ...................................................................... Use per-regex cache map to cache regex check results Instead of caching each regex check result under an individual, per-regex and per-text key, we cache an array for each regex, where we cache results for the most recently used texts. The size of that cache map is configurable with the WBQualityConstraintsFormatCacheMapSize option, defaulting to 100 entries. The code in SparqlHelper is mostly taken from this Phabricator comment: https://phabricator.wikimedia.org/T173696#3620307 Bug: T173696 Change-Id: I5e9fff029010551736c2733dcb93d9b33b110381 --- M extension.json M includes/ConstraintCheck/Helper/SparqlHelper.php 2 files changed, 61 insertions(+), 7 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikibaseQualityConstraints refs/changes/22/379222/1 diff --git a/extension.json b/extension.json index f5f0a56..f079926 100644 --- a/extension.json +++ b/extension.json @@ -137,6 +137,11 @@ "description": "Whether or not to check the 'format' constraint. If this flag is set to false, any check of the 'format' constraint will return a 'todo' status with the 'wbqc-violation-message-security-reason' message.", "public": true }, + "WBQualityConstraintsFormatCacheMapSize": { + "value": 100, + "description": "Size of the per-regex cache map for format/regex check results. 
For each regex, up to this many values will have cached whether they match the regex or not, on a least-recently-used basis.", + "public": true + }, "WBQualityConstraintsInstanceOfId": { "value": "P31", "description": "The property ID of the 'instance of' property (data type: item), which specifies the class(es) of an item.", diff --git a/includes/ConstraintCheck/Helper/SparqlHelper.php b/includes/ConstraintCheck/Helper/SparqlHelper.php index 5bb6e48..a4ed411 100644 --- a/includes/ConstraintCheck/Helper/SparqlHelper.php +++ b/includes/ConstraintCheck/Helper/SparqlHelper.php @@ -188,26 +188,75 @@ */ public function matchesRegularExpression( $text, $regex ) { // caching wrapper around matchesRegularExpressionWithSparql - return (bool)$this->cache->getWithSetCallback( + + $regexHash = hash( 'sha256', $regex ); + $textHash = hash( 'sha256', $text ); + $cacheMapSize = $this->config->get( 'WBQualityConstraintsFormatCacheMapSize' ); + + $cacheMap = $this->cache->getWithSetCallback( $this->cache->makeKey( 'WikibaseQualityConstraints', // extension 'regex', // action 'WDQS-Java', // regex flavor - hash( 'sha256', $regex ), - hash( 'sha256', $text ) + $regexHash ), WANObjectCache::TTL_DAY, - function() use ( $text, $regex ) { - $this->dataFactory->increment( 'wikibase.quality.constraints.regex.cachemiss' ); - // convert to int because boolean false is interpreted as value not found - return (int)$this->matchesRegularExpressionWithSparql( $text, $regex ); + function( $curCacheMap ) use ( $text, $regex, $textHash, $regexHash, $cacheMapSize ) { + $this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.refresh' ); + // Initialize the cache map if not set + if ( $curCacheMap === false ) { + $curCacheMap = []; + } + // Refresh triggered by hotTTR... 
+ // Add regex-text check result to top of the LRU map if present + if ( isset( $curCacheMap[$textHash] ) ) { + $this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.refresh.hit' ); + return [ $textHash => $curCacheMap[$textHash] ] + $curCacheMap; + } + $this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.refresh.miss' ); + // Get the regex-text check result + $value = $this->matchesRegularExpressionWithSparql( $text, $regex ); + // Add regex-text check to the bottom 3/8s of the LRU map + $index = intval( count( $curCacheMap ) * 5/8 ); + $newCacheMap = []; + $pos = 0; + foreach ( $curCacheMap as $k => $v ) { + if ( $pos == $index ) { + $newCacheMap[$textHash] = $value; // inject + ++$pos; + } + if ( $pos++ >= $cacheMapSize ) { + break; // prune to size + } + $newCacheMap[$k] = $v; // preserve + } + if ( !array_key_exists( $textHash, $newCacheMap ) && + count( $newCacheMap ) < $cacheMapSize ) { + // injection failed, e. g. due to empty $curCacheMap + $newCacheMap[$textHash] = $value; + } + + return $newCacheMap; }, [ + // Once map is > 1 sec old, consider refreshing + 'ageNew' => 1, + // Increase likelihood of refresh to certainty once 1 minute old; + // the most common keys are more likely to trigger this + 'hotTTR' => 60, // avoid querying cache servers multiple times in a request // (e. g. 
when checking format of a reference URL used multiple times on an entity) 'pcTTL' => WANObjectCache::TTL_PROC_LONG, ] ); + + if ( isset( $cacheMap[$textHash] ) ) { + $this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.hit' ); + return $cacheMap[$textHash]; + } else { + $this->dataFactory->increment( 'wikibase.quality.constraints.regex.cache.miss' ); + return $this->matchesRegularExpressionWithSparql( $text, $regex ); + } } /** -- To view, visit https://gerrit.wikimedia.org/r/379222 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I5e9fff029010551736c2733dcb93d9b33b110381 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/WikibaseQualityConstraints Gerrit-Branch: master Gerrit-Owner: Lucas Werkmeister (WMDE) <lucas.werkmeis...@wikimedia.de> Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits