Lucas Werkmeister (WMDE) has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/379222 )

Change subject: Use per-regex cache map to cache regex check results
......................................................................

Use per-regex cache map to cache regex check results

Instead of caching each regex check result under an individual,
per-regex and per-text key, we cache an array for each regex, where we
cache results for the most recently used texts. The size of that cache
map is configurable with the WBQualityConstraintsFormatCacheMapSize
option, defaulting to 100 entries.

The code in SparqlHelper is mostly taken from this Phabricator comment:
https://phabricator.wikimedia.org/T173696#3620307

Bug: T173696
Change-Id: I5e9fff029010551736c2733dcb93d9b33b110381
---
M extension.json
M includes/ConstraintCheck/Helper/SparqlHelper.php
2 files changed, 61 insertions(+), 7 deletions(-)


  git pull 
ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/WikibaseQualityConstraints
 refs/changes/22/379222/1

diff --git a/extension.json b/extension.json
index f5f0a56..f079926 100644
--- a/extension.json
+++ b/extension.json
@@ -137,6 +137,11 @@
                        "description": "Whether or not to check the 'format' 
constraint. If this flag is set to false, any check of the 'format' constraint 
will return a 'todo' status with the 'wbqc-violation-message-security-reason' 
message.",
                        "public": true
                },
+               "WBQualityConstraintsFormatCacheMapSize": {
+                       "value": 100,
+                       "description": "Size of the per-regex cache map for 
format/regex check results. For each regex, up to this many values will have 
cached whether they match the regex or not, on a least-recently-used basis.",
+                       "public": true
+               },
                "WBQualityConstraintsInstanceOfId": {
                        "value": "P31",
                        "description": "The property ID of the 'instance of' 
property (data type: item), which specifies the class(es) of an item.",
diff --git a/includes/ConstraintCheck/Helper/SparqlHelper.php 
b/includes/ConstraintCheck/Helper/SparqlHelper.php
index 5bb6e48..a4ed411 100644
--- a/includes/ConstraintCheck/Helper/SparqlHelper.php
+++ b/includes/ConstraintCheck/Helper/SparqlHelper.php
@@ -188,26 +188,75 @@
         */
        public function matchesRegularExpression( $text, $regex ) {
                // caching wrapper around matchesRegularExpressionWithSparql
-               return (bool)$this->cache->getWithSetCallback(
+
+               $regexHash = hash( 'sha256', $regex );
+               $textHash = hash( 'sha256', $text );
+               $cacheMapSize = $this->config->get( 
'WBQualityConstraintsFormatCacheMapSize' );
+
+               $cacheMap = $this->cache->getWithSetCallback(
                        $this->cache->makeKey(
                                'WikibaseQualityConstraints', // extension
                                'regex', // action
                                'WDQS-Java', // regex flavor
-                               hash( 'sha256', $regex ),
-                               hash( 'sha256', $text )
+                               $regexHash
                        ),
                        WANObjectCache::TTL_DAY,
-                       function() use ( $text, $regex ) {
-                               $this->dataFactory->increment( 
'wikibase.quality.constraints.regex.cachemiss' );
-                               // convert to int because boolean false is 
interpreted as value not found
-                               return 
(int)$this->matchesRegularExpressionWithSparql( $text, $regex );
+                       function( $curCacheMap ) use ( $text, $regex, 
$textHash, $regexHash, $cacheMapSize ) {
+                               $this->dataFactory->increment( 
'wikibase.quality.constraints.regex.cache.refresh' );
+                               // Initialize the cache map if not set
+                               if ( $curCacheMap === false ) {
+                                       $curCacheMap = [];
+                               }
+                               // Refresh triggered by hotTTR...
+                               // Add regex-text check result to top of the 
LRU map if present
+                               if ( isset( $curCacheMap[$textHash] ) ) {
+                                       $this->dataFactory->increment( 
'wikibase.quality.constraints.regex.cache.refresh.hit' );
+                                       return [ $textHash => 
$curCacheMap[$textHash] ] + $curCacheMap;
+                               }
+                               $this->dataFactory->increment( 
'wikibase.quality.constraints.regex.cache.refresh.miss' );
+                               // Get the regex-text check result
+                               $value = 
$this->matchesRegularExpressionWithSparql( $text, $regex );
+                               // Add regex-text check to the bottom 3/8s of 
the LRU map
+                               $index = intval( count( $curCacheMap ) * 5/8 );
+                               $newCacheMap = [];
+                               $pos = 0;
+                               foreach ( $curCacheMap as $k => $v ) {
+                                       if ( $pos == $index ) {
+                                               $newCacheMap[$textHash] = 
$value; // inject
+                                               ++$pos;
+                                       }
+                                       if ( $pos++ >= $cacheMapSize ) {
+                                               break; // prune to size
+                                       }
+                                       $newCacheMap[$k] = $v; // preserve
+                               }
+                               if ( !array_key_exists( $textHash, $newCacheMap 
) &&
+                                       count( $newCacheMap ) < $cacheMapSize ) 
{
+                                       // injection failed, e. g. due to empty 
$curCacheMap
+                                       $newCacheMap[$textHash] = $value;
+                               }
+
+                               return $newCacheMap;
                        },
                        [
+                               // Once map is > 1 sec old, consider refreshing
+                               'ageNew' => 1,
+                               // Increase likelihood of refresh to certainty 
once 1 minute old;
+                               // the most common keys are more likely to 
trigger this
+                               'hotTTR' => 60,
                                // avoid querying cache servers multiple times 
in a request
                                // (e. g. when checking format of a reference 
URL used multiple times on an entity)
                                'pcTTL' => WANObjectCache::TTL_PROC_LONG,
                        ]
                );
+
+               if ( isset( $cacheMap[$textHash] ) ) {
+                       $this->dataFactory->increment( 
'wikibase.quality.constraints.regex.cache.hit' );
+                       return $cacheMap[$textHash];
+               } else {
+                       $this->dataFactory->increment( 
'wikibase.quality.constraints.regex.cache.miss' );
+                       return $this->matchesRegularExpressionWithSparql( 
$text, $regex );
+               }
        }
 
        /**

-- 
To view, visit https://gerrit.wikimedia.org/r/379222
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5e9fff029010551736c2733dcb93d9b33b110381
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/WikibaseQualityConstraints
Gerrit-Branch: master
Gerrit-Owner: Lucas Werkmeister (WMDE) <lucas.werkmeis...@wikimedia.de>
Gerrit-Reviewer: Aaron Schulz <asch...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to