jenkins-bot has submitted this change and it was merged.

Change subject: Some more tweaks to the parsing limits patch
......................................................................


Some more tweaks to the parsing limits patch

* Enforce resource limits in emitChunk because the
  backtracking cache is enabled selectively in the tokenizer.
* Reduce list item default limit to 30K
* Increase transclusion limit to 10K.

Change-Id: Ib21b152b239b1262778aa9dcb0081ceaccf35867
---
M lib/config/ParsoidConfig.js
M lib/wt2html/pegTokenizer.pegjs.txt
M lib/wt2html/tokenizer.utils.js
3 files changed, 33 insertions(+), 14 deletions(-)

Approvals:
  Arlolra: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/lib/config/ParsoidConfig.js b/lib/config/ParsoidConfig.js
index 976042a..4a5182a 100644
--- a/lib/config/ParsoidConfig.js
+++ b/lib/config/ParsoidConfig.js
@@ -77,22 +77,23 @@
                parsoidCacheReq: 0,
        },
 
-       // SSS FIXME: Somewhat arbitrary numbers for starters.
-       // If these limits are breached, we return a http 500.
+       // Somewhat arbitrary numbers for starters.
+       // If these limits are breached, we return a http 413 (Payload too large)
        limits: {
                wt2html: {
                        // We won't handle pages beyond this size
                        maxWikitextSize: 1000000, // 1M
 
                        // Max list items per page
-                       maxListItems: 40000,
+                       maxListItems: 30000,
 
                        // Max table cells per page
                        maxTableCells: 30000,
 
                        // Max transclusions per page
-                       maxTransclusions: 5000,
+                       maxTransclusions: 10000,
 
+                       // DISABLED for now
                        // Max images per page
                        maxImages: 1000,
 
diff --git a/lib/wt2html/pegTokenizer.pegjs.txt b/lib/wt2html/pegTokenizer.pegjs.txt
index 9524262..07e5c4a 100644
--- a/lib/wt2html/pegTokenizer.pegjs.txt
+++ b/lib/wt2html/pegTokenizer.pegjs.txt
@@ -50,12 +50,20 @@
         // Shift tsr of all tokens by the pipeline offset
         Util.shiftTokenTSR(tokens, options.pipelineOffset);
         env.log("trace/peg", options.pegTokenizer.pipelineId, "---->  ", tokens);
+
+        var i;
+        var n = tokens.length;
+
+        // Enforce parsing resource limits
+        for (i = 0; i < n; i++) {
+            tu.enforceParserResourceLimits(env, tokens[i]);
+        }
+
         // limit the size of individual chunks
         var chunkLimit = 100000;
-        if (tokens.length > chunkLimit) {
-            var i = 0;
-            var l = tokens.length;
-            while (i < l) {
+        if (n > chunkLimit) {
+            i = 0;
+            while (i < n) {
                 options.cb(tokens.slice(i, i + chunkLimit));
                 i += chunkLimit;
             }
@@ -658,7 +666,6 @@
       // again.
       params.unshift(new KV(tu.flattenIfArray(target.tokens), '', target.srcOffsets));
       var obj = new SelfclosingTagTk('template', params, { tsr: tsrOffsets(), src: text() });
-      env.bumpParserResourceUse('transclusion');
       return obj;
     }
 
@@ -1394,7 +1401,6 @@
     var tsr = tsrOffsets('start');
     tsr[1] += bullets.length;
     var li = new TagTk('listItem', [], { tsr: tsr });
-    env.bumpParserResourceUse('listItem');
     li.bullets = bullets;
     return [ li, c ];
 }
@@ -1416,7 +1422,6 @@
     var tsr = tsrOffsets('start');
     tsr[1] += bullets.length;
     var li = new TagTk('listItem', [], { tsr: tsr });
-    env.bumpParserResourceUse('listItem');
     li.bullets = bullets;
     return tu.flattenIfArray([li, tbl || [], s || []]);
 }
@@ -1442,7 +1447,6 @@
         li1.bullets.push(";");
         // TSR: -1 for the intermediate ":"
         var li2 = new TagTk('listItem', [], { tsr: [cpos - 1, cpos], stx: 'row' });
-        env.bumpParserResourceUse('listItem', 2);
         li2.bullets = bullets.slice();
         li2.bullets.push(":");
 
@@ -1672,7 +1676,6 @@
     tagEndPos:("" { return endOffset(); })
     td:nested_block_in_table*
     {
-        env.bumpParserResourceUse('tableCell');
         return tu.buildTableTokens("td", "|", arg, [startOffset(), tagEndPos], endOffset(), td);
     }
 
@@ -1711,7 +1714,6 @@
       }
       return true;
     } d:nested_block_in_table { return d; } )* {
-        env.bumpParserResourceUse('tableCell');
         return tu.buildTableTokens("th", "!", arg, [startOffset(), tagEndPos], endOffset(), c);
     }
 
diff --git a/lib/wt2html/tokenizer.utils.js b/lib/wt2html/tokenizer.utils.js
index 986454c..e746129 100644
--- a/lib/wt2html/tokenizer.utils.js
+++ b/lib/wt2html/tokenizer.utils.js
@@ -306,6 +306,22 @@
                }
        },
 
+       enforceParserResourceLimits: function(env, token) {
+               if (token && (token.constructor === TagTk || token.constructor === SelfclosingTagTk)) {
+                       switch (token.name) {
+                               case 'listItem':
+                                       env.bumpParserResourceUse('listItem');
+                                       break;
+                               case 'template':
+                                       env.bumpParserResourceUse('transclusion');
+                                       break;
+                               case 'td':
+                               case 'th':
+                                       env.bumpParserResourceUse('tableCell');
+                                       break;
+                       }
+               }
+       },
 };
 
 

-- 
To view, visit https://gerrit.wikimedia.org/r/258469
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ib21b152b239b1262778aa9dcb0081ceaccf35867
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: Arlolra <abrea...@wikimedia.org>
Gerrit-Reviewer: Cscott <canan...@wikimedia.org>
Gerrit-Reviewer: Subramanya Sastry <ssas...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to