Subramanya Sastry has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/161108

Change subject: WIP: Make table_lines return tokens after each row.
......................................................................

WIP: Make table_lines return tokens after each row.

* Some tests are still failing ... productions need additional tweaking.

* This doesn't seem to affect parsability of the large table.
  I can see the HTML tree builder getting the stream of processed
  tokens (try parsing with --trace html). So, I suspect we have to
  look elsewhere to fix memory issues. I was able to parse about
  1800+ lines earlier in 50+ secs and the same is true even now.
  Nothing has changed. When I go up to 2200+ lines, things get
  really slow. Let me run it with --trace html next to see
  what comes up.

Change-Id: I84aab52a04269ce58c274ad3f279600b358e8e14
---
M lib/pegTokenizer.pegjs.txt
1 file changed, 43 insertions(+), 29 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/services/parsoid 
refs/changes/08/161108/1

diff --git a/lib/pegTokenizer.pegjs.txt b/lib/pegTokenizer.pegjs.txt
index 4b63049..e558e88 100644
--- a/lib/pegTokenizer.pegjs.txt
+++ b/lib/pegTokenizer.pegjs.txt
@@ -849,14 +849,14 @@
 // This production is identical to the 'inline' fragment except
 // that tables are allowed inside image captions.
 link_text_fragment
-  = c:((sol table_lines) / urltext
-          / (!inline_breaks
-              !pre_start
-              r:( inline_element / '[' text_char+ ']' / . ) {
-                  return r;
-              })
+  = c:((sol full_table)
+       / urltext
+       / (!inline_breaks
+          !pre_start
+          r:( inline_element / '[' text_char+ ']' / . ) { return r; }
+         )
     )+ {
-      //console.warn('inline out:' + pp(c));
+      // console.warn('inline out:' + 
JSON.stringify(tu.flatten_stringlist(c)));
       return tu.flatten_stringlist( c );
   }
 
@@ -1511,14 +1511,14 @@
  * Fürstenberg.
  */
 hacky_dl_uses = bullets:":"+
-               c:table_lines
+               tbl:full_table
                s:space* // Do we really need to RT this?
                &comment_space_eolf
 {
     // Leave bullets as an array -- list handler expects this
     var li = new TagTk( 'listItem', [], { tsr: [peg$reportedPos, 
peg$reportedPos + bullets.length] }  );
     li.bullets = bullets;
-    return tu.flattenIfArray([li, c || [], s || []]);
+    return tu.flattenIfArray([li, tbl || [], s || []]);
 }
 
 dtdd
@@ -1566,37 +1566,52 @@
  * directly to block_lines.
  *********************************************************************/
 
+full_table
+  = // & { console.warn('enter table_lines: ' + input.substr(peg$currPos, 
20)); return true; }
+    (! inline_breaks / & '{{!}}' )
+    r:(
+        & { return stops.push('table', true); }
+        tbl:(
+            table_start_tag optionalNewlines
+            (sol table_content_line optionalNewlines)*
+            sol table_end_tag)
+        {
+            stops.pop('table');
+            return tbl;
+        }
+      / & { return stops.pop('table'); }
+    ) { return r; }
+
 table_lines
-  = //& { console.warn('enter table_lines: ' + input.substr(peg$currPos, 20)); 
return true; }
+  = // & { console.warn('enter table_lines: ' + input.substr(peg$currPos, 
20)); return true; }
     (! inline_breaks / & '{{!}}' )
     r:(
         & { return stops.push('table', true); }
         tl:table_line
-        tls:(
-            nls:optionalNewlines
-            s:sol
-            tl2:table_line { return nls.concat(s, tl2); }
-        )*
+        nls:optionalNewlines
         {
             stops.pop('table');
             //console.warn('table_lines: ' + pp(tl.concat(tls)));
-            return tl.concat( tls );
+            return tl.concat(nls);
         }
       / & { return stops.pop('table'); }
     ) { return r; }
 
 // This production assumes start-of-line position!
 table_line
-  = (space / comment)* (table_start_tag
-  / table_heading_tags
-  / table_row_tag
-  / table_data_tags
-  / table_caption_tag
-  / table_end_tag)
+  = table_start_tag
+  / table_content_line
+  / table_end_tag
 
+table_content_line = (space / comment)* (
+    table_heading_tags
+    / table_row_tag
+    / table_data_tags
+    / table_caption_tag
+  )
 
 table_start_tag
-  = b:"{" p:pipe
+  = sc:(space / comment)* b:"{" p:pipe
     // ok to normalize away stray |} on rt (see bug 57360)
     & { return stops.push('table', false); }
     ta:(generic_attribute / broken_table_attribute_name_char)*
@@ -1613,7 +1628,7 @@
             tblStart.attribs = ta;
         }
 
-        return [tblStart];
+        return sc.concat([tblStart]);
     }
 
 table_caption_tag
@@ -1627,9 +1642,8 @@
             .concat([new EndTagTk('caption')]);
     }
 
-
 table_row_tag
-  = //& { console.warn("table row enter @" + input.substr(peg$currPos, 30)); 
return true; }
+  = // & { console.warn("table row enter @" + input.substr(peg$currPos, 30)); 
return true; }
     // avoid recursion via nested_block_in_table
     ! { return stops.onStack('tableDataBlock'); }
     p:pipe dashes:"-"+
@@ -1696,7 +1710,7 @@
     }
 
 table_data_tag
-  = //& { dp("table_data enter, pos=" + peg$currPos + 
input.substr(peg$currPos,10)); return true; }
+  = // & { console.warn("table_data_tag enter @" + input.substr(peg$currPos, 
30)); return true; }
     ! "}"
     arg:row_syntax_table_args?
     //& { console.warn("past attrib, pos=" + peg$currPos + 
input.substr(peg$currPos,10)); return true; }
@@ -1740,14 +1754,14 @@
     / & { return stops.pop('th'); }
 
 table_end_tag
-  = space* p:pipe b:"}" {
+  = sc:(space / comment)* p:pipe b:"}" {
       var tblEnd = new EndTagTk( 'table', [], { tsr: [peg$reportedPos, 
peg$currPos] } );
       if (p !== "|") {
           // p+"<brace-char>" is triggering some bug in pegJS
           // I cannot even use that expression in the comment!
           tblEnd.dataAttribs.endTagSrc = p+b;
       }
-      return [ tblEnd ];
+      return sc.concat([tblEnd]);
   }
 
 /**

-- 
To view, visit https://gerrit.wikimedia.org/r/161108
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I84aab52a04269ce58c274ad3f279600b358e8e14
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/services/parsoid
Gerrit-Branch: master
Gerrit-Owner: Subramanya Sastry <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to