http://www.mediawiki.org/wiki/Special:Code/MediaWiki/62754

Revision: 62754
Author:   than4213
Date:     2010-02-21 01:00:36 +0000 (Sun, 21 Feb 2010)

Log Message:
-----------
Created parse sequences and split up the parsing of templates into their 
different parts

Modified Paths:
--------------
    branches/parser-work/phase3/includes/parser/ParseTree.php
    branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php

Modified: branches/parser-work/phase3/includes/parser/ParseTree.php
===================================================================
--- branches/parser-work/phase3/includes/parser/ParseTree.php   2010-02-20 
20:21:06 UTC (rev 62753)
+++ branches/parser-work/phase3/includes/parser/ParseTree.php   2010-02-21 
01:00:36 UTC (rev 62754)
@@ -4,9 +4,15 @@
  * Interface for Parse Object each with a specialized task while parsing
  * @ingroup Parser
  */
-interface ParseObject {
+abstract class ParseObject {
+       protected $mName;
+
+       function __construct($name) {
+               $this->mName = $name;
+       }
+
        // Does the parse task specific to each parse object
-       function parse(&$text, &$rules, $endTag = NULL);
+       abstract function parse(&$text, &$rules, $endTag = NULL);
 }
 
 /**
@@ -18,36 +24,35 @@
  * mChildRule - What Parse rule to use to gather children for this element
  * @ingroup Parser
  */
-class ParseRule implements ParseObject {
-       private $mName, $mBeginTag, $mEndTag, $mChildRule;
+class ParseRule extends ParseObject {
+       private $mBeginTag, $mChildRule, $mEndTag;
 
-       function __construct($name, $beginTag, $endTag = NULL, $childRule = 
NULL) {
-               $this->mName = $name;
+       function __construct($name, $beginTag, $childRule = NULL, $endTag = 
NULL) {
+               parent::__construct($name);
                $this->mBeginTag = $beginTag;
+               $this->mChildRule = $childRule;
                $this->mEndTag = $endTag;
-               $this->mChildRule = $childRule;
        }
 
        function parse(&$text, &$rules, $endTag = NULL) {
                if (! preg_match($this->mBeginTag, $text, $matches)) {
                        return NULL;
                }
-               $newText = substr($text, strlen($matches[0]));
-               $children = NULL;
+               $text = substr($text, strlen($matches[0]));
+               $child = NULL;
                if ($this->mChildRule != NULL) {
-                       $endTag = $this->mEndTag;
-                       if ($endTag != NULL) {
+                       if ($this->mEndTag != NULL) {
+                               $endTag = $this->mEndTag;
                                foreach ($matches as $i => $crrnt) {
                                        $endTag = str_replace('~' . $i, $crrnt, 
$endTag);
                                }
                        }
-                       $children = $rules[$this->mChildRule]->parse($newText, 
$rules, $endTag);
-                       if ($children == NULL) {
+                       $child = $rules[$this->mChildRule]->parse($text, 
$rules, $endTag);
+                       if ($child == NULL) {
                                return NULL;
                        }
                }
-               $text = $newText;
-               return new ParseTree($this->mName, $matches, $children);
+               return new ParseTree($this->mName, $matches, array($child));
        }
 }
 
@@ -61,11 +66,11 @@
  * mMaxChildren - Maximum amount of children for this rule, 0 means unlimited
  * @ingroup Parser
  */
-class ParseQuant implements ParseObject {
-       private $mName, $mChildRule, $mEndTag, $mMinChildren, $mMaxChildren;
+class ParseQuant extends ParseObject {
+       private $mChildRule, $mEndTag, $mMinChildren, $mMaxChildren;
 
        function __construct($name, $childRule, $endTag = NULL, $minChildren = 
0, $maxChildren = 0) {
-               $this->mName = $name;
+               parent::__construct($name);
                $this->mChildRule = $childRule;
                $this->mEndTag = $endTag;
                $this->mMinChildren = $minChildren;
@@ -73,17 +78,21 @@
        }
 
        function parse(&$text, &$rules, $endTag = NULL) {
+               $endRegEx = $this->mEndTag;
+               if ($this->mEndTag != NULL && $endTag != NULL) {
+                       $endRegEx = str_replace('~r', $endTag, $this->mEndTag);
+               }
                $children = array();
-               for ($i = 0; $i < $minChildren || (($this->mEndTag == NULL || ! 
preg_match($this->mEndTag, $text, $matches)) &&
-                       ($endTag == NULL || ! preg_match($endTag, $text, 
$matches)) && ($maxChildren <= 0 || $i < $maxChildren)); $i ++) {
+               for ($i = 0; $i < $this->mMinChildren || (($endRegEx == NULL || 
! preg_match($endRegEx, $text, $matches)) && 
+                       ($this->mMaxChildren <= 0 || $i < 
$this->mMaxChildren)); $i ++) {
                        $child = $rules[$this->mChildRule]->parse($text, 
$rules, $endTag);
                        if ($child == NULL) {
                                return NULL;
                        }
                        $children[] = $child;
                }
-               if ($endTag != NULL) {
-                       if (!isset($matches[0])) {
+               if ($this->mEndTag != NULL) {
+                       if (! isset($matches[0])) {
                                return NULL;
                        }
                        $text = substr($text, strlen($matches[0]));
@@ -97,18 +106,21 @@
  * mList - The list of rules
  * @ingroup Parser
  */
-class ParseList implements ParseObject {
+class ParseList extends ParseObject {
        private $mList;
 
-       function __construct($list) {
+       function __construct($name, $list) {
+               parent::__construct($name);
                $this->mList = $list;
        }
 
        function parse(&$text, &$rules, $endTag = NULL) {
                foreach ($this->mList as $crrnt) {
-                       $child = $rules[$crrnt]->parse($text, $rules, $endTag);
+                       $newText = $text;
+                       $child = $rules[$crrnt]->parse($newText, $rules, 
$endTag);
                        if ($child != NULL) {
-                               return $child;
+                               $text = $newText;
+                               return new ParseTree($this->mName, NULL, 
array($child));
                        }
                }
                return NULL;
@@ -116,6 +128,33 @@
 }
 
 /**
+ * Contains a sequence of rules all of which must pass
+ * mName - The name to give the resultant ParseTree object
+ * mList - The sequence of rules
+ * @ingroup Parser
+ */
+class ParseSeq extends ParseObject {
+       private $mList;
+
+       function __construct($name, $list) {
+               parent::__construct($name);
+               $this->mList = $list;
+       }
+
+       function parse(&$text, &$rules, $endTag = NULL) {
+               $children = array();
+               foreach ($this->mList as $crrnt) {
+                       $child = $rules[$crrnt]->parse($text, $rules, $endTag);
+                       if ($child == NULL) {
+                               return NULL;
+                       }
+                       $children[] = $child;
+               }
+               return new ParseTree($this->mName, NULL, $children);
+       }
+}
+
+/**
  * The parse tree of the data.
  * printTree translates the parse tree to xml, eventually this should be 
seperated into a data and engine layer.
  * mName - Indicates what ParseRule was used to create this node
@@ -170,50 +209,18 @@
                        }
                        $retString = "<" . $this->mName . ">" . $retString . 
"</" . $this->mName . ">";
                } elseif ($this->mName == "link") {
-                       $retString = htmlspecialchars($this->mMatches[0]) . 
$this->mChildren->printTree() . "]]";
+                       $retString = htmlspecialchars($this->mMatches[0]) . 
$this->mChildren[0]->printTree() . "]]";
                } elseif ($this->mName == "h") {
-                       $retString = "<h>" . 
htmlspecialchars($this->mMatches[2]) . $this->mChildren->printTree() . 
+                       $retString = "<h>" . 
htmlspecialchars($this->mMatches[2]) . $this->mChildren[0]->printTree() . 
                                htmlspecialchars($this->mMatches[2]) . "</h>";
                        if ($this->mMatches[1] == "\n") {
                                $retString = "\n" . $retString;
                        }
-               } elseif ($this->mName == "template" || $this->mName == 
"tplarg") {
-                       $retString = "<" . $this->mName . ">" . 
$this->mChildren->printTree() . "</" . $this->mName . ">";
-               } elseif ($this->mName == "templatequant") {
-                       $inTitle = true;
-                       $foundEquals = false;
-                       $currentItem = "";
-                       $this->mChildren[] = new ParseTree("pipe", NULL, NULL);
-                       foreach ($this->mChildren as $crrnt) {
-                               if ($crrnt->getName() == "pipe") {
-                                       if ($inTitle) {
-                                               $retString .= "<title>" . 
$currentItem . "</title>";
-                                               $inTitle = false;
-                                       } else {
-                                               if (! $foundEquals) {
-                                                       $retString .= "<part>";
-                                               }
-                                               $retString .= "<value>" . 
$currentItem . "</value></part>";
-                                               $foundEquals = false;
-                                       }
-                                       $currentItem = "";
-                               } elseif ($crrnt->getName() == "equals") {
-                                       if (! $inTitle && ! $foundEquals) {
-                                               $retString .= "<part><name>" . 
$currentItem . "</name>";
-                                               $foundEquals = true;
-                                               $currentItem = "";
-                                       } else {
-                                               $currentItem .= "=";
-                                       }
-                               } else {
-                                       $currentItem .= $crrnt->printTree();
-                               }
-                       }
                } else {
                        foreach ($this->mChildren as $crrnt) {
                                $retString .= $crrnt->printTree();
                        }
-                       if ($this->mName == "root") {
+                       if ($this->mName != "unnamed") {
                                $retString = "<" . $this->mName . ">" . 
$retString . "</" . $this->mName . ">";
                        }
                }

Modified: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php
===================================================================
--- branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php    
2010-02-20 20:21:06 UTC (rev 62753)
+++ branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php    
2010-02-21 01:00:36 UTC (rev 62754)
@@ -70,10 +70,11 @@
                // To XML
                $xmlishRegex = implode('|', $this->parser->getStripList());
                $rules = array(
-                       "Template" => new ParseRule("template", 
'/^{{(?!{[^{])/s', '/^}}/s', "TemplateQuant"),
-                       "TplArg" => new ParseRule("tplarg", '/^{{{/s', 
'/^}}}/s', "TemplateQuant"),
-                       "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', 
"MainQuant"),
-                       "Heading" => new ParseRule("h", 
'/^(\n|~BOF)(={1,6})/s', '/^~2(?=(?: *<!--.*?-->)*(?:\n|$))/s', "MainQuant"),
+                       "Template" => new ParseRule("template", 
'/^{{(?!{[^{])/s', "TemplateSeq", '}}'),
+                       "TplArg" => new ParseRule("tplarg", '/^{{{/s', 
"TemplateSeq", '}}}'),
+                       "TplPart" => new ParseRule("part", '/^\|/s', 
"TplPartList"),
+                       "Link" => new ParseRule("link", '/^\[\[/s', 
"MainQuant", ']]'),
+                       "Heading" => new ParseRule("h", 
'/^(\n|~BOF)(={1,6})/s', "HeadingQuant", '~2'),
                        "CommentLine" => new ParseRule("commentline", 
'/^\n((?:<!--.*?-->\n)+)/s'),
                        "Comment" => new ParseRule("comment", 
'/^<!--.*?(?:-->|$)/s'),
                        "OnlyInclude" => new ParseRule("ignore", 
'/^<\/?onlyinclude>/s'),
@@ -83,13 +84,18 @@
                        "XmlOpened" => new ParseRule("ext", '/^<(' . 
$xmlishRegex . ')(.*?)>(.*?)(<\/\1>)/si'),
                        "BeginFile" => new ParseRule("bof", '/^~BOF/s'),
                        "MainText" => new ParseRule("text", 
'/^.[^{}\[\]<\n|=]*/s'),
-                       "TplPipe" => new ParseRule("pipe", '/^\|/s'),
-                       "TplEquals" => new ParseRule("equals", '/^=/s'),
                        "Root" => new ParseQuant("root", "MainList", '/^$/'),
-                       "MainQuant" => new ParseQuant("mainquant", "MainList"),
-                       "TemplateQuant" => new ParseQuant("templatequant", 
"TemplateList"),
-                       "MainList" => new ParseList(array("Template", "TplArg", 
"Link", "Heading", "CommentLine", "Comment",                                    
"OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", 
"BeginFile", "MainText")),
-                       "TemplateList" => new ParseList(array("TplPipe", 
"TplEquals", "MainList")));
+                       "MainQuant" => new ParseQuant("unnamed", "MainList", 
'/^~r/s'),
+                       "HeadingQuant" => new ParseQuant("unnamed", "MainList", 
'/^~r(?=(?: *<!--.*?-->)*(?:\n|$))/s'),
+                       "TplTitle" => new ParseQuant("title", "MainList", 
'/^(?=~r|\|)/s'),
+                       "TplPartQuant" => new ParseQuant("unnamed", "TplPart", 
'/^~r/s'),
+                       "TplTest" => new ParseQuant("unnamed", "MainList", 
'/^(?=~r|\||=(?!~r|\|))/s'),
+                       "TplName" => new ParseQuant("name", "TplTest", '/^=/s', 
0, 1),
+                       "TplValue" => new ParseQuant("value", "MainList", 
'/^(?=~r|\|)/s'),
+                       "MainList" => new ParseList("unnamed", 
array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment",        
                                 "OnlyInclude", "NoInclude", "IncludeOnly", 
"XmlClosed", "XmlOpened", "BeginFile", "MainText")),
+                       "TplPartList" => new ParseList("unnamed", 
array("TplPartSeq", "TplValue")),
+                       "TemplateSeq" => new ParseSeq("unnamed", 
array("TplTitle", "TplPartQuant")),
+                       "TplPartSeq" => new ParseSeq("unnamed", 
array("TplName", "TplValue")));
                if ($flags & Parser::PTD_FOR_INCLUSION) {
                        $rules["OnlyInclude"] = new ParseRule("ignore", 
'/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s');
                        $rules["NoInclude"] = new ParseRule("ignore", 
'/^<noinclude>.*?<\/noinclude>/s');



_______________________________________________
MediaWiki-CVS mailing list
MediaWiki-CVS@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to