http://www.mediawiki.org/wiki/Special:Code/MediaWiki/62754
Revision: 62754 Author: than4213 Date: 2010-02-21 01:00:36 +0000 (Sun, 21 Feb 2010) Log Message: ----------- Created parse sequences and split up the parsing of templates into their different parts Modified Paths: -------------- branches/parser-work/phase3/includes/parser/ParseTree.php branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php Modified: branches/parser-work/phase3/includes/parser/ParseTree.php =================================================================== --- branches/parser-work/phase3/includes/parser/ParseTree.php 2010-02-20 20:21:06 UTC (rev 62753) +++ branches/parser-work/phase3/includes/parser/ParseTree.php 2010-02-21 01:00:36 UTC (rev 62754) @@ -4,9 +4,15 @@ * Interface for Parse Object each with a specialized task while parsing * @ingroup Parser */ -interface ParseObject { +abstract class ParseObject { + protected $mName; + + function __construct($name) { + $this->mName = $name; + } + // Does the parse task specific to each parse object - function parse(&$text, &$rules, $endTag = NULL); + abstract function parse(&$text, &$rules, $endTag = NULL); } /** @@ -18,36 +24,35 @@ * mChildRule - What Parse rule to use to gather children for this element * @ingroup Parser */ -class ParseRule implements ParseObject { - private $mName, $mBeginTag, $mEndTag, $mChildRule; +class ParseRule extends ParseObject { + private $mBeginTag, $mChildRule, $mEndTag; - function __construct($name, $beginTag, $endTag = NULL, $childRule = NULL) { - $this->mName = $name; + function __construct($name, $beginTag, $childRule = NULL, $endTag = NULL) { + parent::__construct($name); $this->mBeginTag = $beginTag; + $this->mChildRule = $childRule; $this->mEndTag = $endTag; - $this->mChildRule = $childRule; } function parse(&$text, &$rules, $endTag = NULL) { if (! 
preg_match($this->mBeginTag, $text, $matches)) { return NULL; } - $newText = substr($text, strlen($matches[0])); - $children = NULL; + $text = substr($text, strlen($matches[0])); + $child = NULL; if ($this->mChildRule != NULL) { - $endTag = $this->mEndTag; - if ($endTag != NULL) { + if ($this->mEndTag != NULL) { + $endTag = $this->mEndTag; foreach ($matches as $i => $crrnt) { $endTag = str_replace('~' . $i, $crrnt, $endTag); } } - $children = $rules[$this->mChildRule]->parse($newText, $rules, $endTag); - if ($children == NULL) { + $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag); + if ($child == NULL) { return NULL; } } - $text = $newText; - return new ParseTree($this->mName, $matches, $children); + return new ParseTree($this->mName, $matches, array($child)); } } @@ -61,11 +66,11 @@ * mMaxChildren - Maximum amount of children for this rule, 0 means unlimited * @ingroup Parser */ -class ParseQuant implements ParseObject { - private $mName, $mChildRule, $mEndTag, $mMinChildren, $mMaxChildren; +class ParseQuant extends ParseObject { + private $mChildRule, $mEndTag, $mMinChildren, $mMaxChildren; function __construct($name, $childRule, $endTag = NULL, $minChildren = 0, $maxChildren = 0) { - $this->mName = $name; + parent::__construct($name); $this->mChildRule = $childRule; $this->mEndTag = $endTag; $this->mMinChildren = $minChildren; @@ -73,17 +78,21 @@ } function parse(&$text, &$rules, $endTag = NULL) { + $endRegEx = $this->mEndTag; + if ($this->mEndTag != NULL && $endTag != NULL) { + $endRegEx = str_replace('~r', $endTag, $this->mEndTag); + } $children = array(); - for ($i = 0; $i < $minChildren || (($this->mEndTag == NULL || ! preg_match($this->mEndTag, $text, $matches)) && - ($endTag == NULL || ! preg_match($endTag, $text, $matches)) && ($maxChildren <= 0 || $i < $maxChildren)); $i ++) { + for ($i = 0; $i < $this->mMinChildren || (($endRegEx == NULL || ! 
preg_match($endRegEx, $text, $matches)) && + ($this->mMaxChildren <= 0 || $i < $this->mMaxChildren)); $i ++) { $child = $rules[$this->mChildRule]->parse($text, $rules, $endTag); if ($child == NULL) { return NULL; } $children[] = $child; } - if ($endTag != NULL) { - if (!isset($matches[0])) { + if ($this->mEndTag != NULL) { + if (! isset($matches[0])) { return NULL; } $text = substr($text, strlen($matches[0])); @@ -97,18 +106,21 @@ * mList - The list of rules * @ingroup Parser */ -class ParseList implements ParseObject { +class ParseList extends ParseObject { private $mList; - function __construct($list) { + function __construct($name, $list) { + parent::__construct($name); $this->mList = $list; } function parse(&$text, &$rules, $endTag = NULL) { foreach ($this->mList as $crrnt) { - $child = $rules[$crrnt]->parse($text, $rules, $endTag); + $newText = $text; + $child = $rules[$crrnt]->parse($newText, $rules, $endTag); if ($child != NULL) { - return $child; + $text = $newText; + return new ParseTree($this->mName, NULL, array($child)); } } return NULL; @@ -116,6 +128,33 @@ } /** + * Contains a sequence of rules all of which must pass + * mName - The name to give the resultant ParseTree object + * mList - The sequence of rules + * @ingroup Parser + */ +class ParseSeq extends ParseObject { + private $mList; + + function __construct($name, $list) { + parent::__construct($name); + $this->mList = $list; + } + + function parse(&$text, &$rules, $endTag = NULL) { + $children = array(); + foreach ($this->mList as $crrnt) { + $child = $rules[$crrnt]->parse($text, $rules, $endTag); + if ($child == NULL) { + return NULL; + } + $children[] = $child; + } + return new ParseTree($this->mName, NULL, $children); + } +} + +/** * The parse tree of the data. * printTree translates the parse tree to xml, eventually this should be seperated into a data and engine layer. * mName - Indicates what ParseRule was used to create this node @@ -170,50 +209,18 @@ } $retString = "<" . $this->mName . 
">" . $retString . "</" . $this->mName . ">"; } elseif ($this->mName == "link") { - $retString = htmlspecialchars($this->mMatches[0]) . $this->mChildren->printTree() . "]]"; + $retString = htmlspecialchars($this->mMatches[0]) . $this->mChildren[0]->printTree() . "]]"; } elseif ($this->mName == "h") { - $retString = "<h>" . htmlspecialchars($this->mMatches[2]) . $this->mChildren->printTree() . + $retString = "<h>" . htmlspecialchars($this->mMatches[2]) . $this->mChildren[0]->printTree() . htmlspecialchars($this->mMatches[2]) . "</h>"; if ($this->mMatches[1] == "\n") { $retString = "\n" . $retString; } - } elseif ($this->mName == "template" || $this->mName == "tplarg") { - $retString = "<" . $this->mName . ">" . $this->mChildren->printTree() . "</" . $this->mName . ">"; - } elseif ($this->mName == "templatequant") { - $inTitle = true; - $foundEquals = false; - $currentItem = ""; - $this->mChildren[] = new ParseTree("pipe", NULL, NULL); - foreach ($this->mChildren as $crrnt) { - if ($crrnt->getName() == "pipe") { - if ($inTitle) { - $retString .= "<title>" . $currentItem . "</title>"; - $inTitle = false; - } else { - if (! $foundEquals) { - $retString .= "<part>"; - } - $retString .= "<value>" . $currentItem . "</value></part>"; - $foundEquals = false; - } - $currentItem = ""; - } elseif ($crrnt->getName() == "equals") { - if (! $inTitle && ! $foundEquals) { - $retString .= "<part><name>" . $currentItem . "</name>"; - $foundEquals = true; - $currentItem = ""; - } else { - $currentItem .= "="; - } - } else { - $currentItem .= $crrnt->printTree(); - } - } } else { foreach ($this->mChildren as $crrnt) { $retString .= $crrnt->printTree(); } - if ($this->mName == "root") { + if ($this->mName != "unnamed") { $retString = "<" . $this->mName . ">" . $retString . "</" . $this->mName . 
">"; } } Modified: branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php =================================================================== --- branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php 2010-02-20 20:21:06 UTC (rev 62753) +++ branches/parser-work/phase3/includes/parser/Preprocessor_DOM.php 2010-02-21 01:00:36 UTC (rev 62754) @@ -70,10 +70,11 @@ // To XML $xmlishRegex = implode('|', $this->parser->getStripList()); $rules = array( - "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', '/^}}/s', "TemplateQuant"), - "TplArg" => new ParseRule("tplarg", '/^{{{/s', '/^}}}/s', "TemplateQuant"), - "Link" => new ParseRule("link", '/^\[\[/s', '/^]]/s', "MainQuant"), - "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', '/^~2(?=(?: *<!--.*?-->)*(?:\n|$))/s', "MainQuant"), + "Template" => new ParseRule("template", '/^{{(?!{[^{])/s', "TemplateSeq", '}}'), + "TplArg" => new ParseRule("tplarg", '/^{{{/s', "TemplateSeq", '}}}'), + "TplPart" => new ParseRule("part", '/^\|/s', "TplPartList"), + "Link" => new ParseRule("link", '/^\[\[/s', "MainQuant", ']]'), + "Heading" => new ParseRule("h", '/^(\n|~BOF)(={1,6})/s', "HeadingQuant", '~2'), "CommentLine" => new ParseRule("commentline", '/^\n((?:<!--.*?-->\n)+)/s'), "Comment" => new ParseRule("comment", '/^<!--.*?(?:-->|$)/s'), "OnlyInclude" => new ParseRule("ignore", '/^<\/?onlyinclude>/s'), @@ -83,13 +84,18 @@ "XmlOpened" => new ParseRule("ext", '/^<(' . $xmlishRegex . 
')(.*?)>(.*?)(<\/\1>)/si'), "BeginFile" => new ParseRule("bof", '/^~BOF/s'), "MainText" => new ParseRule("text", '/^.[^{}\[\]<\n|=]*/s'), - "TplPipe" => new ParseRule("pipe", '/^\|/s'), - "TplEquals" => new ParseRule("equals", '/^=/s'), "Root" => new ParseQuant("root", "MainList", '/^$/'), - "MainQuant" => new ParseQuant("mainquant", "MainList"), - "TemplateQuant" => new ParseQuant("templatequant", "TemplateList"), - "MainList" => new ParseList(array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")), - "TemplateList" => new ParseList(array("TplPipe", "TplEquals", "MainList"))); + "MainQuant" => new ParseQuant("unnamed", "MainList", '/^~r/s'), + "HeadingQuant" => new ParseQuant("unnamed", "MainList", '/^~r(?=(?: *<!--.*?-->)*(?:\n|$))/s'), + "TplTitle" => new ParseQuant("title", "MainList", '/^(?=~r|\|)/s'), + "TplPartQuant" => new ParseQuant("unnamed", "TplPart", '/^~r/s'), + "TplTest" => new ParseQuant("unnamed", "MainList", '/^(?=~r|\||=(?!~r|\|))/s'), + "TplName" => new ParseQuant("name", "TplTest", '/^=/s', 0, 1), + "TplValue" => new ParseQuant("value", "MainList", '/^(?=~r|\|)/s'), + "MainList" => new ParseList("unnamed", array("Template", "TplArg", "Link", "Heading", "CommentLine", "Comment", "OnlyInclude", "NoInclude", "IncludeOnly", "XmlClosed", "XmlOpened", "BeginFile", "MainText")), + "TplPartList" => new ParseList("unnamed", array("TplPartSeq", "TplValue")), + "TemplateSeq" => new ParseSeq("unnamed", array("TplTitle", "TplPartQuant")), + "TplPartSeq" => new ParseSeq("unnamed", array("TplName", "TplValue"))); if ($flags & Parser::PTD_FOR_INCLUSION) { $rules["OnlyInclude"] = new ParseRule("ignore", '/^<\/onlyinclude>.*?(?:<onlyinclude>|$)/s'); $rules["NoInclude"] = new ParseRule("ignore", '/^<noinclude>.*?<\/noinclude>/s'); _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS@lists.wikimedia.org 
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs