Author: kn
Date: Tue Feb 26 15:55:48 2008
New Revision: 7455
Log:
- Parse simple paragraphs
Added:
experimental/Document/src/document/rst/nodes/paragraph.php
experimental/Document/tests/files/rst/parser/003_simple_text.rst
experimental/Document/tests/files/rst/parser/003_simple_text.txt
Modified:
experimental/Document/design/class_diagram.png
experimental/Document/src/document/rst/parser.php
experimental/Document/src/document/rst/token.php
experimental/Document/src/document_autoload.php
Modified: experimental/Document/design/class_diagram.png
==============================================================================
Binary files - no diff available.
Added: experimental/Document/src/document/rst/nodes/paragraph.php
==============================================================================
--- experimental/Document/src/document/rst/nodes/paragraph.php (added)
+++ experimental/Document/src/document/rst/nodes/paragraph.php [iso-8859-1] Tue
Feb 26 15:55:48 2008
@@ -1,0 +1,52 @@
+<?php
+/**
+ * File containing the ezcDocumentRstParagraphNode struct
+ *
+ * @package Document
+ * @version //autogen//
+ * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved.
+ * @license http://ez.no/licenses/new_bsd New BSD License
+ */
+
+/**
+ * AST node representing a paragraph in a RST document
+ *
+ * @package Document
+ * @version //autogen//
+ * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved.
+ * @license http://ez.no/licenses/new_bsd New BSD License
+ */
+class ezcDocumentRstParagraphNode extends ezcDocumentRstNode
+{
+ /**
+ * Construct RST paragraph node from the given token
+ *
+ * @param ezcDocumentRstToken $token
+ * @return void
+ */
+ public function __construct( ezcDocumentRstToken $token )
+ {
+ // The parent constructor receives the token together with the
+ // PARAGRAPH node type constant.
+ parent::__construct( $token, self::PARAGRAPH );
+ }
+
+ /**
+ * Recreate a paragraph node from its var_export()ed properties
+ *
+ * @param array $properties Uses the 'token' and 'nodes' keys
+ * @return ezcDocumentRstParagraphNode
+ * @ignore
+ */
+ public static function __set_state( $properties )
+ {
+ $node = new ezcDocumentRstParagraphNode(
+ $properties['token']
+ );
+
+ $node->nodes = $properties['nodes'];
+ return $node;
+ }
+}
+
+?>
Modified: experimental/Document/src/document/rst/parser.php
==============================================================================
--- experimental/Document/src/document/rst/parser.php [iso-8859-1] (original)
+++ experimental/Document/src/document/rst/parser.php [iso-8859-1] Tue Feb 26
15:55:48 2008
@@ -64,6 +64,7 @@
ezcDocumentRstToken::WHITESPACE => array(
),
ezcDocumentRstToken::NEWLINE => array(
+ 'shiftParagraph'
),
ezcDocumentRstToken::BACKSLASH => array(
'shiftBackslash',
@@ -97,9 +98,10 @@
* @var array
*/
protected $reductions = array(
- ezcDocumentRstNode::DOCUMENT => 'reduceSection',
- ezcDocumentRstNode::SECTION => 'reduceSection',
- ezcDocumentRstNode::TITLE => 'reduceTitle',
+ ezcDocumentRstNode::DOCUMENT => 'reduceSection',
+ ezcDocumentRstNode::SECTION => 'reduceSection',
+ ezcDocumentRstNode::TITLE => 'reduceTitle',
+ ezcDocumentRstNode::PARAGRAPH => 'reduceParagraph',
);
/**
@@ -145,9 +147,11 @@
*/
public function parse( array $tokens )
{
+ // echo "\n\nStart parser\n============\n\n";
+
while ( ( $token = array_shift( $tokens ) ) !== null )
{
- // echo "[T] Token: {$token->type}\n";
+ // echo "[T] Token: " . ezcDocumentRstToken::getTokenName(
$token->type ) . " ({$token->type})\n";
// First shift given token by the defined reduction methods
foreach ( $this->shifts[$token->type] as $method )
@@ -205,7 +209,7 @@
* @param array $tokens
* @return ezcDocumentRstDocumentNode
*/
- protected function shiftDocument( ezcDocumentRstToken $token, array
$tokens )
+ protected function shiftDocument( ezcDocumentRstToken $token, array
&$tokens )
{
// If there are any tokens left after the end of the file, something
// went seriously wrong in the tokenizer.
@@ -227,7 +231,7 @@
* @param array $tokens
* @return ezcDocumentRstTitleNode
*/
- protected function shiftTitle( ezcDocumentRstToken $token, array $tokens )
+ protected function shiftTitle( ezcDocumentRstToken $token, array &$tokens )
{
if ( ( $token->position !== 1 ) ||
( $tokens[0]->type !== ezcDocumentRstToken::NEWLINE ) )
@@ -262,9 +266,36 @@
* @param array $tokens
* @return ezcDocumentRstTitleNode
*/
- protected function shiftText( ezcDocumentRstToken $token, array $tokens )
+ protected function shiftText( ezcDocumentRstToken $token, array &$tokens )
{
return new ezcDocumentRstTextLineNode(
+ $token
+ );
+ }
+
+ /**
+ * Shift a paragraph node on two newlines
+ *
+ * @param ezcDocumentRstToken $token
+ * @param array $tokens
+ * @return ezcDocumentRstParagraphNode|false
+ */
+ protected function shiftParagraph( ezcDocumentRstToken $token, array
&$tokens )
+ {
+ if ( $tokens[0]->type !== ezcDocumentRstToken::NEWLINE )
+ {
+ // For now we only check for paragraphs closed with two newlines.
+ return false;
+ }
+
+ // Remove all following newlines from the by-reference token list.
+ while ( isset( $tokens[0] ) &&
+ ( $tokens[0]->type === ezcDocumentRstToken::NEWLINE ) )
+ {
+ array_shift( $tokens );
+ }
+
+ return new ezcDocumentRstParagraphNode(
$token
);
}
@@ -383,7 +414,6 @@
ezcDocumentRstNode::TABLE,
), true ) )
{
- // @TODO: Enhance error message
throw new ezcDocumentRstParserException(
$child->token,
"Unexpected node: " . ezcDocumentRstNode::getTokenName(
$child->type ) . "."
@@ -417,7 +447,10 @@
// If the section level is higher then in our new node and
// lower the the last node, reduce sections.
// echo " -> Reduce section {$child->depth}.";
- $child->nodes = $collected;
+ $child->nodes = array_merge(
+ $child->nodes,
+ $collected
+ );
$collected = array();
}
@@ -429,9 +462,43 @@
$collected[] = $child;
}
- $node->nodes = $collected;
+ $node->nodes = array_merge(
+ $node->nodes,
+ $collected
+ );
array_unshift( $this->documentStack, $node );
}
+
+ /**
+ * Reduce paragraph
+ *
+ * Aggregates the text line nodes on top of the stack into the paragraph.
+ *
+ * @param ezcDocumentRstParagraphNode $node
+ * @return void
+ */
+ protected function reduceParagraph( ezcDocumentRstNode $node )
+ {
+ $found = 0;
+
+ // Move text line nodes from the front of the document stack to the
+ // front of the paragraph's nodes until another node type is found.
+ while ( isset( $this->documentStack[0] ) &&
+ in_array( $this->documentStack[0]->type, array(
+ ezcDocumentRstNode::TEXT_LINE,
+ ), true ) )
+ {
+ // echo " - Append text to paragraph\n";
+ array_unshift( $node->nodes, array_shift( $this->documentStack ) );
+ ++$found;
+ }
+
+ if ( $found > 0 )
+ {
+ // Paragraphs without any text line are not put on the stack.
+ array_unshift( $this->documentStack, $node );
+ }
+ }
}
?>
Modified: experimental/Document/src/document/rst/token.php
==============================================================================
--- experimental/Document/src/document/rst/token.php [iso-8859-1] (original)
+++ experimental/Document/src/document/rst/token.php [iso-8859-1] Tue Feb 26
15:55:48 2008
@@ -77,6 +77,33 @@
}
/**
+ * Get token name from type
+ *
+ * Return a user readable name for the numeric token type, or 'Unknown'.
+ *
+ * @param int $type
+ * @return string
+ */
+ public static function getTokenName( $type )
+ {
+ $names = array(
+ self::WHITESPACE => 'Whitespace',
+ self::NEWLINE => 'Newline',
+ self::BACKSLASH => 'Backslash',
+ self::SPECIAL_CHARS => 'Special character group',
+ self::TEXT_LINE => 'Text',
+ self::EOF => 'End Of File',
+ );
+
+ if ( !isset( $names[$type] ) )
+ {
+ return 'Unknown';
+ }
+
+ return $names[$type];
+ }
+
+ /**
* Set state after var_export
*
* @param array $properties
Modified: experimental/Document/src/document_autoload.php
==============================================================================
--- experimental/Document/src/document_autoload.php [iso-8859-1] (original)
+++ experimental/Document/src/document_autoload.php [iso-8859-1] Tue Feb 26
15:55:48 2008
@@ -37,6 +37,7 @@
'ezcDocumentRst' => 'Document/document/rst.php',
'ezcDocumentRstDocumentNode' =>
'Document/document/rst/nodes/document.php',
'ezcDocumentRstOptions' =>
'Document/options/document_rst.php',
+ 'ezcDocumentRstParagraphNode' =>
'Document/document/rst/nodes/paragraph.php',
'ezcDocumentRstParser' =>
'Document/document/rst/parser.php',
'ezcDocumentRstSectionNode' =>
'Document/document/rst/nodes/section.php',
'ezcDocumentRstTextLineNode' =>
'Document/document/rst/nodes/text_line.php',
Added: experimental/Document/tests/files/rst/parser/003_simple_text.rst
==============================================================================
--- experimental/Document/tests/files/rst/parser/003_simple_text.rst (added)
+++ experimental/Document/tests/files/rst/parser/003_simple_text.rst
[iso-8859-1] Tue Feb 26 15:55:48 2008
@@ -1,0 +1,131 @@
+<?php
+
+return ezcDocumentRstDocumentNode::__set_state(array(
+ 'depth' => 0,
+ 'line' => 0,
+ 'position' => 0,
+ 'type' => 0,
+ 'nodes' =>
+ array (
+ 0 =>
+ ezcDocumentRstSectionNode::__set_state(array(
+ 'title' => 'Main Title',
+ 'depth' => 1,
+ 'line' => 2,
+ 'position' => 1,
+ 'type' => 1,
+ 'nodes' =>
+ array (
+ 0 =>
+ ezcDocumentRstParagraphNode::__set_state(array(
+ 'line' => 5,
+ 'position' => 12,
+ 'type' => 3,
+ 'nodes' =>
+ array (
+ 0 =>
+ ezcDocumentRstTextLineNode::__set_state(array(
+ 'line' => 5,
+ 'position' => 1,
+ 'type' => 4,
+ 'nodes' =>
+ array (
+ ),
+ 'token' =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 5,
+ 'content' => 'Hello world',
+ 'line' => 5,
+ 'position' => 1,
+ )),
+ )),
+ ),
+ 'token' =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 5,
+ 'position' => 12,
+ )),
+ )),
+ 1 =>
+ ezcDocumentRstSectionNode::__set_state(array(
+ 'title' => 'Subsection',
+ 'depth' => 2,
+ 'line' => 7,
+ 'position' => 1,
+ 'type' => 1,
+ 'nodes' =>
+ array (
+ 0 =>
+ ezcDocumentRstParagraphNode::__set_state(array(
+ 'line' => 11,
+ 'position' => 20,
+ 'type' => 3,
+ 'nodes' =>
+ array (
+ 0 =>
+ ezcDocumentRstTextLineNode::__set_state(array(
+ 'line' => 10,
+ 'position' => 1,
+ 'type' => 4,
+ 'nodes' =>
+ array (
+ ),
+ 'token' =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 5,
+ 'content' => 'A bit more text, which exceeds one line to
test for a simple paragraph with',
+ 'line' => 10,
+ 'position' => 1,
+ )),
+ )),
+ 1 =>
+ ezcDocumentRstTextLineNode::__set_state(array(
+ 'line' => 11,
+ 'position' => 1,
+ 'type' => 4,
+ 'nodes' =>
+ array (
+ ),
+ 'token' =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 5,
+ 'content' => 'more then one line.',
+ 'line' => 11,
+ 'position' => 1,
+ )),
+ )),
+ ),
+ 'token' =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 2,
+ 'content' => '
+',
+ 'line' => 11,
+ 'position' => 20,
+ )),
+ )),
+ ),
+ 'token' =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 5,
+ 'content' => 'Subsection',
+ 'line' => 7,
+ 'position' => 1,
+ )),
+ )),
+ ),
+ 'token' =>
+ ezcDocumentRstToken::__set_state(array(
+ 'type' => 5,
+ 'content' => 'Main Title',
+ 'line' => 2,
+ 'position' => 1,
+ )),
+ )),
+ ),
+ 'token' => NULL,
+));
+
Added: experimental/Document/tests/files/rst/parser/003_simple_text.txt
==============================================================================
--- experimental/Document/tests/files/rst/parser/003_simple_text.txt (added)
+++ experimental/Document/tests/files/rst/parser/003_simple_text.txt
[iso-8859-1] Tue Feb 26 15:55:48 2008
@@ -1,0 +1,12 @@
+==========
+Main Title
+==========
+
+Hello world
+
+Subsection
+==========
+
+A bit more text, which exceeds one line to test for a simple paragraph with
+more then one line.
+
--
svn-components mailing list
[email protected]
http://lists.ez.no/mailman/listinfo/svn-components