Author: kn
Date: Fri Feb 8 12:35:13 2008
New Revision: 7322
Log:
- Added tokenizer test infrastructure
- Added basic tokenizer
- Tokenizer works for empty files! ;)
Added:
experimental/Document/src/document/rst/
experimental/Document/src/document/rst/token.php (with props)
experimental/Document/src/document/rst/tokenizer.php (with props)
experimental/Document/tests/document_rst_tokenizer_tests.php (with props)
experimental/Document/tests/files/rst/tokenizer/empty.tokens
Modified:
experimental/Document/design/class_diagram.png
experimental/Document/src/document_autoload.php
experimental/Document/tests/suite.php
Modified: experimental/Document/design/class_diagram.png
==============================================================================
Binary files - no diff available.
Added: experimental/Document/src/document/rst/token.php
==============================================================================
--- experimental/Document/src/document/rst/token.php (added)
+++ experimental/Document/src/document/rst/token.php [iso-8859-1] Fri Feb 8
12:35:13 2008
@@ -1,0 +1,72 @@
+<?php
+/**
+ * File containing the ezcDocumentRstToken struct
+ *
+ * @package Document
+ * @version //autogen//
+ * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved.
+ * @license http://ez.no/licenses/new_bsd New BSD License
+ */
+
+/**
+ * Struct for RST document tokens
+ *
+ * @package Document
+ * @version //autogen//
+ * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved.
+ * @license http://ez.no/licenses/new_bsd New BSD License
+ */
+class ezcDocumentRstToken extends ezcBaseStruct
+{
+    // Token types for whitespace characters
+    const WHITESPACE = 1;
+    const NEWLINE = 2;
+
+    /**
+     * Type of the token, for example WHITESPACE or NEWLINE
+     *
+     * @var int
+     */
+    public $type;
+
+    /**
+     * Content carried by the token
+     *
+     * @var mixed
+     */
+    public $content;
+
+    /**
+     * Line in the source file on which the token occurs
+     *
+     * @var int
+     */
+    public $line;
+
+    /**
+     * Position of the token inside its line
+     *
+     * @var int
+     */
+    public $position;
+
+    /**
+     * Create a new token struct from the given values
+     *
+     * @ignore
+     * @param int $type
+     * @param mixed $content
+     * @param int $line
+     * @param int $position
+     * @return void
+     */
+    public function __construct( $type, $content, $line, $position = 0 )
+    {
+        $this->type     = $type;
+        $this->content  = $content;
+        $this->line     = $line;
+        $this->position = $position;
+    }
+}
+
+?>
Propchange: experimental/Document/src/document/rst/token.php
------------------------------------------------------------------------------
svn:eol-style = native
Added: experimental/Document/src/document/rst/tokenizer.php
==============================================================================
--- experimental/Document/src/document/rst/tokenizer.php (added)
+++ experimental/Document/src/document/rst/tokenizer.php [iso-8859-1] Fri Feb
8 12:35:13 2008
@@ -1,0 +1,114 @@
+<?php
+/**
+ * File containing the ezcDocumentRstTokenizer
+ *
+ * @package Document
+ * @version //autogen//
+ * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved.
+ * @license http://ez.no/licenses/new_bsd New BSD License
+ */
+
+/**
+ * Tokenizer for RST documents
+ *
+ * @package Document
+ * @version //autogen//
+ * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved.
+ * @license http://ez.no/licenses/new_bsd New BSD License
+ */
+class ezcDocumentRstTokenizer
+{
+    /**
+     * List with tokens and a regular expression matching the given token.
+     *
+     * The tokens are matched in the given order. Every expression is
+     * anchored to the start of the not yet consumed input.
+     *
+     * @var array
+     */
+    protected $tokens = array(
+        ezcDocumentRstToken::WHITESPACE =>
+            '(^(?P<value>[ \t]+))',
+        ezcDocumentRstToken::NEWLINE =>
+            '(^(?P<value>\r\n|\r|\n))',
+    );
+
+    /**
+     * Tokenize the given file
+     *
+     * Reads the file and passes its contents to tokenizeString(). Returns an
+     * array of ezcDocumentRstToken structs on success.
+     *
+     * @throws ezcBaseFileNotFoundException
+     *         if the file does not exist or is not readable.
+     * @throws ezcDocumentTokenizerException
+     *         if some content could not be matched by any token.
+     * @param string $file
+     * @return array
+     */
+    public function tokenizeFile( $file )
+    {
+        if ( !file_exists( $file ) || !is_readable( $file ) )
+        {
+            throw new ezcBaseFileNotFoundException( $file );
+        }
+
+        return $this->tokenizeString( file_get_contents( $file ) );
+    }
+
+    /**
+     * Tokenize the given string
+     *
+     * Returns an array of ezcDocumentRstToken structs on success. The line
+     * and position stored in each token are counted starting at 1.
+     *
+     * @throws ezcDocumentTokenizerException
+     *         if some content could not be matched by any token.
+     * @param string $string
+     * @return array
+     */
+    public function tokenizeString( $string )
+    {
+        $line = 1;
+        $position = 1;
+        $tokens = array();
+
+        while ( strlen( $string ) > 0 )
+        {
+            foreach ( $this->tokens as $token => $expression )
+            {
+                if ( !preg_match( $expression, $string, $matches ) )
+                {
+                    continue;
+                }
+
+                // A token matched, so add the matched token to the token
+                // list and update all variables.
+                $tokens[] = new ezcDocumentRstToken(
+                    $token,
+                    ( isset( $matches['value'] ) ? $matches['value'] : null ),
+                    $line,
+                    $position
+                );
+
+                // Remove only the matched text from the input string. The
+                // position counter is 1-based and relative to the current
+                // line, so it must not be used as the string offset.
+                $length = strlen( $matches[0] );
+                $string = (string) substr( $string, $length );
+                $position += $length;
+
+                // On a newline token reset the line position and increase
+                // the line value
+                if ( $token === ezcDocumentRstToken::NEWLINE )
+                {
+                    ++$line;
+                    $position = 1;
+                }
+
+                // Restart with the first expression on the remaining input
+                // and skip the exception for unmatched content below.
+                continue 2;
+            }
+
+            // None of the token definitions matched the input string. We throw
+            // an exception with the position of the content in the input
+            // string and the contents we could not match.
+            throw new ezcDocumentTokenizerException(
+                $line,
+                $position,
+                $string
+            );
+        }
+
+        return $tokens;
+    }
+}
+
+?>
Propchange: experimental/Document/src/document/rst/tokenizer.php
------------------------------------------------------------------------------
svn:eol-style = native
Modified: experimental/Document/src/document_autoload.php
==============================================================================
--- experimental/Document/src/document_autoload.php [iso-8859-1] (original)
+++ experimental/Document/src/document_autoload.php [iso-8859-1] Fri Feb 8
12:35:13 2008
@@ -33,6 +33,8 @@
'ezcDocumentRelaxNgValidator' =>
'Document/validator/realxng.php',
'ezcDocumentRst' => 'Document/document/rst.php',
'ezcDocumentRstOptions' =>
'Document/options/document_rst.php',
+ 'ezcDocumentRstToken' =>
'Document/document/rst/token.php',
+ 'ezcDocumentRstTokenizer' =>
'Document/document/rst/tokenizer.php',
'ezcDocumentXhtml' =>
'Document/document/xml/xhtml.php',
'ezcDocumentXhtmlOptions' =>
'Document/options/document_xhtml.php',
'ezcDocumentXhtmlToDocbookConverter' =>
'Document/converters/xslt/xhtml_docbook.php',
Added: experimental/Document/tests/document_rst_tokenizer_tests.php
==============================================================================
--- experimental/Document/tests/document_rst_tokenizer_tests.php (added)
+++ experimental/Document/tests/document_rst_tokenizer_tests.php [iso-8859-1]
Fri Feb 8 12:35:13 2008
@@ -1,0 +1,70 @@
+<?php
+/**
+ * ezcDocumentRstTokenizerTests
+ *
+ * @package Document
+ * @version //autogen//
+ * @subpackage Tests
+ * @copyright Copyright (C) 2005-2007 eZ systems as. All rights reserved.
+ * @license http://ez.no/licenses/new_bsd New BSD License
+ */
+
+/**
+ * Test suite for the ezcDocumentRstTokenizer class.
+ *
+ * @package Document
+ * @subpackage Tests
+ */
+class ezcDocumentRstTokenizerTests extends ezcTestCase
+{
+    /**
+     * Cached list of test documents, null until it has been built once.
+     *
+     * @var array
+     */
+    protected static $testDocuments = null;
+
+    /**
+     * Return a test suite containing the tests of this class.
+     *
+     * @return PHPUnit_Framework_TestSuite
+     */
+    public static function suite()
+    {
+        return new PHPUnit_Framework_TestSuite( __CLASS__ );
+    }
+
+    /**
+     * Data provider listing the tokenizer test documents.
+     *
+     * Each entry pairs a *.txt input file from files/rst/tokenizer/ with
+     * the name of the *.tokens file holding the expected result.
+     *
+     * @return array
+     */
+    public static function getTestDocuments()
+    {
+        if ( self::$testDocuments === null )
+        {
+            // Start with an empty list, so that an array is returned even
+            // if no test file exists.
+            self::$testDocuments = array();
+
+            // Get a list of all test files from the respective folder
+            $testFiles = glob( dirname( __FILE__ ) . '/files/rst/tokenizer/*.txt' );
+
+            // glob() returns false on error, which cannot be iterated
+            if ( $testFiles === false )
+            {
+                $testFiles = array();
+            }
+
+            // Create array with the test file and the expected result file
+            foreach ( $testFiles as $file )
+            {
+                self::$testDocuments[] = array(
+                    $file,
+                    substr( $file, 0, -3 ) . 'tokens'
+                );
+            }
+        }
+
+        return self::$testDocuments;
+    }
+
+    /**
+     * Tokenize a test document and compare the tokens with the expected
+     * token list returned by the comparison file.
+     *
+     * @dataProvider getTestDocuments
+     */
+    public function testLoadXmlDocumentFromFile( $from, $to )
+    {
+        if ( !is_file( $to ) )
+        {
+            $this->markTestSkipped( "Comparision file '$to' not yet defined." );
+        }
+
+        $tokenizer = new ezcDocumentRstTokenizer();
+        $tokens = $tokenizer->tokenizeFile( $from );
+
+        // The comparison file is a PHP file returning the expected tokens
+        $expected = include $to;
+
+        $this->assertEquals(
+            $expected,
+            $tokens,
+            'Extracted tokens do not match expected tokens.'
+        );
+    }
+}
+
+?>
Propchange: experimental/Document/tests/document_rst_tokenizer_tests.php
------------------------------------------------------------------------------
svn:eol-style = native
Added: experimental/Document/tests/files/rst/tokenizer/empty.tokens
==============================================================================
--- experimental/Document/tests/files/rst/tokenizer/empty.tokens (added)
+++ experimental/Document/tests/files/rst/tokenizer/empty.tokens [iso-8859-1]
Fri Feb 8 12:35:13 2008
@@ -1,0 +1,3 @@
+<?php
+
+return array();
Modified: experimental/Document/tests/suite.php
==============================================================================
--- experimental/Document/tests/suite.php [iso-8859-1] (original)
+++ experimental/Document/tests/suite.php [iso-8859-1] Fri Feb 8 12:35:13 2008
@@ -19,6 +19,7 @@
require_once 'document_options_xml_base_test.php';
require_once 'document_xml_base_test.php';
+require_once 'document_rst_tokenizer_tests.php';
require_once 'converter_options_ezp3_ezp4_test.php';
require_once 'converter_ezp3_ezp4_test.php';
@@ -39,6 +40,7 @@
$this->addTest( ezcDocumentOptionsXmlBaseTests::suite() );
$this->addTest( ezcDocumentXmlBaseTests::suite() );
+ $this->addTest( ezcDocumentRstTokenizerTests::suite() );
$this->addTest( ezcDocumentConverterOptionsEzp3ToEzp4Tests::suite() );
$this->addTest( ezcDocumentConverterEzp3ToEzp4Tests::suite() );
--
svn-components mailing list
[email protected]
http://lists.ez.no/mailman/listinfo/svn-components