Author: kn Date: Fri Feb 8 12:35:13 2008 New Revision: 7322 Log: - Added tokenizer test infrastructure - Added basic tokenizer - Tokenizer works for empty files! ;)
Added: experimental/Document/src/document/rst/ experimental/Document/src/document/rst/token.php (with props) experimental/Document/src/document/rst/tokenizer.php (with props) experimental/Document/tests/document_rst_tokenizer_tests.php (with props) experimental/Document/tests/files/rst/tokenizer/empty.tokens Modified: experimental/Document/design/class_diagram.png experimental/Document/src/document_autoload.php experimental/Document/tests/suite.php Modified: experimental/Document/design/class_diagram.png ============================================================================== Binary files - no diff available. Added: experimental/Document/src/document/rst/token.php ============================================================================== --- experimental/Document/src/document/rst/token.php (added) +++ experimental/Document/src/document/rst/token.php [iso-8859-1] Fri Feb 8 12:35:13 2008 @@ -1,0 +1,72 @@ +<?php +/** + * File containing the ezcDocumentRstToken struct + * + * @package Document + * @version //autogen// + * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved. + * @license http://ez.no/licenses/new_bsd New BSD License + */ + +/** + * Struct for RST document document tokens + * + * @package Document + * @version //autogen// + * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved. + * @license http://ez.no/licenses/new_bsd New BSD License + */ +class ezcDocumentRstToken extends ezcBaseStruct +{ + // Whitespace tokens + const WHITESPACE = 1; + const NEWLINE = 2; + + /** + * Token type + * + * @var int + */ + public $type; + + /** + * Token content + * + * @var mixed + */ + public $content; + + /** + * Line of the token in the source file + * + * @var int + */ + public $line; + + /** + * Position of the token in its line. 
+ * + * @var int + */ + public $position; + + /** + * Construct RST token + * + * @ignore + * @param int $type + * @param mixed $content + * @param int $line + * @param int $position + * @return void + */ + public function __construct( $type, $content, $line, $position = 0 ) + { + $this->type = $type; + $this->content = $content; + $this->line = $line; + $this->position = $position; + } +} + +?> Propchange: experimental/Document/src/document/rst/token.php ------------------------------------------------------------------------------ svn:eol-style = native Added: experimental/Document/src/document/rst/tokenizer.php ============================================================================== --- experimental/Document/src/document/rst/tokenizer.php (added) +++ experimental/Document/src/document/rst/tokenizer.php [iso-8859-1] Fri Feb 8 12:35:13 2008 @@ -1,0 +1,114 @@ +<?php +/** + * File containing the ezcDocumentRstTokenizer + * + * @package Document + * @version //autogen// + * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved. + * @license http://ez.no/licenses/new_bsd New BSD License + */ + +/** + * Tokenizer for RST documents + * + * @package Document + * @version //autogen// + * @copyright Copyright (C) 2005-2008 eZ systems as. All rights reserved. + * @license http://ez.no/licenses/new_bsd New BSD License + */ +class ezcDocumentRstTokenizer +{ + /** + * List with tokens and a regular expression matching the given token. + * + * The tokens are matched in the given order. + * + * @var array + */ + protected $tokens = array( + ezcDocumentRstToken::WHITESPACE => + '(^(?P<value>[ \t]+))', + ezcDocumentRstToken::NEWLINE => + '(^(?P<value>\r\n|\r|\n))', + ); + + /** + * Tokenize the given file + * + * The method tries to tokenize the passed files and returns an array of + * ezcDocumentRstToken struct on succes, or throws a + * ezcDocumentTokenizerException, if something could not be matched by any + * token. 
+ * + * @param string $file + * @return array + */ + public function tokenizeFile( $file ) + { + if ( !file_exists( $file ) || !is_readable( $file ) ) + { + ezcBaseFileNotFoundException( $file ); + } + + return $this->tokenizeString( file_get_contents( $file ) ); + } + + /** + * Tokenize the given string + * + * The method tries to tokenize the passed strings and returns an array of + * ezcDocumentRstToken struct on succes, or throws a + * ezcDocumentTokenizerException, if something could not be matched by any + * token. + * + * @param string $string + * @return array + */ + public function tokenizeString( $string ) + { + $line = 1; + $position = 1; + $tokens = array(); + + while ( strlen( $string ) > 0 ) + { + foreach ( $this->tokens as $token => $expression ) + { + if ( preg_match( $expression, $string, $matches ) ) + { + // A token matched, so add the matched token to the token + // list and update all variables. + $tokens[] = new ezcDocumentRstToken( + $token, + ( isset( $matches['value'] ) ? $matches['value'] : null ), + $line, + $position + ); + + // Removed matched stuff from input string + $string = substr( $string, $position += strlen( $matches[0] ) ); + + // On a newline token reset the line position and increase the line value + if ( $token === ezcDocumentRstToken::NEWLINE ) + { + ++$line; + $position = 1; + } + } + } + + // None of the token definitions matched the input string. We throw + // an exception with the position of the content in the input + // string and the contents we could not match. 
+ throw new ezcDocumentTokenizerException( + $line, + $position, + $string + ); + } + + return $tokens; + } +} + +?> Propchange: experimental/Document/src/document/rst/tokenizer.php ------------------------------------------------------------------------------ svn:eol-style = native Modified: experimental/Document/src/document_autoload.php ============================================================================== --- experimental/Document/src/document_autoload.php [iso-8859-1] (original) +++ experimental/Document/src/document_autoload.php [iso-8859-1] Fri Feb 8 12:35:13 2008 @@ -33,6 +33,8 @@ 'ezcDocumentRelaxNgValidator' => 'Document/validator/realxng.php', 'ezcDocumentRst' => 'Document/document/rst.php', 'ezcDocumentRstOptions' => 'Document/options/document_rst.php', + 'ezcDocumentRstToken' => 'Document/document/rst/token.php', + 'ezcDocumentRstTokenizer' => 'Document/document/rst/tokenizer.php', 'ezcDocumentXhtml' => 'Document/document/xml/xhtml.php', 'ezcDocumentXhtmlOptions' => 'Document/options/document_xhtml.php', 'ezcDocumentXhtmlToDocbookConverter' => 'Document/converters/xslt/xhtml_docbook.php', Added: experimental/Document/tests/document_rst_tokenizer_tests.php ============================================================================== --- experimental/Document/tests/document_rst_tokenizer_tests.php (added) +++ experimental/Document/tests/document_rst_tokenizer_tests.php [iso-8859-1] Fri Feb 8 12:35:13 2008 @@ -1,0 +1,70 @@ +<?php +/** + * ezcDocumentConverterEzp3TpEzp4Tests + * + * @package Document + * @version //autogen// + * @subpackage Tests + * @copyright Copyright (C) 2005-2007 eZ systems as. All rights reserved. + * @license http://ez.no/licenses/new_bsd New BSD License + */ + +/** + * Test suite for class. 
+ * + * @package Document + * @subpackage Tests + */ +class ezcDocumentRstTokenizerTests extends ezcTestCase +{ + protected static $testDocuments = null; + + public static function suite() + { + return new PHPUnit_Framework_TestSuite( __CLASS__ ); + } + + public static function getTestDocuments() + { + if ( self::$testDocuments === null ) + { + // Get a list of all test files from the respektive folder + $testFiles = glob( dirname( __FILE__ ) . '/files/rst/tokenizer/*.txt' ); + + // Create array with the test file and the expected result file + foreach ( $testFiles as $file ) + { + self::$testDocuments[] = array( + $file, + substr( $file, 0, -3 ) . 'tokens' + ); + } + } + + return self::$testDocuments; + } + + /** + * @dataProvider getTestDocuments + */ + public function testLoadXmlDocumentFromFile( $from, $to ) + { + if ( !is_file( $to ) ) + { + $this->markTestSkipped( "Comparision file '$to' not yet defined." ); + } + + $tokenizer = new ezcDocumentRstTokenizer(); + $tokens = $tokenizer->tokenizeFile( $from ); + + $expected = include $to; + + $this->assertEquals( + $expected, + $tokens, + 'Extracted tokens do not match expected tokens.' 
+ ); + } +} + +?> Propchange: experimental/Document/tests/document_rst_tokenizer_tests.php ------------------------------------------------------------------------------ svn:eol-style = native Added: experimental/Document/tests/files/rst/tokenizer/empty.tokens ============================================================================== --- experimental/Document/tests/files/rst/tokenizer/empty.tokens (added) +++ experimental/Document/tests/files/rst/tokenizer/empty.tokens [iso-8859-1] Fri Feb 8 12:35:13 2008 @@ -1,0 +1,3 @@ +<?php + +return array(); Modified: experimental/Document/tests/suite.php ============================================================================== --- experimental/Document/tests/suite.php [iso-8859-1] (original) +++ experimental/Document/tests/suite.php [iso-8859-1] Fri Feb 8 12:35:13 2008 @@ -19,6 +19,7 @@ require_once 'document_options_xml_base_test.php'; require_once 'document_xml_base_test.php'; +require_once 'document_rst_tokenizer_tests.php'; require_once 'converter_options_ezp3_ezp4_test.php'; require_once 'converter_ezp3_ezp4_test.php'; @@ -39,6 +40,7 @@ $this->addTest( ezcDocumentOptionsXmlBaseTests::suite() ); $this->addTest( ezcDocumentXmlBaseTests::suite() ); + $this->addTest( ezcDocumentRstTokenizerTests::suite() ); $this->addTest( ezcDocumentConverterOptionsEzp3ToEzp4Tests::suite() ); $this->addTest( ezcDocumentConverterEzp3ToEzp4Tests::suite() ); -- svn-components mailing list svn-components@lists.ez.no http://lists.ez.no/mailman/listinfo/svn-components