Author: kn Date: Fri Feb 8 14:59:49 2008 New Revision: 7325 Log: - Added support for bullet lists in tokenizer
Added: experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens experimental/Document/tests/files/rst/tokenizer/definition_list.tokens Modified: experimental/Document/src/document/rst/token.php experimental/Document/src/document/rst/tokenizer.php experimental/Document/tests/document_rst_tokenizer_tests.php experimental/Document/tests/files/rst/tokenizer/ (props changed) Modified: experimental/Document/src/document/rst/token.php ============================================================================== --- experimental/Document/src/document/rst/token.php [iso-8859-1] (original) +++ experimental/Document/src/document/rst/token.php [iso-8859-1] Fri Feb 8 14:59:49 2008 @@ -23,6 +23,8 @@ const NEWLINE = 2; const HEADLINE = 11; + + const BULLET_POINT = 21; const TEXT_LINE = 99; Modified: experimental/Document/src/document/rst/tokenizer.php ============================================================================== --- experimental/Document/src/document/rst/tokenizer.php [iso-8859-1] (original) +++ experimental/Document/src/document/rst/tokenizer.php [iso-8859-1] Fri Feb 8 14:59:49 2008 @@ -19,6 +19,12 @@ class ezcDocumentRstTokenizer { /** + * Common whitespace characters. The vertical tab is excluded, because it + * causes strange problems with PCRE. + */ + const WHITESPACE_CHARS = ' \\t'; + + /** * Allowed character sets for headlines. * * @see http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#sections @@ -76,12 +82,15 @@ { $this->tokens = array( ezcDocumentRstToken::WHITESPACE => - '(\\A(?P<value>[ \\t]+))', + '(\\A(?P<value>[' . self::WHITESPACE_CHARS . ']+))', ezcDocumentRstToken::NEWLINE => '(\\A(?P<value>\\r\\n|\\r|\\n))', ezcDocumentRstToken::HEADLINE => '(\\A(?P<value>([' . self::HEADLINE_CHARS . '])\\2*)$)m', + + ezcDocumentRstToken::BULLET_POINT => + '(\\A(?P<value>[' . self::BULLET_LIST_CHARS . '][' . self::WHITESPACE_CHARS . ']))u', // This should be last match ezcDocumentRstToken::TEXT_LINE => @@ -166,6 +175,8 @@ if ( preg_match( $expression, $string, $matches ) ) { +// echo "- Matched token $token (" . $matches['value'] . ":" . strlen( $matches['value'] ) . ")\n"; + // A token matched, so add the matched token to the token // list and update all variables. $tokens[] = new ezcDocumentRstToken( Modified: experimental/Document/tests/document_rst_tokenizer_tests.php ============================================================================== --- experimental/Document/tests/document_rst_tokenizer_tests.php [iso-8859-1] (original) +++ experimental/Document/tests/document_rst_tokenizer_tests.php [iso-8859-1] Fri Feb 8 14:59:49 2008 @@ -59,13 +59,18 @@ $expected = include $to; - file_put_contents( $to . '.test', "<?php\n\nreturn " . var_export( $tokens, true ) . ";\n\n" ); + // Store test file, to have something to compare on failure + $tempDir = $this->createTempDir( 'rst_tokenizer' ) . '/'; + file_put_contents( $tempDir . basename( $to ), "<?php\n\nreturn " . var_export( $tokens, true ) . ";\n\n" ); $this->assertEquals( $expected, $tokens, 'Extracted tokens do not match expected tokens.' ); + + // Remove tempdir, when nothing failed. + $this->removeTempDir(); } public function testNotExistantFile() Propchange: experimental/Document/tests/files/rst/tokenizer/ ('svn:ignore' removed) Added: experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens ============================================================================== --- experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens (added) +++ experimental/Document/tests/files/rst/tokenizer/bullet_list.tokens [iso-8859-1] Fri Feb 8 14:59:49 2008 @@ -1,0 +1,197 @@ +<?php + +return array ( + 0 => + ezcDocumentRstToken::__set_state(array( + 'type' => 21, + 'content' => '- ', + 'line' => 1, + 'position' => 1, + )), + 1 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'This', + 'line' => 1, + 'position' => 3, + )), + 2 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 1, + 'position' => 7, + )), + 3 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'is', + 'line' => 1, + 'position' => 8, + )), + 4 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 1, + 'position' => 10, + )), + 5 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'a', + 'line' => 1, + 'position' => 11, + )), + 6 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 1, + 'position' => 12, + )), + 7 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'bullet', + 'line' => 1, + 'position' => 13, + )), + 8 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 1, + 'position' => 19, + )), + 9 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'list.', + 'line' => 1, + 'position' => 20, + )), + 10 => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 1, + 'position' => 25, + )), + 11 => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 2, + 'position' => 1, + )), + 12 => + ezcDocumentRstToken::__set_state(array( + 'type' => 21, + 'content' => '- ', + 'line' => 3, + 'position' => 1, + )), + 13 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'Bullets', + 'line' => 3, + 'position' => 3, + )), + 14 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 3, + 'position' => 10, + )), + 15 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'can', + 'line' => 3, + 'position' => 11, + )), + 16 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 3, + 'position' => 14, + )), + 17 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'be', + 'line' => 3, + 'position' => 15, + )), + 18 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 3, + 'position' => 17, + )), + 19 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => '"*",', + 'line' => 3, + 'position' => 18, + )), + 20 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 3, + 'position' => 22, + )), + 21 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => '"+",', + 'line' => 3, + 'position' => 23, + )), + 22 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 3, + 'position' => 27, + )), + 23 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'or', + 'line' => 3, + 'position' => 28, + )), + 24 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 3, + 'position' => 30, + )), + 25 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => '"-".', + 'line' => 3, + 'position' => 31, + )), + 26 => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 3, + 'position' => 35, + )), +); + Added: experimental/Document/tests/files/rst/tokenizer/definition_list.tokens ============================================================================== --- experimental/Document/tests/files/rst/tokenizer/definition_list.tokens (added) +++ experimental/Document/tests/files/rst/tokenizer/definition_list.tokens [iso-8859-1] Fri Feb 8 14:59:49 2008 @@ -1,0 +1,495 @@ +<?php + +return array ( + 0 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'what', + 'line' => 1, + 'position' => 1, + )), + 1 => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 1, + 'position' => 5, + )), + 2 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 2, + 'position' => 1, + )), + 3 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'Definition', + 'line' => 2, + 'position' => 5, + )), + 4 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 2, + 'position' => 15, + )), + 5 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'lists', + 'line' => 2, + 'position' => 16, + )), + 6 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 2, + 'position' => 21, + )), + 7 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'associate', + 'line' => 2, + 'position' => 22, + )), + 8 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 2, + 'position' => 31, + )), + 9 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'a', + 'line' => 2, + 'position' => 32, + )), + 10 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 2, + 'position' => 33, + )), + 11 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'term', + 'line' => 2, + 'position' => 34, + )), + 12 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 2, + 'position' => 38, + )), + 13 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'with', + 'line' => 2, + 'position' => 39, + )), + 14 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 2, + 'position' => 43, + )), + 15 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'a', + 'line' => 2, + 'position' => 44, + )), + 16 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 2, + 'position' => 45, + )), + 17 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'definition.', + 'line' => 2, + 'position' => 46, + )), + 18 => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 2, + 'position' => 57, + )), + 19 => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 3, + 'position' => 1, + )), + 20 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'how', + 'line' => 4, + 'position' => 1, + )), + 21 => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 4, + 'position' => 4, + )), + 22 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 5, + 'position' => 1, + )), + 23 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'The', + 'line' => 5, + 'position' => 5, + )), + 24 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 5, + 'position' => 8, + )), + 25 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'term', + 'line' => 5, + 'position' => 9, + )), + 26 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 5, + 'position' => 13, + )), + 27 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'is', + 'line' => 5, + 'position' => 14, + )), + 28 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 5, + 'position' => 16, + )), + 29 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'a', + 'line' => 5, + 'position' => 17, + )), + 30 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 5, + 'position' => 18, + )), + 31 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'one-line', + 'line' => 5, + 'position' => 19, + )), + 32 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 5, + 'position' => 27, + )), + 33 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'phrase,', + 'line' => 5, + 'position' => 28, + )), + 34 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 5, + 'position' => 35, + )), + 35 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'and', + 'line' => 5, + 'position' => 36, + )), + 36 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 5, + 'position' => 39, + )), + 37 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'the', + 'line' => 5, + 'position' => 40, + )), + 38 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 5, + 'position' => 43, + )), + 39 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'definition', + 'line' => 5, + 'position' => 44, + )), + 40 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 5, + 'position' => 54, + )), + 41 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'is', + 'line' => 5, + 'position' => 55, + )), + 42 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 5, + 'position' => 57, + )), + 43 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'one', + 'line' => 5, + 'position' => 58, + )), + 44 => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 5, + 'position' => 61, + )), + 45 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 6, + 'position' => 1, + )), + 46 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'or', + 'line' => 6, + 'position' => 5, + )), + 47 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 6, + 'position' => 7, + )), + 48 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'more', + 'line' => 6, + 'position' => 8, + )), + 49 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 6, + 'position' => 12, + )), + 50 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'paragraphs', + 'line' => 6, + 'position' => 13, + )), + 51 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 6, + 'position' => 23, + )), + 52 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'or', + 'line' => 6, + 'position' => 24, + )), + 53 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 6, + 'position' => 26, + )), + 54 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'body', + 'line' => 6, + 'position' => 27, + )), + 55 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 6, + 'position' => 31, + )), + 56 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'elements,', + 'line' => 6, + 'position' => 32, + )), + 57 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 6, + 'position' => 41, + )), + 58 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'indented', + 'line' => 6, + 'position' => 42, + )), + 59 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 6, + 'position' => 50, + )), + 60 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'relative', + 'line' => 6, + 'position' => 51, + )), + 61 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 6, + 'position' => 59, + )), + 62 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'to', + 'line' => 6, + 'position' => 60, + )), + 63 => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 6, + 'position' => 62, + )), + 64 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 7, + 'position' => 1, + )), + 65 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'the', + 'line' => 7, + 'position' => 5, + )), + 66 => + ezcDocumentRstToken::__set_state(array( + 'type' => 1, + 'content' => ' ', + 'line' => 7, + 'position' => 8, + )), + 67 => + ezcDocumentRstToken::__set_state(array( + 'type' => 99, + 'content' => 'term.', + 'line' => 7, + 'position' => 9, + )), + 68 => + ezcDocumentRstToken::__set_state(array( + 'type' => 2, + 'content' => ' +', + 'line' => 7, + 'position' => 14, + )), +); + -- svn-components mailing list svn-components@lists.ez.no http://lists.ez.no/mailman/listinfo/svn-components